1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "PixelProgram.hpp" 16 17 #include "SamplerCore.hpp" 18 #include "Renderer/Primitive.hpp" 19 #include "Renderer/Renderer.hpp" 20 21 namespace sw 22 { 23 extern bool postBlendSRGB; 24 extern bool booleanFaceRegister; 25 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates 26 extern bool fullPixelPositionRegister; 27 28 void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w) 29 { 30 if(shader->getShaderModel() >= 0x0300) 31 { 32 if(shader->isVPosDeclared()) 33 { 34 if(!halfIntegerCoordinates) 35 { 36 vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1); 37 vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1); 38 } 39 else 40 { 41 vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f); 42 vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f); 43 } 44 45 if(fullPixelPositionRegister) 46 { 47 vPos.z = z[0]; // FIXME: Centroid? 48 vPos.w = w; // FIXME: Centroid? 49 } 50 } 51 52 if(shader->isVFaceDeclared()) 53 { 54 Float4 area = *Pointer<Float>(primitive + OFFSET(Primitive, area)); 55 Float4 face = booleanFaceRegister ? 
Float4(As<Float4>(CmpNLT(area, Float4(0.0f)))) : area; 56 57 vFace.x = face; 58 vFace.y = face; 59 vFace.z = face; 60 vFace.w = face; 61 } 62 } 63 } 64 65 void PixelProgram::applyShader(Int cMask[4]) 66 { 67 enableIndex = 0; 68 stackIndex = 0; 69 70 if(shader->containsLeaveInstruction()) 71 { 72 enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 73 } 74 75 for(int i = 0; i < RENDERTARGETS; i++) 76 { 77 if(state.targetFormat[i] != FORMAT_NULL) 78 { 79 oC[i] = Vector4f(0.0f, 0.0f, 0.0f, 0.0f); 80 } 81 } 82 83 // Create all call site return blocks up front 84 for(size_t i = 0; i < shader->getLength(); i++) 85 { 86 const Shader::Instruction *instruction = shader->getInstruction(i); 87 Shader::Opcode opcode = instruction->opcode; 88 89 if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ) 90 { 91 const Dst &dst = instruction->dst; 92 93 ASSERT(callRetBlock[dst.label].size() == dst.callSite); 94 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock()); 95 } 96 } 97 98 bool broadcastColor0 = true; 99 100 for(size_t i = 0; i < shader->getLength(); i++) 101 { 102 const Shader::Instruction *instruction = shader->getInstruction(i); 103 Shader::Opcode opcode = instruction->opcode; 104 105 if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB) 106 { 107 continue; 108 } 109 110 const Dst &dst = instruction->dst; 111 const Src &src0 = instruction->src[0]; 112 const Src &src1 = instruction->src[1]; 113 const Src &src2 = instruction->src[2]; 114 const Src &src3 = instruction->src[3]; 115 const Src &src4 = instruction->src[4]; 116 117 bool predicate = instruction->predicate; 118 Control control = instruction->control; 119 bool pp = dst.partialPrecision; 120 bool project = instruction->project; 121 bool bias = instruction->bias; 122 123 Vector4f d; 124 Vector4f s0; 125 Vector4f s1; 126 Vector4f s2; 127 Vector4f s3; 128 Vector4f s4; 129 130 if(opcode == 
Shader::OPCODE_TEXKILL) // Takes destination as input 131 { 132 if(dst.type == Shader::PARAMETER_TEXTURE) 133 { 134 d.x = v[2 + dst.index].x; 135 d.y = v[2 + dst.index].y; 136 d.z = v[2 + dst.index].z; 137 d.w = v[2 + dst.index].w; 138 } 139 else 140 { 141 d = r[dst.index]; 142 } 143 } 144 145 if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0); 146 if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1); 147 if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2); 148 if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3); 149 if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4); 150 151 switch(opcode) 152 { 153 case Shader::OPCODE_PS_2_0: break; 154 case Shader::OPCODE_PS_2_x: break; 155 case Shader::OPCODE_PS_3_0: break; 156 case Shader::OPCODE_DEF: break; 157 case Shader::OPCODE_DCL: break; 158 case Shader::OPCODE_NOP: break; 159 case Shader::OPCODE_MOV: mov(d, s0); break; 160 case Shader::OPCODE_NEG: neg(d, s0); break; 161 case Shader::OPCODE_INEG: ineg(d, s0); break; 162 case Shader::OPCODE_F2B: f2b(d, s0); break; 163 case Shader::OPCODE_B2F: b2f(d, s0); break; 164 case Shader::OPCODE_F2I: f2i(d, s0); break; 165 case Shader::OPCODE_I2F: i2f(d, s0); break; 166 case Shader::OPCODE_F2U: f2u(d, s0); break; 167 case Shader::OPCODE_U2F: u2f(d, s0); break; 168 case Shader::OPCODE_I2B: i2b(d, s0); break; 169 case Shader::OPCODE_B2I: b2i(d, s0); break; 170 case Shader::OPCODE_ADD: add(d, s0, s1); break; 171 case Shader::OPCODE_IADD: iadd(d, s0, s1); break; 172 case Shader::OPCODE_SUB: sub(d, s0, s1); break; 173 case Shader::OPCODE_ISUB: isub(d, s0, s1); break; 174 case Shader::OPCODE_MUL: mul(d, s0, s1); break; 175 case Shader::OPCODE_IMUL: imul(d, s0, s1); break; 176 case Shader::OPCODE_MAD: mad(d, s0, s1, s2); break; 177 case Shader::OPCODE_IMAD: imad(d, s0, s1, s2); break; 178 case Shader::OPCODE_DP1: dp1(d, s0, s1); break; 179 case Shader::OPCODE_DP2: dp2(d, s0, s1); break; 180 case Shader::OPCODE_DP2ADD: 
dp2add(d, s0, s1, s2); break; 181 case Shader::OPCODE_DP3: dp3(d, s0, s1); break; 182 case Shader::OPCODE_DP4: dp4(d, s0, s1); break; 183 case Shader::OPCODE_DET2: det2(d, s0, s1); break; 184 case Shader::OPCODE_DET3: det3(d, s0, s1, s2); break; 185 case Shader::OPCODE_DET4: det4(d, s0, s1, s2, s3); break; 186 case Shader::OPCODE_CMP0: cmp0(d, s0, s1, s2); break; 187 case Shader::OPCODE_ICMP: icmp(d, s0, s1, control); break; 188 case Shader::OPCODE_UCMP: ucmp(d, s0, s1, control); break; 189 case Shader::OPCODE_SELECT: select(d, s0, s1, s2); break; 190 case Shader::OPCODE_EXTRACT: extract(d.x, s0, s1.x); break; 191 case Shader::OPCODE_INSERT: insert(d, s0, s1.x, s2.x); break; 192 case Shader::OPCODE_FRC: frc(d, s0); break; 193 case Shader::OPCODE_TRUNC: trunc(d, s0); break; 194 case Shader::OPCODE_FLOOR: floor(d, s0); break; 195 case Shader::OPCODE_ROUND: round(d, s0); break; 196 case Shader::OPCODE_ROUNDEVEN: roundEven(d, s0); break; 197 case Shader::OPCODE_CEIL: ceil(d, s0); break; 198 case Shader::OPCODE_EXP2X: exp2x(d, s0, pp); break; 199 case Shader::OPCODE_EXP2: exp2(d, s0, pp); break; 200 case Shader::OPCODE_LOG2X: log2x(d, s0, pp); break; 201 case Shader::OPCODE_LOG2: log2(d, s0, pp); break; 202 case Shader::OPCODE_EXP: exp(d, s0, pp); break; 203 case Shader::OPCODE_LOG: log(d, s0, pp); break; 204 case Shader::OPCODE_RCPX: rcpx(d, s0, pp); break; 205 case Shader::OPCODE_DIV: div(d, s0, s1); break; 206 case Shader::OPCODE_IDIV: idiv(d, s0, s1); break; 207 case Shader::OPCODE_UDIV: udiv(d, s0, s1); break; 208 case Shader::OPCODE_MOD: mod(d, s0, s1); break; 209 case Shader::OPCODE_IMOD: imod(d, s0, s1); break; 210 case Shader::OPCODE_UMOD: umod(d, s0, s1); break; 211 case Shader::OPCODE_SHL: shl(d, s0, s1); break; 212 case Shader::OPCODE_ISHR: ishr(d, s0, s1); break; 213 case Shader::OPCODE_USHR: ushr(d, s0, s1); break; 214 case Shader::OPCODE_RSQX: rsqx(d, s0, pp); break; 215 case Shader::OPCODE_SQRT: sqrt(d, s0, pp); break; 216 case Shader::OPCODE_RSQ: rsq(d, 
s0, pp); break; 217 case Shader::OPCODE_LEN2: len2(d.x, s0, pp); break; 218 case Shader::OPCODE_LEN3: len3(d.x, s0, pp); break; 219 case Shader::OPCODE_LEN4: len4(d.x, s0, pp); break; 220 case Shader::OPCODE_DIST1: dist1(d.x, s0, s1, pp); break; 221 case Shader::OPCODE_DIST2: dist2(d.x, s0, s1, pp); break; 222 case Shader::OPCODE_DIST3: dist3(d.x, s0, s1, pp); break; 223 case Shader::OPCODE_DIST4: dist4(d.x, s0, s1, pp); break; 224 case Shader::OPCODE_MIN: min(d, s0, s1); break; 225 case Shader::OPCODE_IMIN: imin(d, s0, s1); break; 226 case Shader::OPCODE_UMIN: umin(d, s0, s1); break; 227 case Shader::OPCODE_MAX: max(d, s0, s1); break; 228 case Shader::OPCODE_IMAX: imax(d, s0, s1); break; 229 case Shader::OPCODE_UMAX: umax(d, s0, s1); break; 230 case Shader::OPCODE_LRP: lrp(d, s0, s1, s2); break; 231 case Shader::OPCODE_STEP: step(d, s0, s1); break; 232 case Shader::OPCODE_SMOOTH: smooth(d, s0, s1, s2); break; 233 case Shader::OPCODE_ISINF: isinf(d, s0); break; 234 case Shader::OPCODE_ISNAN: isnan(d, s0); break; 235 case Shader::OPCODE_FLOATBITSTOINT: 236 case Shader::OPCODE_FLOATBITSTOUINT: 237 case Shader::OPCODE_INTBITSTOFLOAT: 238 case Shader::OPCODE_UINTBITSTOFLOAT: d = s0; break; 239 case Shader::OPCODE_PACKSNORM2x16: packSnorm2x16(d, s0); break; 240 case Shader::OPCODE_PACKUNORM2x16: packUnorm2x16(d, s0); break; 241 case Shader::OPCODE_PACKHALF2x16: packHalf2x16(d, s0); break; 242 case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0); break; 243 case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0); break; 244 case Shader::OPCODE_UNPACKHALF2x16: unpackHalf2x16(d, s0); break; 245 case Shader::OPCODE_POWX: powx(d, s0, s1, pp); break; 246 case Shader::OPCODE_POW: pow(d, s0, s1, pp); break; 247 case Shader::OPCODE_SGN: sgn(d, s0); break; 248 case Shader::OPCODE_ISGN: isgn(d, s0); break; 249 case Shader::OPCODE_CRS: crs(d, s0, s1); break; 250 case Shader::OPCODE_FORWARD1: forward1(d, s0, s1, s2); break; 251 case Shader::OPCODE_FORWARD2: forward2(d, 
s0, s1, s2); break; 252 case Shader::OPCODE_FORWARD3: forward3(d, s0, s1, s2); break; 253 case Shader::OPCODE_FORWARD4: forward4(d, s0, s1, s2); break; 254 case Shader::OPCODE_REFLECT1: reflect1(d, s0, s1); break; 255 case Shader::OPCODE_REFLECT2: reflect2(d, s0, s1); break; 256 case Shader::OPCODE_REFLECT3: reflect3(d, s0, s1); break; 257 case Shader::OPCODE_REFLECT4: reflect4(d, s0, s1); break; 258 case Shader::OPCODE_REFRACT1: refract1(d, s0, s1, s2.x); break; 259 case Shader::OPCODE_REFRACT2: refract2(d, s0, s1, s2.x); break; 260 case Shader::OPCODE_REFRACT3: refract3(d, s0, s1, s2.x); break; 261 case Shader::OPCODE_REFRACT4: refract4(d, s0, s1, s2.x); break; 262 case Shader::OPCODE_NRM2: nrm2(d, s0, pp); break; 263 case Shader::OPCODE_NRM3: nrm3(d, s0, pp); break; 264 case Shader::OPCODE_NRM4: nrm4(d, s0, pp); break; 265 case Shader::OPCODE_ABS: abs(d, s0); break; 266 case Shader::OPCODE_IABS: iabs(d, s0); break; 267 case Shader::OPCODE_SINCOS: sincos(d, s0, pp); break; 268 case Shader::OPCODE_COS: cos(d, s0, pp); break; 269 case Shader::OPCODE_SIN: sin(d, s0, pp); break; 270 case Shader::OPCODE_TAN: tan(d, s0, pp); break; 271 case Shader::OPCODE_ACOS: acos(d, s0, pp); break; 272 case Shader::OPCODE_ASIN: asin(d, s0, pp); break; 273 case Shader::OPCODE_ATAN: atan(d, s0, pp); break; 274 case Shader::OPCODE_ATAN2: atan2(d, s0, s1, pp); break; 275 case Shader::OPCODE_COSH: cosh(d, s0, pp); break; 276 case Shader::OPCODE_SINH: sinh(d, s0, pp); break; 277 case Shader::OPCODE_TANH: tanh(d, s0, pp); break; 278 case Shader::OPCODE_ACOSH: acosh(d, s0, pp); break; 279 case Shader::OPCODE_ASINH: asinh(d, s0, pp); break; 280 case Shader::OPCODE_ATANH: atanh(d, s0, pp); break; 281 case Shader::OPCODE_M4X4: M4X4(d, s0, src1); break; 282 case Shader::OPCODE_M4X3: M4X3(d, s0, src1); break; 283 case Shader::OPCODE_M3X4: M3X4(d, s0, src1); break; 284 case Shader::OPCODE_M3X3: M3X3(d, s0, src1); break; 285 case Shader::OPCODE_M3X2: M3X2(d, s0, src1); break; 286 case 
Shader::OPCODE_TEX: TEX(d, s0, src1, project, bias); break; 287 case Shader::OPCODE_TEXLDD: TEXGRAD(d, s0, src1, s2, s3); break; 288 case Shader::OPCODE_TEXLDL: TEXLOD(d, s0, src1, s0.w); break; 289 case Shader::OPCODE_TEXLOD: TEXLOD(d, s0, src1, s2.x); break; 290 case Shader::OPCODE_TEXSIZE: TEXSIZE(d, s0.x, src1); break; 291 case Shader::OPCODE_TEXKILL: TEXKILL(cMask, d, dst.mask); break; 292 case Shader::OPCODE_TEXOFFSET: TEXOFFSET(d, s0, src1, s2); break; 293 case Shader::OPCODE_TEXLODOFFSET: TEXLODOFFSET(d, s0, src1, s2, s3.x); break; 294 case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2.x); break; 295 case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCHOFFSET(d, s0, src1, s2, s3.x); break; 296 case Shader::OPCODE_TEXGRAD: TEXGRAD(d, s0, src1, s2, s3); break; 297 case Shader::OPCODE_TEXGRADOFFSET: TEXGRADOFFSET(d, s0, src1, s2, s3, s4); break; 298 case Shader::OPCODE_TEXBIAS: TEXBIAS(d, s0, src1, s2.x); break; 299 case Shader::OPCODE_TEXOFFSETBIAS: TEXOFFSETBIAS(d, s0, src1, s2, s3.x); break; 300 case Shader::OPCODE_DISCARD: DISCARD(cMask, instruction); break; 301 case Shader::OPCODE_DFDX: DFDX(d, s0); break; 302 case Shader::OPCODE_DFDY: DFDY(d, s0); break; 303 case Shader::OPCODE_FWIDTH: FWIDTH(d, s0); break; 304 case Shader::OPCODE_BREAK: BREAK(); break; 305 case Shader::OPCODE_BREAKC: BREAKC(s0, s1, control); break; 306 case Shader::OPCODE_BREAKP: BREAKP(src0); break; 307 case Shader::OPCODE_CONTINUE: CONTINUE(); break; 308 case Shader::OPCODE_TEST: TEST(); break; 309 case Shader::OPCODE_CALL: CALL(dst.label, dst.callSite); break; 310 case Shader::OPCODE_CALLNZ: CALLNZ(dst.label, dst.callSite, src0); break; 311 case Shader::OPCODE_ELSE: ELSE(); break; 312 case Shader::OPCODE_ENDIF: ENDIF(); break; 313 case Shader::OPCODE_ENDLOOP: ENDLOOP(); break; 314 case Shader::OPCODE_ENDREP: ENDREP(); break; 315 case Shader::OPCODE_ENDWHILE: ENDWHILE(); break; 316 case Shader::OPCODE_ENDSWITCH: ENDSWITCH(); break; 317 case Shader::OPCODE_IF: IF(src0); break; 318 
case Shader::OPCODE_IFC: IFC(s0, s1, control); break; 319 case Shader::OPCODE_LABEL: LABEL(dst.index); break; 320 case Shader::OPCODE_LOOP: LOOP(src1); break; 321 case Shader::OPCODE_REP: REP(src0); break; 322 case Shader::OPCODE_WHILE: WHILE(src0); break; 323 case Shader::OPCODE_SWITCH: SWITCH(); break; 324 case Shader::OPCODE_RET: RET(); break; 325 case Shader::OPCODE_LEAVE: LEAVE(); break; 326 case Shader::OPCODE_CMP: cmp(d, s0, s1, control); break; 327 case Shader::OPCODE_ALL: all(d.x, s0); break; 328 case Shader::OPCODE_ANY: any(d.x, s0); break; 329 case Shader::OPCODE_NOT: bitwise_not(d, s0); break; 330 case Shader::OPCODE_OR: bitwise_or(d, s0, s1); break; 331 case Shader::OPCODE_XOR: bitwise_xor(d, s0, s1); break; 332 case Shader::OPCODE_AND: bitwise_and(d, s0, s1); break; 333 case Shader::OPCODE_EQ: equal(d, s0, s1); break; 334 case Shader::OPCODE_NE: notEqual(d, s0, s1); break; 335 case Shader::OPCODE_END: break; 336 default: 337 ASSERT(false); 338 } 339 340 if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP) 341 { 342 if(dst.saturate) 343 { 344 if(dst.x) d.x = Max(d.x, Float4(0.0f)); 345 if(dst.y) d.y = Max(d.y, Float4(0.0f)); 346 if(dst.z) d.z = Max(d.z, Float4(0.0f)); 347 if(dst.w) d.w = Max(d.w, Float4(0.0f)); 348 349 if(dst.x) d.x = Min(d.x, Float4(1.0f)); 350 if(dst.y) d.y = Min(d.y, Float4(1.0f)); 351 if(dst.z) d.z = Min(d.z, Float4(1.0f)); 352 if(dst.w) d.w = Min(d.w, Float4(1.0f)); 353 } 354 355 if(instruction->isPredicated()) 356 { 357 Vector4f pDst; // FIXME: Rename 358 359 switch(dst.type) 360 { 361 case Shader::PARAMETER_TEMP: 362 if(dst.rel.type == Shader::PARAMETER_VOID) 363 { 364 if(dst.x) pDst.x = r[dst.index].x; 365 if(dst.y) pDst.y = r[dst.index].y; 366 if(dst.z) pDst.z = r[dst.index].z; 367 if(dst.w) pDst.w = r[dst.index].w; 368 } 369 else 370 { 371 Int a = relativeAddress(dst); 372 373 if(dst.x) pDst.x = r[dst.index + a].x; 374 if(dst.y) 
pDst.y = r[dst.index + a].y; 375 if(dst.z) pDst.z = r[dst.index + a].z; 376 if(dst.w) pDst.w = r[dst.index + a].w; 377 } 378 break; 379 case Shader::PARAMETER_COLOROUT: 380 if(dst.rel.type == Shader::PARAMETER_VOID) 381 { 382 if(dst.x) pDst.x = oC[dst.index].x; 383 if(dst.y) pDst.y = oC[dst.index].y; 384 if(dst.z) pDst.z = oC[dst.index].z; 385 if(dst.w) pDst.w = oC[dst.index].w; 386 } 387 else 388 { 389 Int a = relativeAddress(dst) + dst.index; 390 391 if(dst.x) pDst.x = oC[a].x; 392 if(dst.y) pDst.y = oC[a].y; 393 if(dst.z) pDst.z = oC[a].z; 394 if(dst.w) pDst.w = oC[a].w; 395 } 396 break; 397 case Shader::PARAMETER_PREDICATE: 398 if(dst.x) pDst.x = p0.x; 399 if(dst.y) pDst.y = p0.y; 400 if(dst.z) pDst.z = p0.z; 401 if(dst.w) pDst.w = p0.w; 402 break; 403 case Shader::PARAMETER_DEPTHOUT: 404 pDst.x = oDepth; 405 break; 406 default: 407 ASSERT(false); 408 } 409 410 Int4 enable = enableMask(instruction); 411 412 Int4 xEnable = enable; 413 Int4 yEnable = enable; 414 Int4 zEnable = enable; 415 Int4 wEnable = enable; 416 417 if(predicate) 418 { 419 unsigned char pSwizzle = instruction->predicateSwizzle; 420 421 Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03]; 422 Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03]; 423 Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03]; 424 Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03]; 425 426 if(!instruction->predicateNot) 427 { 428 if(dst.x) xEnable = xEnable & As<Int4>(xPredicate); 429 if(dst.y) yEnable = yEnable & As<Int4>(yPredicate); 430 if(dst.z) zEnable = zEnable & As<Int4>(zPredicate); 431 if(dst.w) wEnable = wEnable & As<Int4>(wPredicate); 432 } 433 else 434 { 435 if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate); 436 if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate); 437 if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate); 438 if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate); 439 } 440 } 441 442 if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable); 443 if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable); 444 if(dst.z) d.z = 
As<Float4>(As<Int4>(d.z) & zEnable); 445 if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable); 446 447 if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable)); 448 if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable)); 449 if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable)); 450 if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable)); 451 } 452 453 switch(dst.type) 454 { 455 case Shader::PARAMETER_TEMP: 456 if(dst.rel.type == Shader::PARAMETER_VOID) 457 { 458 if(dst.x) r[dst.index].x = d.x; 459 if(dst.y) r[dst.index].y = d.y; 460 if(dst.z) r[dst.index].z = d.z; 461 if(dst.w) r[dst.index].w = d.w; 462 } 463 else 464 { 465 Int a = relativeAddress(dst); 466 467 if(dst.x) r[dst.index + a].x = d.x; 468 if(dst.y) r[dst.index + a].y = d.y; 469 if(dst.z) r[dst.index + a].z = d.z; 470 if(dst.w) r[dst.index + a].w = d.w; 471 } 472 break; 473 case Shader::PARAMETER_COLOROUT: 474 if(dst.rel.type == Shader::PARAMETER_VOID) 475 { 476 broadcastColor0 = (dst.index == 0) && broadcastColor0; 477 478 if(dst.x) { oC[dst.index].x = d.x; } 479 if(dst.y) { oC[dst.index].y = d.y; } 480 if(dst.z) { oC[dst.index].z = d.z; } 481 if(dst.w) { oC[dst.index].w = d.w; } 482 } 483 else 484 { 485 broadcastColor0 = false; 486 Int a = relativeAddress(dst) + dst.index; 487 488 if(dst.x) { oC[a].x = d.x; } 489 if(dst.y) { oC[a].y = d.y; } 490 if(dst.z) { oC[a].z = d.z; } 491 if(dst.w) { oC[a].w = d.w; } 492 } 493 break; 494 case Shader::PARAMETER_PREDICATE: 495 if(dst.x) p0.x = d.x; 496 if(dst.y) p0.y = d.y; 497 if(dst.z) p0.z = d.z; 498 if(dst.w) p0.w = d.w; 499 break; 500 case Shader::PARAMETER_DEPTHOUT: 501 oDepth = d.x; 502 break; 503 default: 504 ASSERT(false); 505 } 506 } 507 } 508 509 if(currentLabel != -1) 510 { 511 Nucleus::setInsertBlock(returnBlock); 512 } 513 514 if(broadcastColor0) 515 { 516 for(int i = 0; i < RENDERTARGETS; i++) 517 { 518 c[i] = oC[0]; 519 } 520 } 521 else 522 { 523 for(int i = 0; i < 
RENDERTARGETS; i++) 524 { 525 c[i] = oC[i]; 526 } 527 } 528 529 clampColor(c); 530 531 if(state.depthOverride) 532 { 533 oDepth = Min(Max(oDepth, Float4(0.0f)), Float4(1.0f)); 534 } 535 } 536 537 Bool PixelProgram::alphaTest(Int cMask[4]) 538 { 539 if(!state.alphaTestActive()) 540 { 541 return true; 542 } 543 544 Int aMask; 545 546 if(state.transparencyAntialiasing == TRANSPARENCY_NONE) 547 { 548 Short4 alpha = RoundShort4(c[0].w * Float4(0x1000)); 549 550 PixelRoutine::alphaTest(aMask, alpha); 551 552 for(unsigned int q = 0; q < state.multiSample; q++) 553 { 554 cMask[q] &= aMask; 555 } 556 } 557 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) 558 { 559 alphaToCoverage(cMask, c[0].w); 560 } 561 else ASSERT(false); 562 563 Int pass = cMask[0]; 564 565 for(unsigned int q = 1; q < state.multiSample; q++) 566 { 567 pass = pass | cMask[q]; 568 } 569 570 return pass != 0x0; 571 } 572 573 void PixelProgram::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) 574 { 575 for(int index = 0; index < RENDERTARGETS; index++) 576 { 577 if(!state.colorWriteActive(index)) 578 { 579 continue; 580 } 581 582 if(!postBlendSRGB && state.writeSRGB && !isSRGB(index)) 583 { 584 c[index].x = linearToSRGB(c[index].x); 585 c[index].y = linearToSRGB(c[index].y); 586 c[index].z = linearToSRGB(c[index].z); 587 } 588 589 if(index == 0) 590 { 591 fogBlend(c[index], fog); 592 } 593 594 switch(state.targetFormat[index]) 595 { 596 case FORMAT_R5G6B5: 597 case FORMAT_X8R8G8B8: 598 case FORMAT_X8B8G8R8: 599 case FORMAT_A8R8G8B8: 600 case FORMAT_A8B8G8R8: 601 case FORMAT_SRGB8_X8: 602 case FORMAT_SRGB8_A8: 603 case FORMAT_G8R8: 604 case FORMAT_R8: 605 case FORMAT_A8: 606 case FORMAT_G16R16: 607 case FORMAT_A16B16G16R16: 608 for(unsigned int q = 0; q < state.multiSample; q++) 609 { 610 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); 611 Vector4s color; 612 613 
if(state.targetFormat[index] == FORMAT_R5G6B5) 614 { 615 color.x = UShort4(c[index].x * Float4(0xFBFF), false); 616 color.y = UShort4(c[index].y * Float4(0xFDFF), false); 617 color.z = UShort4(c[index].z * Float4(0xFBFF), false); 618 color.w = UShort4(c[index].w * Float4(0xFFFF), false); 619 } 620 else 621 { 622 color.x = convertFixed16(c[index].x, false); 623 color.y = convertFixed16(c[index].y, false); 624 color.z = convertFixed16(c[index].z, false); 625 color.w = convertFixed16(c[index].w, false); 626 } 627 628 if(state.multiSampleMask & (1 << q)) 629 { 630 alphaBlend(index, buffer, color, x); 631 logicOperation(index, buffer, color, x); 632 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); 633 } 634 } 635 break; 636 case FORMAT_R32F: 637 case FORMAT_G32R32F: 638 case FORMAT_X32B32G32R32F: 639 case FORMAT_A32B32G32R32F: 640 case FORMAT_X32B32G32R32F_UNSIGNED: 641 case FORMAT_R32I: 642 case FORMAT_G32R32I: 643 case FORMAT_A32B32G32R32I: 644 case FORMAT_R32UI: 645 case FORMAT_G32R32UI: 646 case FORMAT_A32B32G32R32UI: 647 case FORMAT_R16I: 648 case FORMAT_G16R16I: 649 case FORMAT_A16B16G16R16I: 650 case FORMAT_R16UI: 651 case FORMAT_G16R16UI: 652 case FORMAT_A16B16G16R16UI: 653 case FORMAT_R8I: 654 case FORMAT_G8R8I: 655 case FORMAT_A8B8G8R8I: 656 case FORMAT_R8UI: 657 case FORMAT_G8R8UI: 658 case FORMAT_A8B8G8R8UI: 659 for(unsigned int q = 0; q < state.multiSample; q++) 660 { 661 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); 662 Vector4f color = c[index]; 663 664 if(state.multiSampleMask & (1 << q)) 665 { 666 alphaBlend(index, buffer, color, x); 667 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); 668 } 669 } 670 break; 671 default: 672 ASSERT(false); 673 } 674 } 675 } 676 677 Vector4f PixelProgram::sampleTexture(const Src &sampler, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) 678 { 679 Vector4f tmp; 680 681 
if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID) 682 { 683 tmp = sampleTexture(sampler.index, uvwq, bias, dsx, dsy, offset, function); 684 } 685 else 686 { 687 Int index = As<Int>(Float(fetchRegister(sampler).x.x)); 688 689 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++) 690 { 691 if(shader->usesSampler(i)) 692 { 693 If(index == i) 694 { 695 tmp = sampleTexture(i, uvwq, bias, dsx, dsy, offset, function); 696 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture 697 } 698 } 699 } 700 } 701 702 Vector4f c; 703 c.x = tmp[(sampler.swizzle >> 0) & 0x3]; 704 c.y = tmp[(sampler.swizzle >> 2) & 0x3]; 705 c.z = tmp[(sampler.swizzle >> 4) & 0x3]; 706 c.w = tmp[(sampler.swizzle >> 6) & 0x3]; 707 708 return c; 709 } 710 711 Vector4f PixelProgram::sampleTexture(int samplerIndex, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) 712 { 713 #if PERF_PROFILE 714 Long texTime = Ticks(); 715 #endif 716 717 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + samplerIndex * sizeof(Texture); 718 Vector4f c = SamplerCore(constants, state.sampler[samplerIndex]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, bias, dsx, dsy, offset, function); 719 720 #if PERF_PROFILE 721 cycles[PERF_TEX] += Ticks() - texTime; 722 #endif 723 724 return c; 725 } 726 727 void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS]) 728 { 729 for(int index = 0; index < RENDERTARGETS; index++) 730 { 731 if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive())) 732 { 733 continue; 734 } 735 736 switch(state.targetFormat[index]) 737 { 738 case FORMAT_NULL: 739 break; 740 case FORMAT_R5G6B5: 741 case FORMAT_A8R8G8B8: 742 case FORMAT_A8B8G8R8: 743 case FORMAT_X8R8G8B8: 744 case FORMAT_X8B8G8R8: 745 case FORMAT_SRGB8_X8: 746 case FORMAT_SRGB8_A8: 747 case FORMAT_G8R8: 748 case FORMAT_R8: 749 case FORMAT_A8: 750 case FORMAT_G16R16: 751 case 
FORMAT_A16B16G16R16: 752 oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f)); 753 oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f)); 754 oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f)); 755 oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f)); 756 break; 757 case FORMAT_R32F: 758 case FORMAT_G32R32F: 759 case FORMAT_X32B32G32R32F: 760 case FORMAT_A32B32G32R32F: 761 case FORMAT_R32I: 762 case FORMAT_G32R32I: 763 case FORMAT_A32B32G32R32I: 764 case FORMAT_R32UI: 765 case FORMAT_G32R32UI: 766 case FORMAT_A32B32G32R32UI: 767 case FORMAT_R16I: 768 case FORMAT_G16R16I: 769 case FORMAT_A16B16G16R16I: 770 case FORMAT_R16UI: 771 case FORMAT_G16R16UI: 772 case FORMAT_A16B16G16R16UI: 773 case FORMAT_R8I: 774 case FORMAT_G8R8I: 775 case FORMAT_A8B8G8R8I: 776 case FORMAT_R8UI: 777 case FORMAT_G8R8UI: 778 case FORMAT_A8B8G8R8UI: 779 break; 780 case FORMAT_X32B32G32R32F_UNSIGNED: 781 oC[index].x = Max(oC[index].x, Float4(0.0f)); 782 oC[index].y = Max(oC[index].y, Float4(0.0f)); 783 oC[index].z = Max(oC[index].z, Float4(0.0f)); 784 oC[index].w = Max(oC[index].w, Float4(0.0f)); 785 break; 786 default: 787 ASSERT(false); 788 } 789 } 790 } 791 792 Int4 PixelProgram::enableMask(const Shader::Instruction *instruction) 793 { 794 Int4 enable = instruction->analysisBranch ? 
Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF); 795 796 if(!whileTest) 797 { 798 if(shader->containsBreakInstruction() && instruction->analysisBreak) 799 { 800 enable &= enableBreak; 801 } 802 803 if(shader->containsContinueInstruction() && instruction->analysisContinue) 804 { 805 enable &= enableContinue; 806 } 807 808 if(shader->containsLeaveInstruction() && instruction->analysisLeave) 809 { 810 enable &= enableLeave; 811 } 812 } 813 814 return enable; 815 } 816 817 Vector4f PixelProgram::fetchRegister(const Src &src, unsigned int offset) 818 { 819 Vector4f reg; 820 unsigned int i = src.index + offset; 821 822 switch(src.type) 823 { 824 case Shader::PARAMETER_TEMP: 825 if(src.rel.type == Shader::PARAMETER_VOID) 826 { 827 reg = r[i]; 828 } 829 else 830 { 831 Int a = relativeAddress(src, src.bufferIndex); 832 833 reg = r[i + a]; 834 } 835 break; 836 case Shader::PARAMETER_INPUT: 837 { 838 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative 839 { 840 reg = v[i]; 841 } 842 else 843 { 844 Int a = relativeAddress(src, src.bufferIndex); 845 846 reg = v[i + a]; 847 } 848 } 849 break; 850 case Shader::PARAMETER_CONST: 851 reg = readConstant(src, offset); 852 break; 853 case Shader::PARAMETER_TEXTURE: 854 reg = v[2 + i]; 855 break; 856 case Shader::PARAMETER_MISCTYPE: 857 if(src.index == Shader::VPosIndex) reg = vPos; 858 if(src.index == Shader::VFaceIndex) reg = vFace; 859 break; 860 case Shader::PARAMETER_SAMPLER: 861 if(src.rel.type == Shader::PARAMETER_VOID) 862 { 863 reg.x = As<Float4>(Int4(i)); 864 } 865 else if(src.rel.type == Shader::PARAMETER_TEMP) 866 { 867 reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x)); 868 } 869 return reg; 870 case Shader::PARAMETER_PREDICATE: return reg; // Dummy 871 case Shader::PARAMETER_VOID: return reg; // Dummy 872 case Shader::PARAMETER_FLOAT4LITERAL: 873 reg.x = Float4(src.value[0]); 874 reg.y = Float4(src.value[1]); 875 reg.z = Float4(src.value[2]); 876 reg.w = Float4(src.value[3]); 877 break; 878 case 
Shader::PARAMETER_CONSTINT: return reg; // Dummy 879 case Shader::PARAMETER_CONSTBOOL: return reg; // Dummy 880 case Shader::PARAMETER_LOOP: return reg; // Dummy 881 case Shader::PARAMETER_COLOROUT: 882 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative 883 { 884 reg = oC[i]; 885 } 886 else 887 { 888 Int a = relativeAddress(src, src.bufferIndex); 889 890 reg = oC[i + a]; 891 } 892 break; 893 case Shader::PARAMETER_DEPTHOUT: 894 reg.x = oDepth; 895 break; 896 default: 897 ASSERT(false); 898 } 899 900 const Float4 &x = reg[(src.swizzle >> 0) & 0x3]; 901 const Float4 &y = reg[(src.swizzle >> 2) & 0x3]; 902 const Float4 &z = reg[(src.swizzle >> 4) & 0x3]; 903 const Float4 &w = reg[(src.swizzle >> 6) & 0x3]; 904 905 Vector4f mod; 906 907 switch(src.modifier) 908 { 909 case Shader::MODIFIER_NONE: 910 mod.x = x; 911 mod.y = y; 912 mod.z = z; 913 mod.w = w; 914 break; 915 case Shader::MODIFIER_NEGATE: 916 mod.x = -x; 917 mod.y = -y; 918 mod.z = -z; 919 mod.w = -w; 920 break; 921 case Shader::MODIFIER_ABS: 922 mod.x = Abs(x); 923 mod.y = Abs(y); 924 mod.z = Abs(z); 925 mod.w = Abs(w); 926 break; 927 case Shader::MODIFIER_ABS_NEGATE: 928 mod.x = -Abs(x); 929 mod.y = -Abs(y); 930 mod.z = -Abs(z); 931 mod.w = -Abs(w); 932 break; 933 case Shader::MODIFIER_NOT: 934 mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF)); 935 mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF)); 936 mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF)); 937 mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF)); 938 break; 939 default: 940 ASSERT(false); 941 } 942 943 return mod; 944 } 945 946 RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index) 947 { 948 if(bufferIndex == -1) 949 { 950 return data + OFFSET(DrawData, ps.c[index]); 951 } 952 else 953 { 954 return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, ps.u[bufferIndex])) + index; 955 } 956 } 957 958 RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset) 959 { 960 
return uniformAddress(bufferIndex, index) + offset * sizeof(float4);
	}

	// Reads the float4 constant addressed by 'src' (register index src.index + offset,
	// in the uniform buffer selected by src.bufferIndex) and returns it with each
	// component broadcast: c.x holds .xxxx of the loaded value, c.y holds .yyyy, etc.
	//
	// Addressing depends on src.rel.type:
	//  - PARAMETER_VOID: direct index. If the shader contains DEF instructions, the
	//    first DEF whose destination index matches replaces the loaded value with
	//    the literal values of that instruction.
	//  - PARAMETER_LOOP: index plus the current loop counter aL[loopDepth].
	//  - anything else: index plus the offset computed by relativeAddress().
	Vector4f PixelProgram::readConstant(const Src &src, unsigned int offset)
	{
		Vector4f c;
		unsigned int i = src.index + offset;

		if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
		{
			// Load the whole float4 into every component, then swizzle each one below.
			c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i));

			c.x = c.x.xxxx;
			c.y = c.y.yyyy;
			c.z = c.z.zzzz;
			c.w = c.w.wwww;

			if(shader->containsDefineInstruction())   // Constant may be known at compile time
			{
				// Scan the shader for a DEF of this register; the first match wins.
				for(size_t j = 0; j < shader->getLength(); j++)
				{
					const Shader::Instruction &instruction = *shader->getInstruction(j);

					if(instruction.opcode == Shader::OPCODE_DEF)
					{
						if(instruction.dst.index == i)
						{
							c.x = Float4(instruction.src[0].value[0]);
							c.y = Float4(instruction.src[0].value[1]);
							c.z = Float4(instruction.src[0].value[2]);
							c.w = Float4(instruction.src[0].value[3]);

							break;
						}
					}
				}
			}
		}
		else if(src.rel.type == Shader::PARAMETER_LOOP)
		{
			Int loopCounter = aL[loopDepth];

			c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter));

			c.x = c.x.xxxx;
			c.y = c.y.yyyy;
			c.z = c.z.zzzz;
			c.w = c.w.wwww;
		}
		else
		{
			Int a = relativeAddress(src, src.bufferIndex);

			c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));

			c.x = c.x.xxxx;
			c.y = c.y.yyyy;
			c.z = c.z.zzzz;
			c.w = c.w.wwww;
		}

		return c;
	}

	// Computes the integer offset used for relative addressing of 'var'.
	//
	// For temporary, input and output registers the offset is element 0 of the
	// register's x component, converted with As<Int>, times var.rel.scale. For
	// constants it is an Int read at the uniform address of var.rel.index in
	// 'bufferIndex', times var.rel.scale. For the loop register it is the current
	// loop counter aL[loopDepth]; no scale is applied in that case.
	//
	// Asserts that the relative address is flagged as deterministic, and asserts on
	// any other relative register type (returning 0 when assertions are disabled).
	Int PixelProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex)
	{
		ASSERT(var.rel.deterministic);

		if(var.rel.type == Shader::PARAMETER_TEMP)
		{
			return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale;
		}
		else if(var.rel.type == Shader::PARAMETER_INPUT)
		{
			return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale;
		}
		else if(var.rel.type == Shader::PARAMETER_OUTPUT)
		{
			return As<Int>(Extract(oC[var.rel.index].x, 0)) * var.rel.scale;
		}
		else if(var.rel.type == Shader::PARAMETER_CONST)
		{
			return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale;
		}
		else if(var.rel.type == Shader::PARAMETER_LOOP)
		{
			return aL[loopDepth];
		}
		else ASSERT(false);

		return 0;
	}

	// Converts a linear value to an sRGB-like encoding using the approximation
	// 1.14 * sqrtx - 0.14 * x, where sqrtx is obtained as Rcp_pp(RcpSqrt_pp(x)).
	// The result is clamped to [0, 1].
	Float4 PixelProgram::linearToSRGB(const Float4 &x)   // Approximates x^(1.0/2.2)
	{
		Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
		Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f);

		return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f));
	}

	// Matrix multiply helpers. The rows are fetched from 'src1' at consecutive
	// register offsets (0, 1, ...), and each written component of 'dst' is the dot
	// product of 'src0' with one row. MnXm uses an n-component dot product (dot3 or
	// dot4) and writes the first m components of 'dst'; the remaining components of
	// 'dst' are not touched.

	// dst.xy = dot3(src0, row0..row1)
	void PixelProgram::M3X2(Vector4f &dst, Vector4f &src0, const Src &src1)
	{
		Vector4f row0 = fetchRegister(src1, 0);
		Vector4f row1 = fetchRegister(src1, 1);

		dst.x = dot3(src0, row0);
		dst.y = dot3(src0, row1);
	}

	// dst.xyz = dot3(src0, row0..row2)
	void PixelProgram::M3X3(Vector4f &dst, Vector4f &src0, const Src &src1)
	{
		Vector4f row0 = fetchRegister(src1, 0);
		Vector4f row1 = fetchRegister(src1, 1);
		Vector4f row2 = fetchRegister(src1, 2);

		dst.x = dot3(src0, row0);
		dst.y = dot3(src0, row1);
		dst.z = dot3(src0, row2);
	}

	// dst.xyzw = dot3(src0, row0..row3)
	void PixelProgram::M3X4(Vector4f &dst, Vector4f &src0, const Src &src1)
	{
		Vector4f row0 = fetchRegister(src1, 0);
		Vector4f row1 = fetchRegister(src1, 1);
		Vector4f row2 = fetchRegister(src1, 2);
		Vector4f row3 = fetchRegister(src1, 3);

		dst.x = dot3(src0, row0);
		dst.y = dot3(src0, row1);
		dst.z = dot3(src0, row2);
		dst.w = dot3(src0, row3);
	}

	// dst.xyz = dot4(src0, row0..row2)
	void PixelProgram::M4X3(Vector4f &dst, Vector4f &src0, const Src &src1)
	{
		Vector4f row0 = fetchRegister(src1, 0);
		Vector4f row1 = fetchRegister(src1, 1);
		Vector4f row2 = fetchRegister(src1, 2);

		dst.x = dot4(src0, row0);
		dst.y = dot4(src0, row1);
		dst.z = dot4(src0, row2);
	}

	// dst.xyzw = dot4(src0, row0..row3)
	void PixelProgram::M4X4(Vector4f &dst, Vector4f &src0, const Src &src1)
	{
		Vector4f row0 = fetchRegister(src1, 0);
		Vector4f row1 = fetchRegister(src1, 1);
		Vector4f row2 = fetchRegister(src1, 2);
		Vector4f row3 = fetchRegister(src1, 3);

		dst.x = dot4(src0, row0);
		dst.y = dot4(src0, row1);
		dst.z = dot4(src0, row2);
		dst.w = dot4(src0, row3);
	}

	// Texture sampling helpers. Each one forwards to sampleTexture() with the
	// sampler 'src1' and the coordinates 'src0'. sampleTexture() always takes a
	// scalar argument (lod/bias), two derivative vectors and an offset vector;
	// where a variant has no meaningful value for one of them, src0 (or src0.x) is
	// passed in its place. NOTE(review): those placeholders are presumably ignored
	// by the sampler for the selected method - confirm against sampleTexture().

	// Samples with implicit level of detail. With 'project' set, x, y and z are
	// first multiplied by reciprocal(src0.w) and 'bias' is not consulted; otherwise
	// 'bias' selects between the Bias and Implicit methods.
	void PixelProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias)
	{
		if(project)
		{
			Vector4f proj;
			Float4 rw = reciprocal(src0.w);
			proj.x = src0.x * rw;
			proj.y = src0.y * rw;
			proj.z = src0.z * rw;

			dst = sampleTexture(src1, proj, src0.x, (src0), (src0), (src0), Implicit);
		}
		else
		{
			dst = sampleTexture(src1, src0, src0.x, (src0), (src0), (src0), bias ? Bias : Implicit);
		}
	}

	// Implicit-lod sample with a texel offset.
	void PixelProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset)
	{
		dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), offset, {Implicit, Offset});
	}

	// Explicit-lod sample with a texel offset.
	void PixelProgram::TEXLODOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &lod)
	{
		dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Lod, Offset});
	}

	// Sample with an explicit lod bias.
	void PixelProgram::TEXBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &bias)
	{
		dst = sampleTexture(src1, src0, bias, (src0), (src0), (src0), Bias);
	}

	// Sample with an explicit lod bias and a texel offset.
	void PixelProgram::TEXOFFSETBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &bias)
	{
		dst = sampleTexture(src1, src0, bias, (src0), (src0), offset, {Bias, Offset});
	}

	// Texel fetch at the given lod.
	void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod)
	{
		dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Fetch);
	}

	// Texel fetch at the given lod with a texel offset.
	void PixelProgram::TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod)
	{
		dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Fetch, Offset});
	}

	// Sample using the caller-supplied derivatives 'dsx' and 'dsy'.
	void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy)
	{
		dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, (src0), Grad);
	}

	// Sample using caller-supplied derivatives and a texel offset.
	void PixelProgram::TEXGRADOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy, Vector4f &offset)
	{
		dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, offset, {Grad, Offset});
	}

	// Sample at an explicit lod.
	void PixelProgram::TEXLOD(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &lod)
	{
		dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Lod);
	}

	// Queries the size of the texture bound to sampler src1.index at level 'lod'.
	// The texture is located in the DrawData 'mipmap' array, indexed in units of
	// sizeof(Texture).
	void PixelProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
	{
		Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + src1.index * sizeof(Texture);
		dst = SamplerCore::textureSize(texture, lod);
	}

	// Updates the coverage masks according to the sign of the components of 'src'.
	// 'mask' selects which components take part (bit 0 = x ... bit 3 = w). 'kill'
	// starts with all bits set and is ANDed with SignMask(src.c >= 0) for every
	// selected component, so a bit only survives when all selected components are
	// not less than zero. The result is ANDed into cMask[] for each of the
	// state.multiSample samples.
	void PixelProgram::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask)
	{
		Int kill = -1;

		if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f)));
		if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f)));
		if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f)));
		if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f)));

		// FIXME: Dynamic branching affects TEXKILL?
	//	if(shader->containsDynamicBranching())
	//	{
	//		kill = ~SignMask(enableMask());
	//	}

		for(unsigned int q = 0; q < state.multiSample; q++)
		{
			cMask[q] &= kill;
		}

		// FIXME: Branch to end of shader if all killed?
	}

	// Clears coverage for the lanes executing this instruction. Without dynamic
	// branching 'kill' stays 0, so every coverage mask is cleared. With dynamic
	// branching, only the bits of lanes that are not enabled for 'instruction'
	// (per enableMask()) are kept.
	void PixelProgram::DISCARD(Int cMask[4], const Shader::Instruction *instruction)
	{
		Int kill = 0;

		if(shader->containsDynamicBranching())
		{
			kill = ~SignMask(enableMask(instruction));
		}

		for(unsigned int q = 0; q < state.multiSample; q++)
		{
			cMask[q] &= kill;
		}

		// FIXME: Branch to end of shader if all killed?
	}

	// Horizontal difference within the quad: lanes (1, 1, 3, 3) minus lanes
	// (0, 0, 2, 2) for every component. Per setBuiltins(), lanes 1 and 3 are the
	// pixels at x + 1.
	void PixelProgram::DFDX(Vector4f &dst, Vector4f &src)
	{
		dst.x = src.x.yyww - src.x.xxzz;
		dst.y = src.y.yyww - src.y.xxzz;
		dst.z = src.z.yyww - src.z.xxzz;
		dst.w = src.w.yyww - src.w.xxzz;
	}

	// Vertical difference within the quad: lanes (2, 3, 2, 3) minus lanes
	// (0, 1, 0, 1) for every component. Per setBuiltins(), lanes 2 and 3 are the
	// pixels at y + 1.
	void PixelProgram::DFDY(Vector4f &dst, Vector4f &src)
	{
		dst.x = src.x.zwzw - src.x.xyxy;
		dst.y = src.y.zwzw - src.y.xyxy;
		dst.z = src.z.zwzw - src.z.xyxy;
		dst.w = src.w.zwzw - src.w.xyxy;
	}

	// Sum of the absolute horizontal and vertical differences, using the same lane
	// arithmetic as DFDX() and DFDY().
	void PixelProgram::FWIDTH(Vector4f &dst, Vector4f &src)
	{
		// abs(dFdx(src)) + abs(dFdy(src));
		dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy);
		dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy);
		dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy);
		dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy);
	}

	// Unconditional break: clears the enableBreak bits of all lanes that are
	// enabled at the current enable stack level.
	void PixelProgram::BREAK()
	{
		enableBreak = enableBreak & ~enableStack[enableIndex];
	}

	// Conditional break: compares src0.x with src1.x using 'control' and breaks the
	// lanes for which the comparison holds. Asserts on an unknown 'control' value.
	void PixelProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control)
	{
		Int4 condition;

		switch(control)
		{
		case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
		case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
		default:
			ASSERT(false);
		}

		BREAK(condition);
	}

	// Predicated break: the condition is the predicate register component selected
	// by the low two bits of the swizzle, inverted when the NOT modifier is set.
	void PixelProgram::BREAKP(const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
	{
		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);

		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
		{
			condition = ~condition;
		}

		BREAK(condition);
	}

	// Breaks the lanes selected by 'condition' that are also enabled at the current
	// enable stack level. Note that 'condition' is modified in place.
	void PixelProgram::BREAK(Int4 &condition)
	{
		condition &= enableStack[enableIndex];

		enableBreak = enableBreak & ~condition;
	}

	// Clears the enableContinue bits of all lanes that are enabled at the current
	// enable stack level.
	void PixelProgram::CONTINUE()
	{
		enableContinue = enableContinue & ~enableStack[enableIndex];
	}

	// Sets the whileTest flag; ENDWHILE() clears it again.
	void PixelProgram::TEST()
	{
		whileTest = true;
	}

	// Unconditional call to the subroutine at 'labelIndex'. The label's basic block
	// is created on demand. When the label has more than one call site, the call
	// site index is pushed on callStack so that RET() can select the return block.
	// Code generation then continues in this call site's return block.
	void PixelProgram::CALL(int labelIndex, int callSiteIndex)
	{
		if(!labelBlock[labelIndex])
		{
			labelBlock[labelIndex] = Nucleus::createBasicBlock();
		}

		if(callRetBlock[labelIndex].size() > 1)
		{
			callStack[stackIndex++] = UInt(callSiteIndex);
		}

		Int4 restoreLeave = enableLeave;

		Nucleus::createBr(labelBlock[labelIndex]);
		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);

		// Assigned after switching to the return block, using the value saved before the call.
		enableLeave = restoreLeave;
	}

	// Conditional call; dispatches on the type of the condition register (constant
	// boolean or predicate). Asserts on any other register type.
	void PixelProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src)
	{
		if(src.type == Shader::PARAMETER_CONSTBOOL)
		{
			CALLNZb(labelIndex, callSiteIndex, src);
		}
		else if(src.type == Shader::PARAMETER_PREDICATE)
		{
			CALLNZp(labelIndex, callSiteIndex, src);
		}
		else ASSERT(false);
	}

	// Conditional call on a boolean constant read from DrawData::ps.b (inverted when
	// the NOT modifier is set). Branches to the label block when the condition is
	// true and directly to the call site's return block otherwise.
	void PixelProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister)
	{
		Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME

		if(boolRegister.modifier == Shader::MODIFIER_NOT)
		{
			condition = !condition;
		}

		if(!labelBlock[labelIndex])
		{
			labelBlock[labelIndex] = Nucleus::createBasicBlock();
		}

		if(callRetBlock[labelIndex].size() > 1)
		{
			callStack[stackIndex++] = UInt(callSiteIndex);
		}

		Int4 restoreLeave = enableLeave;

		branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);

		enableLeave = restoreLeave;
	}

	// Conditional call on a predicate register component (inverted when the NOT
	// modifier is set), restricted to the currently enabled lanes. The resulting
	// mask is pushed on the enable stack before the branch and popped again once
	// code generation has moved to the return block. The call is taken when at
	// least one lane of the mask is set.
	void PixelProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)
	{
		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);

		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
		{
			condition = ~condition;
		}

		condition &= enableStack[enableIndex];

		if(!labelBlock[labelIndex])
		{
			labelBlock[labelIndex] = Nucleus::createBasicBlock();
		}

		if(callRetBlock[labelIndex].size() > 1)
		{
			callStack[stackIndex++] = UInt(callSiteIndex);
		}

		enableIndex++;
		enableStack[enableIndex] = condition;
		Int4 restoreLeave = enableLeave;

		Bool notAllFalse = SignMask(condition) != 0;
		branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);

		enableIndex--;
		enableLeave = restoreLeave;
	}

	// Starts the 'else' part of the innermost open IF. The block recorded as the
	// IF's false block becomes the else body, and a newly created end block is
	// recorded in its place for ENDIF(). ifDepth is decremented to address the
	// innermost IF and restored before returning.
	void PixelProgram::ELSE()
	{
		ifDepth--;

		BasicBlock *falseBlock = ifFalseBlock[ifDepth];
		BasicBlock *endBlock = Nucleus::createBasicBlock();

		if(isConditionalIf[ifDepth])
		{
			// Lanes for the else part: enabled in the enclosing scope but not in the 'if' part.
			Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
			Bool notAllFalse = SignMask(condition) != 0;

			branch(notAllFalse, falseBlock, endBlock);

			enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
		}
		else
		{
			Nucleus::createBr(endBlock);
			Nucleus::setInsertBlock(falseBlock);
		}

		ifFalseBlock[ifDepth] = endBlock;

		ifDepth++;
	}

	// Closes the innermost open IF: branches to the block recorded in ifFalseBlock
	// (the false block, or the end block installed by ELSE()) and continues code
	// generation there. For a per-lane conditional IF the enable stack is popped.
	void PixelProgram::ENDIF()
	{
		ifDepth--;

		BasicBlock *endBlock = ifFalseBlock[ifDepth];

		Nucleus::createBr(endBlock);
		Nucleus::setInsertBlock(endBlock);

		if(isConditionalIf[ifDepth])
		{
			enableIndex--;
		}
	}

	// Closes a LOOP: advances the loop counter aL by the loop's increment, branches
	// back to the loop's test block and continues code generation in its end
	// block. Pops the loop depth and sets enableBreak to all ones.
	void PixelProgram::ENDLOOP()
	{
		loopRepDepth--;

		aL[loopDepth] = aL[loopDepth] + increment[loopDepth];   // FIXME: +=

		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];

		Nucleus::createBr(testBlock);
		Nucleus::setInsertBlock(endBlock);

		loopDepth--;
		enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
	}

	// Closes a REP: same as ENDLOOP() but without advancing the loop counter.
	void PixelProgram::ENDREP()
	{
		loopRepDepth--;

		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];

		Nucleus::createBr(testBlock);
		Nucleus::setInsertBlock(endBlock);

		loopDepth--;
		enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
	}

	// Closes a WHILE: branches back to the test block, continues code generation in
	// the end block, pops the enable stack entry pushed by WHILE() and clears the
	// whileTest flag.
	void PixelProgram::ENDWHILE()
	{
		loopRepDepth--;

		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];

		Nucleus::createBr(testBlock);
		Nucleus::setInsertBlock(endBlock);

		enableIndex--;
		whileTest = false;
	}

	// Closes a SWITCH: branches to the end block created by SWITCH() and continues
	// code generation there.
	void PixelProgram::ENDSWITCH()
	{
		loopRepDepth--;

		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];

		Nucleus::createBr(endBlock);
		Nucleus::setInsertBlock(endBlock);
	}

	// Opens an IF; dispatches on the type of the condition register. Constant
	// booleans and predicates have dedicated paths; for any other register the x
	// component of the fetched value, converted with As<Int4>, is used as the
	// per-lane condition.
	void PixelProgram::IF(const Src &src)
	{
		if(src.type == Shader::PARAMETER_CONSTBOOL)
		{
			IFb(src);
		}
		else if(src.type == Shader::PARAMETER_PREDICATE)
		{
			IFp(src);
		}
		else
		{
			Int4 condition = As<Int4>(fetchRegister(src).x);
			IF(condition);
		}
	}

	// Opens an IF on a boolean constant read from DrawData::ps.b (inverted when the
	// NOT modifier is set). The condition is a single Bool, so the enable stack is
	// not touched and the IF is recorded as not conditional per lane.
	void PixelProgram::IFb(const Src &boolRegister)
	{
		ASSERT(ifDepth < 24 + 4);

		Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME

		if(boolRegister.modifier == Shader::MODIFIER_NOT)
		{
			condition = !condition;
		}

		BasicBlock *trueBlock = Nucleus::createBasicBlock();
		BasicBlock *falseBlock = Nucleus::createBasicBlock();

		branch(condition, trueBlock, falseBlock);

		isConditionalIf[ifDepth] = false;
		ifFalseBlock[ifDepth] = falseBlock;

		ifDepth++;
	}

	// Opens an IF on a predicate register component (inverted when the NOT modifier
	// is set).
	void PixelProgram::IFp(const Src &predicateRegister)
	{
		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);

		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
		{
			condition = ~condition;
		}

		IF(condition);
	}

	// Opens an IF on the comparison of src0.x with src1.x using 'control'. Asserts
	// on an unknown 'control' value.
	void PixelProgram::IFC(Vector4f &src0, Vector4f &src1, Control control)
	{
		Int4 condition;

		switch(control)
		{
		case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
		case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
		default:
			ASSERT(false);
		}

		IF(condition);
	}

	// Opens a per-lane conditional IF. 'condition' is restricted to the currently
	// enabled lanes (and modified in place), pushed on the enable stack, and the
	// true block is entered when at least one lane is set. The false block is
	// recorded for ELSE()/ENDIF().
	void PixelProgram::IF(Int4 &condition)
	{
		condition &= enableStack[enableIndex];

		enableIndex++;
		enableStack[enableIndex] = condition;

		BasicBlock *trueBlock = Nucleus::createBasicBlock();
		BasicBlock *falseBlock = Nucleus::createBasicBlock();

		Bool notAllFalse = SignMask(condition) != 0;

		branch(notAllFalse, trueBlock, falseBlock);

		isConditionalIf[ifDepth] = true;
		ifFalseBlock[ifDepth] = falseBlock;

		ifDepth++;
	}

	// Starts the subroutine 'labelIndex': creates its basic block if no call site
	// has created it yet, makes it the insert block and records it as the current
	// label for RET().
	void PixelProgram::LABEL(int labelIndex)
	{
		if(!labelBlock[labelIndex])
		{
			labelBlock[labelIndex] = Nucleus::createBasicBlock();
		}

		Nucleus::setInsertBlock(labelBlock[labelIndex]);
		currentLabel = labelIndex;
	}

	// Opens a LOOP controlled by an integer constant register. Elements 0, 1 and 2
	// of DrawData::ps.i[index] are read as the iteration count, the initial loop
	// counter aL and the increment respectively. The test block enters the loop
	// body while the remaining iteration count is greater than zero, and the count
	// is decremented at the start of the body.
	void PixelProgram::LOOP(const Src &integerRegister)
	{
		loopDepth++;

		iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
		aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][1]));
		increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][2]));

	//	If(increment[loopDepth] == 0)
	//	{
	//		increment[loopDepth] = 1;
	//	}

		BasicBlock *loopBlock = Nucleus::createBasicBlock();
		BasicBlock *testBlock = Nucleus::createBasicBlock();
		BasicBlock *endBlock = Nucleus::createBasicBlock();

		loopRepTestBlock[loopRepDepth] = testBlock;
		loopRepEndBlock[loopRepDepth] = endBlock;

		// FIXME: jump(testBlock)
		Nucleus::createBr(testBlock);
		Nucleus::setInsertBlock(testBlock);

		branch(iteration[loopDepth] > 0, loopBlock, endBlock);
		Nucleus::setInsertBlock(loopBlock);

		iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --

		loopRepDepth++;
	}

	// Opens a REP controlled by an integer constant register. Only the iteration
	// count (element 0 of DrawData::ps.i[index]) is read; the loop counter aL is
	// copied from the enclosing loop level. Block structure is the same as LOOP().
	void PixelProgram::REP(const Src &integerRegister)
	{
		loopDepth++;

		iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
		aL[loopDepth] = aL[loopDepth - 1];

		BasicBlock *loopBlock = Nucleus::createBasicBlock();
		BasicBlock *testBlock = Nucleus::createBasicBlock();
		BasicBlock *endBlock = Nucleus::createBasicBlock();

		loopRepTestBlock[loopRepDepth] = testBlock;
		loopRepEndBlock[loopRepDepth] = endBlock;

		// FIXME: jump(testBlock)
		Nucleus::createBr(testBlock);
		Nucleus::setInsertBlock(testBlock);

		branch(iteration[loopDepth] > 0, loopBlock, endBlock);
		Nucleus::setInsertBlock(loopBlock);

		iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --

		loopRepDepth++;
	}

	// Opens a WHILE loop whose per-lane condition is the x component of
	// 'temporaryRegister'. A new enable stack level is pushed; in the test block
	// the condition is restricted to the lanes enabled in the enclosing scope and,
	// when the shader contains leave/break instructions, to the lanes that have not
	// left/broken. The loop body is entered while at least one lane remains set.
	void PixelProgram::WHILE(const Src &temporaryRegister)
	{
		enableIndex++;

		BasicBlock *loopBlock = Nucleus::createBasicBlock();
		BasicBlock *testBlock = Nucleus::createBasicBlock();
		BasicBlock *endBlock = Nucleus::createBasicBlock();

		loopRepTestBlock[loopRepDepth] = testBlock;
		loopRepEndBlock[loopRepDepth] = endBlock;

		// Values saved before entering the loop; assigned back in the test and end blocks below.
		Int4 restoreBreak = enableBreak;
		Int4 restoreContinue = enableContinue;

		// TODO: jump(testBlock)
		Nucleus::createBr(testBlock);
		Nucleus::setInsertBlock(testBlock);
		enableContinue = restoreContinue;

		const Vector4f &src = fetchRegister(temporaryRegister);
		Int4 condition = As<Int4>(src.x);
		condition &= enableStack[enableIndex - 1];
		if(shader->containsLeaveInstruction()) condition &= enableLeave;
		if(shader->containsBreakInstruction()) condition &= enableBreak;
		enableStack[enableIndex] = condition;

		Bool notAllFalse = SignMask(condition) != 0;
		branch(notAllFalse, loopBlock, endBlock);

		// This assignment is made while endBlock is the insert block.
		Nucleus::setInsertBlock(endBlock);
		enableBreak = restoreBreak;

		Nucleus::setInsertBlock(loopBlock);

		loopRepDepth++;
	}

	// Opens a SWITCH: creates the end block that ENDSWITCH() branches to (there is
	// no test block), assigns the enableBreak value saved on entry while that end
	// block is the insert block, and then resumes code generation in the block
	// that was current on entry.
	void PixelProgram::SWITCH()
	{
		BasicBlock *endBlock = Nucleus::createBasicBlock();

		loopRepTestBlock[loopRepDepth] = nullptr;
		loopRepEndBlock[loopRepDepth] = endBlock;

		Int4 restoreBreak = enableBreak;

		BasicBlock *currentBlock = Nucleus::getInsertBlock();

		Nucleus::setInsertBlock(endBlock);
		enableBreak = restoreBreak;

		Nucleus::setInsertBlock(currentBlock);

		loopRepDepth++;
	}

	// Returns from the main program or from the current subroutine.
	//
	// When currentLabel is -1, a new return block is created, stored in returnBlock
	// and branched to. Otherwise the return destination depends on the number of
	// call sites recorded for the current label: a switch over the call site index
	// popped from callStack when there are several, a direct branch when there is
	// exactly one, and a branch to an unreachable block when there are none. In the
	// subroutine case code generation continues in the unreachable block.
	void PixelProgram::RET()
	{
		if(currentLabel == -1)
		{
			returnBlock = Nucleus::createBasicBlock();
			Nucleus::createBr(returnBlock);
		}
		else
		{
			BasicBlock *unreachableBlock = Nucleus::createBasicBlock();

			if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
			{
				// FIXME: Encapsulate
				UInt index = callStack[--stackIndex];

				Value *value = index.loadValue();
				SwitchCases *switchCases = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());

				for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
				{
					Nucleus::addSwitchCase(switchCases, i, callRetBlock[currentLabel][i]);
				}
			}
			else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
			{
				Nucleus::createBr(callRetBlock[currentLabel][0]);
			}
			else   // Function isn't called
			{
				Nucleus::createBr(unreachableBlock);
			}

			Nucleus::setInsertBlock(unreachableBlock);
			Nucleus::createUnreachable();
		}
	}

	// Clears the enableLeave bits of all lanes that are enabled at the current
	// enable stack level.
	void PixelProgram::LEAVE()
	{
		enableLeave = enableLeave & ~enableStack[enableIndex];

		// FIXME: Return from function if all instances left
		// FIXME: Use enableLeave in other control-flow constructs
	}
}