Home | History | Annotate | Download | only in Shader
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "PixelProgram.hpp"
     16 
     17 #include "SamplerCore.hpp"
     18 #include "Renderer/Primitive.hpp"
     19 #include "Renderer/Renderer.hpp"
     20 
     21 namespace sw
     22 {
     23 	extern bool postBlendSRGB;              // Checked by rasterOperation() before it converts c[] with linearToSRGB()
     24 	extern bool booleanFaceRegister;        // Selects what setBuiltins() writes to vFace: a comparison result (true) or the primitive's area (false)
     25 	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
     26 	extern bool fullPixelPositionRegister;  // When true, setBuiltins() also fills in vPos.z and vPos.w
     27 
     28 	void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)
     29 	{
     30 		// vPos and vFace are only set up for shader model 3.0 and above.
     31 		const bool shaderModel3 = shader->getShaderModel() >= 0x0300;
     32 		if(shaderModel3 && shader->isVPosDeclared())
     33 		{
     34 			if(halfIntegerCoordinates)   // Pixel centers are not at integer coordinates
     35 			{
     36 				vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f);
     37 				vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f);
     38 			}
     39 			else
     40 			{
     41 				vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1);
     42 				vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1);
     43 			}
     44 
     45 			if(fullPixelPositionRegister)   // z and w are only filled in on request
     46 			{
     47 				vPos.z = z[0]; // FIXME: Centroid?
     48 				vPos.w = w;    // FIXME: Centroid?
     49 			}
     50 		}
     51 
     52 		if(shaderModel3 && shader->isVFaceDeclared())
     53 		{
     54 			Float4 triangleArea = *Pointer<Float>(primitive + OFFSET(Primitive, area));
     55 			Float4 facing;   // Either the area itself, or the result of comparing it against zero
     56 			if(booleanFaceRegister) facing = As<Float4>(CmpNLT(triangleArea, Float4(0.0f)));
     57 			else facing = triangleArea;
     58 			vFace.x = facing;   // The same value goes to all four components
     59 			vFace.y = facing;
     60 			vFace.z = facing;
     61 			vFace.w = facing;
     62 		}
     63 	}
     64 
     65 	void PixelProgram::applyShader(Int cMask[4])
     66 	{
        		// Walks the instructions of 'shader' in order. For each one the present
        		// source operands are fetched, the opcode is dispatched to its handler, and
        		// (for instructions with a writable destination) saturation and
        		// predication/enable masking are applied before the result is stored in
        		// r[], oC[], p0 or oDepth. Afterwards the output colors are copied from
        		// oC[] into c[] and clamped. cMask is passed on to TEXKILL and DISCARD.
     67 		enableIndex = 0;
     68 		stackIndex = 0;
     69 
        		// Start with every component of enableLeave set to all ones.
     70 		if(shader->containsLeaveInstruction())
     71 		{
     72 			enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
     73 		}
     74 
        		// Zero the output color of every render target that has a non-null format.
     75 		for(int i = 0; i < RENDERTARGETS; i++)
     76 		{
     77 			if(state.targetFormat[i] != FORMAT_NULL)
     78 			{
     79 				oC[i] = Vector4f(0.0f, 0.0f, 0.0f, 0.0f);
     80 			}
     81 		}
     82 
     83 		// Create all call site return blocks up front
     84 		for(size_t i = 0; i < shader->getLength(); i++)
     85 		{
     86 			const Shader::Instruction *instruction = shader->getInstruction(i);
     87 			Shader::Opcode opcode = instruction->opcode;
     88 
     89 			if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
     90 			{
     91 				const Dst &dst = instruction->dst;
     92 
        				// The new block is appended, so its index must equal this call site's number.
     93 				ASSERT(callRetBlock[dst.label].size() == dst.callSite);
     94 				callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
     95 			}
     96 		}
     97 
        		// Stays true only while every color output write targets oC[0] without
        		// relative addressing; in that case oC[0] is copied to all render targets below.
     98 		bool broadcastColor0 = true;
     99 
    100 		for(size_t i = 0; i < shader->getLength(); i++)
    101 		{
    102 			const Shader::Instruction *instruction = shader->getInstruction(i);
    103 			Shader::Opcode opcode = instruction->opcode;
    104 
        			// DCL, DEF, DEFI and DEFB instructions are skipped entirely.
    105 			if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
    106 			{
    107 				continue;
    108 			}
    109 
    110 			const Dst &dst = instruction->dst;
    111 			const Src &src0 = instruction->src[0];
    112 			const Src &src1 = instruction->src[1];
    113 			const Src &src2 = instruction->src[2];
    114 			const Src &src3 = instruction->src[3];
    115 			const Src &src4 = instruction->src[4];
    116 
    117 			bool predicate = instruction->predicate;
    118 			Control control = instruction->control;
    119 			bool pp = dst.partialPrecision;
    120 			bool project = instruction->project;
    121 			bool bias = instruction->bias;
    122 
        			// d receives the instruction's result; s0..s4 hold the fetched source operands.
    123 			Vector4f d;
    124 			Vector4f s0;
    125 			Vector4f s1;
    126 			Vector4f s2;
    127 			Vector4f s3;
    128 			Vector4f s4;
    129 
    130 			if(opcode == Shader::OPCODE_TEXKILL)   // Takes destination as input
    131 			{
        				// A texture-type destination is read from v[2 + dst.index],
        				// any other destination from the temporary register r[dst.index].
    132 				if(dst.type == Shader::PARAMETER_TEXTURE)
    133 				{
    134 					d.x = v[2 + dst.index].x;
    135 					d.y = v[2 + dst.index].y;
    136 					d.z = v[2 + dst.index].z;
    137 					d.w = v[2 + dst.index].w;
    138 				}
    139 				else
    140 				{
    141 					d = r[dst.index];
    142 				}
    143 			}
    144 
        			// Only source operands that are present (non-void) are fetched.
    145 			if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
    146 			if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
    147 			if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
    148 			if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3);
    149 			if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4);
    150 
        			// Opcode dispatch. Some handlers take the operand descriptor (src0/src1)
        			// instead of the fetched value (s0/s1).
        			// NOTE(review): the OPCODE_DEF and OPCODE_DCL cases cannot be reached
        			// because of the 'continue' above.
    151 			switch(opcode)
    152 			{
    153 			case Shader::OPCODE_PS_2_0:                                                    break;
    154 			case Shader::OPCODE_PS_2_x:                                                    break;
    155 			case Shader::OPCODE_PS_3_0:                                                    break;
    156 			case Shader::OPCODE_DEF:                                                       break;
    157 			case Shader::OPCODE_DCL:                                                       break;
    158 			case Shader::OPCODE_NOP:                                                       break;
    159 			case Shader::OPCODE_MOV:        mov(d, s0);                                    break;
    160 			case Shader::OPCODE_NEG:        neg(d, s0);                                    break;
    161 			case Shader::OPCODE_INEG:       ineg(d, s0);                                   break;
    162 			case Shader::OPCODE_F2B:        f2b(d, s0);                                    break;
    163 			case Shader::OPCODE_B2F:        b2f(d, s0);                                    break;
    164 			case Shader::OPCODE_F2I:        f2i(d, s0);                                    break;
    165 			case Shader::OPCODE_I2F:        i2f(d, s0);                                    break;
    166 			case Shader::OPCODE_F2U:        f2u(d, s0);                                    break;
    167 			case Shader::OPCODE_U2F:        u2f(d, s0);                                    break;
    168 			case Shader::OPCODE_I2B:        i2b(d, s0);                                    break;
    169 			case Shader::OPCODE_B2I:        b2i(d, s0);                                    break;
    170 			case Shader::OPCODE_ADD:        add(d, s0, s1);                                break;
    171 			case Shader::OPCODE_IADD:       iadd(d, s0, s1);                               break;
    172 			case Shader::OPCODE_SUB:        sub(d, s0, s1);                                break;
    173 			case Shader::OPCODE_ISUB:       isub(d, s0, s1);                               break;
    174 			case Shader::OPCODE_MUL:        mul(d, s0, s1);                                break;
    175 			case Shader::OPCODE_IMUL:       imul(d, s0, s1);                               break;
    176 			case Shader::OPCODE_MAD:        mad(d, s0, s1, s2);                            break;
    177 			case Shader::OPCODE_IMAD:       imad(d, s0, s1, s2);                           break;
    178 			case Shader::OPCODE_DP1:        dp1(d, s0, s1);                                break;
    179 			case Shader::OPCODE_DP2:        dp2(d, s0, s1);                                break;
    180 			case Shader::OPCODE_DP2ADD:     dp2add(d, s0, s1, s2);                         break;
    181 			case Shader::OPCODE_DP3:        dp3(d, s0, s1);                                break;
    182 			case Shader::OPCODE_DP4:        dp4(d, s0, s1);                                break;
    183 			case Shader::OPCODE_DET2:       det2(d, s0, s1);                               break;
    184 			case Shader::OPCODE_DET3:       det3(d, s0, s1, s2);                           break;
    185 			case Shader::OPCODE_DET4:       det4(d, s0, s1, s2, s3);                       break;
    186 			case Shader::OPCODE_CMP0:       cmp0(d, s0, s1, s2);                           break;
    187 			case Shader::OPCODE_ICMP:       icmp(d, s0, s1, control);                      break;
    188 			case Shader::OPCODE_UCMP:       ucmp(d, s0, s1, control);                      break;
    189 			case Shader::OPCODE_SELECT:     select(d, s0, s1, s2);                         break;
    190 			case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);                        break;
    191 			case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);                     break;
    192 			case Shader::OPCODE_FRC:        frc(d, s0);                                    break;
    193 			case Shader::OPCODE_TRUNC:      trunc(d, s0);                                  break;
    194 			case Shader::OPCODE_FLOOR:      floor(d, s0);                                  break;
    195 			case Shader::OPCODE_ROUND:      round(d, s0);                                  break;
    196 			case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);                              break;
    197 			case Shader::OPCODE_CEIL:       ceil(d, s0);                                   break;
    198 			case Shader::OPCODE_EXP2X:      exp2x(d, s0, pp);                              break;
    199 			case Shader::OPCODE_EXP2:       exp2(d, s0, pp);                               break;
    200 			case Shader::OPCODE_LOG2X:      log2x(d, s0, pp);                              break;
    201 			case Shader::OPCODE_LOG2:       log2(d, s0, pp);                               break;
    202 			case Shader::OPCODE_EXP:        exp(d, s0, pp);                                break;
    203 			case Shader::OPCODE_LOG:        log(d, s0, pp);                                break;
    204 			case Shader::OPCODE_RCPX:       rcpx(d, s0, pp);                               break;
    205 			case Shader::OPCODE_DIV:        div(d, s0, s1);                                break;
    206 			case Shader::OPCODE_IDIV:       idiv(d, s0, s1);                               break;
    207 			case Shader::OPCODE_UDIV:       udiv(d, s0, s1);                               break;
    208 			case Shader::OPCODE_MOD:        mod(d, s0, s1);                                break;
    209 			case Shader::OPCODE_IMOD:       imod(d, s0, s1);                               break;
    210 			case Shader::OPCODE_UMOD:       umod(d, s0, s1);                               break;
    211 			case Shader::OPCODE_SHL:        shl(d, s0, s1);                                break;
    212 			case Shader::OPCODE_ISHR:       ishr(d, s0, s1);                               break;
    213 			case Shader::OPCODE_USHR:       ushr(d, s0, s1);                               break;
    214 			case Shader::OPCODE_RSQX:       rsqx(d, s0, pp);                               break;
    215 			case Shader::OPCODE_SQRT:       sqrt(d, s0, pp);                               break;
    216 			case Shader::OPCODE_RSQ:        rsq(d, s0, pp);                                break;
    217 			case Shader::OPCODE_LEN2:       len2(d.x, s0, pp);                             break;
    218 			case Shader::OPCODE_LEN3:       len3(d.x, s0, pp);                             break;
    219 			case Shader::OPCODE_LEN4:       len4(d.x, s0, pp);                             break;
    220 			case Shader::OPCODE_DIST1:      dist1(d.x, s0, s1, pp);                        break;
    221 			case Shader::OPCODE_DIST2:      dist2(d.x, s0, s1, pp);                        break;
    222 			case Shader::OPCODE_DIST3:      dist3(d.x, s0, s1, pp);                        break;
    223 			case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);                        break;
    224 			case Shader::OPCODE_MIN:        min(d, s0, s1);                                break;
    225 			case Shader::OPCODE_IMIN:       imin(d, s0, s1);                               break;
    226 			case Shader::OPCODE_UMIN:       umin(d, s0, s1);                               break;
    227 			case Shader::OPCODE_MAX:        max(d, s0, s1);                                break;
    228 			case Shader::OPCODE_IMAX:       imax(d, s0, s1);                               break;
    229 			case Shader::OPCODE_UMAX:       umax(d, s0, s1);                               break;
    230 			case Shader::OPCODE_LRP:        lrp(d, s0, s1, s2);                            break;
    231 			case Shader::OPCODE_STEP:       step(d, s0, s1);                               break;
    232 			case Shader::OPCODE_SMOOTH:     smooth(d, s0, s1, s2);                         break;
    233 			case Shader::OPCODE_ISINF:      isinf(d, s0);                                  break;
    234 			case Shader::OPCODE_ISNAN:      isnan(d, s0);                                  break;
    235 			case Shader::OPCODE_FLOATBITSTOINT:
    236 			case Shader::OPCODE_FLOATBITSTOUINT:
    237 			case Shader::OPCODE_INTBITSTOFLOAT:
    238 			case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                                   break;
    239 			case Shader::OPCODE_PACKSNORM2x16:   packSnorm2x16(d, s0);                     break;
    240 			case Shader::OPCODE_PACKUNORM2x16:   packUnorm2x16(d, s0);                     break;
    241 			case Shader::OPCODE_PACKHALF2x16:    packHalf2x16(d, s0);                      break;
    242 			case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0);                   break;
    243 			case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0);                   break;
    244 			case Shader::OPCODE_UNPACKHALF2x16:  unpackHalf2x16(d, s0);                    break;
    245 			case Shader::OPCODE_POWX:       powx(d, s0, s1, pp);                           break;
    246 			case Shader::OPCODE_POW:        pow(d, s0, s1, pp);                            break;
    247 			case Shader::OPCODE_SGN:        sgn(d, s0);                                    break;
    248 			case Shader::OPCODE_ISGN:       isgn(d, s0);                                   break;
    249 			case Shader::OPCODE_CRS:        crs(d, s0, s1);                                break;
    250 			case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);                       break;
    251 			case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);                       break;
    252 			case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);                       break;
    253 			case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);                       break;
    254 			case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);                           break;
    255 			case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);                           break;
    256 			case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);                           break;
    257 			case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);                           break;
    258 			case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);                     break;
    259 			case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);                     break;
    260 			case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);                     break;
    261 			case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);                     break;
    262 			case Shader::OPCODE_NRM2:       nrm2(d, s0, pp);                               break;
    263 			case Shader::OPCODE_NRM3:       nrm3(d, s0, pp);                               break;
    264 			case Shader::OPCODE_NRM4:       nrm4(d, s0, pp);                               break;
    265 			case Shader::OPCODE_ABS:        abs(d, s0);                                    break;
    266 			case Shader::OPCODE_IABS:       iabs(d, s0);                                   break;
    267 			case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);                             break;
    268 			case Shader::OPCODE_COS:        cos(d, s0, pp);                                break;
    269 			case Shader::OPCODE_SIN:        sin(d, s0, pp);                                break;
    270 			case Shader::OPCODE_TAN:        tan(d, s0, pp);                                break;
    271 			case Shader::OPCODE_ACOS:       acos(d, s0, pp);                               break;
    272 			case Shader::OPCODE_ASIN:       asin(d, s0, pp);                               break;
    273 			case Shader::OPCODE_ATAN:       atan(d, s0, pp);                               break;
    274 			case Shader::OPCODE_ATAN2:      atan2(d, s0, s1, pp);                          break;
    275 			case Shader::OPCODE_COSH:       cosh(d, s0, pp);                               break;
    276 			case Shader::OPCODE_SINH:       sinh(d, s0, pp);                               break;
    277 			case Shader::OPCODE_TANH:       tanh(d, s0, pp);                               break;
    278 			case Shader::OPCODE_ACOSH:      acosh(d, s0, pp);                              break;
    279 			case Shader::OPCODE_ASINH:      asinh(d, s0, pp);                              break;
    280 			case Shader::OPCODE_ATANH:      atanh(d, s0, pp);                              break;
    281 			case Shader::OPCODE_M4X4:       M4X4(d, s0, src1);                             break;
    282 			case Shader::OPCODE_M4X3:       M4X3(d, s0, src1);                             break;
    283 			case Shader::OPCODE_M3X4:       M3X4(d, s0, src1);                             break;
    284 			case Shader::OPCODE_M3X3:       M3X3(d, s0, src1);                             break;
    285 			case Shader::OPCODE_M3X2:       M3X2(d, s0, src1);                             break;
    286 			case Shader::OPCODE_TEX:        TEX(d, s0, src1, project, bias);               break;
    287 			case Shader::OPCODE_TEXLDD:     TEXGRAD(d, s0, src1, s2, s3);                  break;
    288 			case Shader::OPCODE_TEXLDL:     TEXLOD(d, s0, src1, s0.w);                     break;
    289 			case Shader::OPCODE_TEXLOD:     TEXLOD(d, s0, src1, s2.x);                     break;
    290 			case Shader::OPCODE_TEXSIZE:    TEXSIZE(d, s0.x, src1);                        break;
    291 			case Shader::OPCODE_TEXKILL:    TEXKILL(cMask, d, dst.mask);                   break;
    292 			case Shader::OPCODE_TEXOFFSET:  TEXOFFSET(d, s0, src1, s2);                    break;
    293 			case Shader::OPCODE_TEXLODOFFSET: TEXLODOFFSET(d, s0, src1, s2, s3.x);         break;
    294 			case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2.x);                 break;
    295 			case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCHOFFSET(d, s0, src1, s2, s3.x); break;
    296 			case Shader::OPCODE_TEXGRAD:    TEXGRAD(d, s0, src1, s2, s3);                  break;
    297 			case Shader::OPCODE_TEXGRADOFFSET: TEXGRADOFFSET(d, s0, src1, s2, s3, s4);     break;
    298 			case Shader::OPCODE_TEXBIAS:    TEXBIAS(d, s0, src1, s2.x);                    break;
    299 			case Shader::OPCODE_TEXOFFSETBIAS: TEXOFFSETBIAS(d, s0, src1, s2, s3.x);       break;
    300 			case Shader::OPCODE_DISCARD:    DISCARD(cMask, instruction);                   break;
    301 			case Shader::OPCODE_DFDX:       DFDX(d, s0);                                   break;
    302 			case Shader::OPCODE_DFDY:       DFDY(d, s0);                                   break;
    303 			case Shader::OPCODE_FWIDTH:     FWIDTH(d, s0);                                 break;
    304 			case Shader::OPCODE_BREAK:      BREAK();                                       break;
    305 			case Shader::OPCODE_BREAKC:     BREAKC(s0, s1, control);                       break;
    306 			case Shader::OPCODE_BREAKP:     BREAKP(src0);                                  break;
    307 			case Shader::OPCODE_CONTINUE:   CONTINUE();                                    break;
    308 			case Shader::OPCODE_TEST:       TEST();                                        break;
    309 			case Shader::OPCODE_CALL:       CALL(dst.label, dst.callSite);                 break;
    310 			case Shader::OPCODE_CALLNZ:     CALLNZ(dst.label, dst.callSite, src0);         break;
    311 			case Shader::OPCODE_ELSE:       ELSE();                                        break;
    312 			case Shader::OPCODE_ENDIF:      ENDIF();                                       break;
    313 			case Shader::OPCODE_ENDLOOP:    ENDLOOP();                                     break;
    314 			case Shader::OPCODE_ENDREP:     ENDREP();                                      break;
    315 			case Shader::OPCODE_ENDWHILE:   ENDWHILE();                                    break;
    316 			case Shader::OPCODE_ENDSWITCH:  ENDSWITCH();                                   break;
    317 			case Shader::OPCODE_IF:         IF(src0);                                      break;
    318 			case Shader::OPCODE_IFC:        IFC(s0, s1, control);                          break;
    319 			case Shader::OPCODE_LABEL:      LABEL(dst.index);                              break;
    320 			case Shader::OPCODE_LOOP:       LOOP(src1);                                    break;
    321 			case Shader::OPCODE_REP:        REP(src0);                                     break;
    322 			case Shader::OPCODE_WHILE:      WHILE(src0);                                   break;
    323 			case Shader::OPCODE_SWITCH:     SWITCH();                                      break;
    324 			case Shader::OPCODE_RET:        RET();                                         break;
    325 			case Shader::OPCODE_LEAVE:      LEAVE();                                       break;
    326 			case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);                       break;
    327 			case Shader::OPCODE_ALL:        all(d.x, s0);                                  break;
    328 			case Shader::OPCODE_ANY:        any(d.x, s0);                                  break;
    329 			case Shader::OPCODE_NOT:        bitwise_not(d, s0);                            break;
    330 			case Shader::OPCODE_OR:         bitwise_or(d, s0, s1);                         break;
    331 			case Shader::OPCODE_XOR:        bitwise_xor(d, s0, s1);                        break;
    332 			case Shader::OPCODE_AND:        bitwise_and(d, s0, s1);                        break;
    333 			case Shader::OPCODE_EQ:         equal(d, s0, s1);                              break;
    334 			case Shader::OPCODE_NE:         notEqual(d, s0, s1);                           break;
    335 			case Shader::OPCODE_END:                                                       break;
    336 			default:
    337 				ASSERT(false);
    338 			}
    339 
        			// Write-back stage. Skipped for instructions without a destination,
        			// for label destinations, and for TEXKILL and NOP.
    340 			if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP)
    341 			{
        				// Saturation clamps the written components to the [0, 1] range.
    342 				if(dst.saturate)
    343 				{
    344 					if(dst.x) d.x = Max(d.x, Float4(0.0f));
    345 					if(dst.y) d.y = Max(d.y, Float4(0.0f));
    346 					if(dst.z) d.z = Max(d.z, Float4(0.0f));
    347 					if(dst.w) d.w = Max(d.w, Float4(0.0f));
    348 
    349 					if(dst.x) d.x = Min(d.x, Float4(1.0f));
    350 					if(dst.y) d.y = Min(d.y, Float4(1.0f));
    351 					if(dst.z) d.z = Min(d.z, Float4(1.0f));
    352 					if(dst.w) d.w = Min(d.w, Float4(1.0f));
    353 				}
    354 
        				// For predicated instructions the current destination contents are
        				// read into pDst, so that the parts which are not enabled keep
        				// their previous value when the result is merged below.
    355 				if(instruction->isPredicated())
    356 				{
    357 					Vector4f pDst;   // FIXME: Rename
    358 
    359 					switch(dst.type)
    360 					{
    361 					case Shader::PARAMETER_TEMP:
    362 						if(dst.rel.type == Shader::PARAMETER_VOID)
    363 						{
    364 							if(dst.x) pDst.x = r[dst.index].x;
    365 							if(dst.y) pDst.y = r[dst.index].y;
    366 							if(dst.z) pDst.z = r[dst.index].z;
    367 							if(dst.w) pDst.w = r[dst.index].w;
    368 						}
    369 						else
    370 						{
    371 							Int a = relativeAddress(dst);
    372 
    373 							if(dst.x) pDst.x = r[dst.index + a].x;
    374 							if(dst.y) pDst.y = r[dst.index + a].y;
    375 							if(dst.z) pDst.z = r[dst.index + a].z;
    376 							if(dst.w) pDst.w = r[dst.index + a].w;
    377 						}
    378 						break;
    379 					case Shader::PARAMETER_COLOROUT:
    380 						if(dst.rel.type == Shader::PARAMETER_VOID)
    381 						{
    382 							if(dst.x) pDst.x = oC[dst.index].x;
    383 							if(dst.y) pDst.y = oC[dst.index].y;
    384 							if(dst.z) pDst.z = oC[dst.index].z;
    385 							if(dst.w) pDst.w = oC[dst.index].w;
    386 						}
    387 						else
    388 						{
    389 							Int a = relativeAddress(dst) + dst.index;
    390 
    391 							if(dst.x) pDst.x = oC[a].x;
    392 							if(dst.y) pDst.y = oC[a].y;
    393 							if(dst.z) pDst.z = oC[a].z;
    394 							if(dst.w) pDst.w = oC[a].w;
    395 						}
    396 						break;
    397 					case Shader::PARAMETER_PREDICATE:
    398 						if(dst.x) pDst.x = p0.x;
    399 						if(dst.y) pDst.y = p0.y;
    400 						if(dst.z) pDst.z = p0.z;
    401 						if(dst.w) pDst.w = p0.w;
    402 						break;
    403 					case Shader::PARAMETER_DEPTHOUT:
        						// Only the x component is used for the depth output.
    404 						pDst.x = oDepth;
    405 						break;
    406 					default:
    407 						ASSERT(false);
    408 					}
    409 
        					// Every component starts from the mask returned by enableMask().
    410 					Int4 enable = enableMask(instruction);
    411 
    412 					Int4 xEnable = enable;
    413 					Int4 yEnable = enable;
    414 					Int4 zEnable = enable;
    415 					Int4 wEnable = enable;
    416 
        					// With an explicit predicate, each component's mask is additionally
        					// ANDed with the p0 component selected by the predicate swizzle
        					// (two bits per component), inverted when predicateNot is set.
    417 					if(predicate)
    418 					{
    419 						unsigned char pSwizzle = instruction->predicateSwizzle;
    420 
    421 						Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03];
    422 						Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03];
    423 						Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03];
    424 						Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03];
    425 
    426 						if(!instruction->predicateNot)
    427 						{
    428 							if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
    429 							if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
    430 							if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
    431 							if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
    432 						}
    433 						else
    434 						{
    435 							if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
    436 							if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
    437 							if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
    438 							if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
    439 						}
    440 					}
    441 
        					// Bitwise select: keep the new result where the mask is set...
    442 					if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
    443 					if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
    444 					if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
    445 					if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
    446 
        					// ...and the previous destination value where it is clear.
    447 					if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
    448 					if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
    449 					if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
    450 					if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
    451 				}
    452 
        				// Store the components selected by the destination write mask.
    453 				switch(dst.type)
    454 				{
    455 				case Shader::PARAMETER_TEMP:
    456 					if(dst.rel.type == Shader::PARAMETER_VOID)
    457 					{
    458 						if(dst.x) r[dst.index].x = d.x;
    459 						if(dst.y) r[dst.index].y = d.y;
    460 						if(dst.z) r[dst.index].z = d.z;
    461 						if(dst.w) r[dst.index].w = d.w;
    462 					}
    463 					else
    464 					{
    465 						Int a = relativeAddress(dst);
    466 
    467 						if(dst.x) r[dst.index + a].x = d.x;
    468 						if(dst.y) r[dst.index + a].y = d.y;
    469 						if(dst.z) r[dst.index + a].z = d.z;
    470 						if(dst.w) r[dst.index + a].w = d.w;
    471 					}
    472 					break;
    473 				case Shader::PARAMETER_COLOROUT:
    474 					if(dst.rel.type == Shader::PARAMETER_VOID)
    475 					{
        						// Any write to a color output other than index 0 disables broadcasting.
    476 						broadcastColor0 = (dst.index == 0) && broadcastColor0;
    477 
    478 						if(dst.x) { oC[dst.index].x = d.x; }
    479 						if(dst.y) { oC[dst.index].y = d.y; }
    480 						if(dst.z) { oC[dst.index].z = d.z; }
    481 						if(dst.w) { oC[dst.index].w = d.w; }
    482 					}
    483 					else
    484 					{
        						// A relatively addressed color write always disables broadcasting.
    485 						broadcastColor0 = false;
    486 						Int a = relativeAddress(dst) + dst.index;
    487 
    488 						if(dst.x) { oC[a].x = d.x; }
    489 						if(dst.y) { oC[a].y = d.y; }
    490 						if(dst.z) { oC[a].z = d.z; }
    491 						if(dst.w) { oC[a].w = d.w; }
    492 					}
    493 					break;
    494 				case Shader::PARAMETER_PREDICATE:
    495 					if(dst.x) p0.x = d.x;
    496 					if(dst.y) p0.y = d.y;
    497 					if(dst.z) p0.z = d.z;
    498 					if(dst.w) p0.w = d.w;
    499 					break;
    500 				case Shader::PARAMETER_DEPTHOUT:
    501 					oDepth = d.x;
    502 					break;
    503 				default:
    504 					ASSERT(false);
    505 				}
    506 			}
    507 		}
    508 
        		// NOTE(review): presumably currentLabel != -1 means a label was emitted, so
        		// code generation continues in returnBlock - confirm against LABEL()/RET().
    509 		if(currentLabel != -1)
    510 		{
    511 			Nucleus::setInsertBlock(returnBlock);
    512 		}
    513 
        		// Copy the shader outputs into c[]: either oC[0] replicated to every
        		// render target, or each output color individually.
    514 		if(broadcastColor0)
    515 		{
    516 			for(int i = 0; i < RENDERTARGETS; i++)
    517 			{
    518 				c[i] = oC[0];
    519 			}
    520 		}
    521 		else
    522 		{
    523 			for(int i = 0; i < RENDERTARGETS; i++)
    524 			{
    525 				c[i] = oC[i];
    526 			}
    527 		}
    528 
    529 		clampColor(c);
    530 
        		// When depth override is enabled, oDepth is clamped to [0, 1].
    531 		if(state.depthOverride)
    532 		{
    533 			oDepth = Min(Max(oDepth, Float4(0.0f)), Float4(1.0f));
    534 		}
    535 	}
    536 
    537 	Bool PixelProgram::alphaTest(Int cMask[4])
    538 	{
    539 		if(!state.alphaTestActive())   // Without an active alpha test nothing gets masked out
    540 		{
    541 			return true;
    542 		}
    543 
    544 		Int aMask;
    545 		// Update the coverage masks according to the transparency antialiasing mode.
    546 		if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
    547 		{
    548 			alphaToCoverage(cMask, c[0].w);
    549 		}
    550 		else if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
    551 		{
    552 			// The w component of c[0], scaled by 0x1000 and rounded, is what gets tested.
    553 			Short4 fixedAlpha = RoundShort4(c[0].w * Float4(0x1000));
    554 			PixelRoutine::alphaTest(aMask, fixedAlpha);
    555 
    556 			for(unsigned int sample = 0; sample < state.multiSample; ++sample)
    557 			{
    558 				cMask[sample] &= aMask;
    559 			}
    560 		}
    561 		else ASSERT(false);
    562 
    563 		// The result is true when the coverage mask of any sample is still non-zero.
    564 		Int coverage = cMask[0];
    565 		for(unsigned int sample = 1; sample < state.multiSample; ++sample)
    566 		{
    567 			coverage = coverage | cMask[sample];
    568 		}
    569 
    570 		return coverage != 0;
    571 	}
    572 
    573 	void PixelProgram::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
    574 	{
    575 		for(int index = 0; index < RENDERTARGETS; index++)
    576 		{
    577 			if(!state.colorWriteActive(index))
    578 			{
    579 				continue;
    580 			}
    581 
    582 			if(!postBlendSRGB && state.writeSRGB && !isSRGB(index))
    583 			{
    584 				c[index].x = linearToSRGB(c[index].x);
    585 				c[index].y = linearToSRGB(c[index].y);
    586 				c[index].z = linearToSRGB(c[index].z);
    587 			}
    588 
    589 			if(index == 0)
    590 			{
    591 				fogBlend(c[index], fog);
    592 			}
    593 
    594 			switch(state.targetFormat[index])
    595 			{
    596 			case FORMAT_R5G6B5:
    597 			case FORMAT_X8R8G8B8:
    598 			case FORMAT_X8B8G8R8:
    599 			case FORMAT_A8R8G8B8:
    600 			case FORMAT_A8B8G8R8:
    601 			case FORMAT_SRGB8_X8:
    602 			case FORMAT_SRGB8_A8:
    603 			case FORMAT_G8R8:
    604 			case FORMAT_R8:
    605 			case FORMAT_A8:
    606 			case FORMAT_G16R16:
    607 			case FORMAT_A16B16G16R16:
    608 				for(unsigned int q = 0; q < state.multiSample; q++)
    609 				{
    610 					Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
    611 					Vector4s color;
    612 
    613 					if(state.targetFormat[index] == FORMAT_R5G6B5)
    614 					{
    615 						color.x = UShort4(c[index].x * Float4(0xFBFF), false);
    616 						color.y = UShort4(c[index].y * Float4(0xFDFF), false);
    617 						color.z = UShort4(c[index].z * Float4(0xFBFF), false);
    618 						color.w = UShort4(c[index].w * Float4(0xFFFF), false);
    619 					}
    620 					else
    621 					{
    622 						color.x = convertFixed16(c[index].x, false);
    623 						color.y = convertFixed16(c[index].y, false);
    624 						color.z = convertFixed16(c[index].z, false);
    625 						color.w = convertFixed16(c[index].w, false);
    626 					}
    627 
    628 					if(state.multiSampleMask & (1 << q))
    629 					{
    630 						alphaBlend(index, buffer, color, x);
    631 						logicOperation(index, buffer, color, x);
    632 						writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
    633 					}
    634 				}
    635 				break;
    636 			case FORMAT_R32F:
    637 			case FORMAT_G32R32F:
    638 			case FORMAT_X32B32G32R32F:
    639 			case FORMAT_A32B32G32R32F:
    640 			case FORMAT_X32B32G32R32F_UNSIGNED:
    641 			case FORMAT_R32I:
    642 			case FORMAT_G32R32I:
    643 			case FORMAT_A32B32G32R32I:
    644 			case FORMAT_R32UI:
    645 			case FORMAT_G32R32UI:
    646 			case FORMAT_A32B32G32R32UI:
    647 			case FORMAT_R16I:
    648 			case FORMAT_G16R16I:
    649 			case FORMAT_A16B16G16R16I:
    650 			case FORMAT_R16UI:
    651 			case FORMAT_G16R16UI:
    652 			case FORMAT_A16B16G16R16UI:
    653 			case FORMAT_R8I:
    654 			case FORMAT_G8R8I:
    655 			case FORMAT_A8B8G8R8I:
    656 			case FORMAT_R8UI:
    657 			case FORMAT_G8R8UI:
    658 			case FORMAT_A8B8G8R8UI:
    659 				for(unsigned int q = 0; q < state.multiSample; q++)
    660 				{
    661 					Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
    662 					Vector4f color = c[index];
    663 
    664 					if(state.multiSampleMask & (1 << q))
    665 					{
    666 						alphaBlend(index, buffer, color, x);
    667 						writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
    668 					}
    669 				}
    670 				break;
    671 			default:
    672 				ASSERT(false);
    673 			}
    674 		}
    675 	}
    676 
    677 	Vector4f PixelProgram::sampleTexture(const Src &sampler, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
    678 	{
    679 		Vector4f tmp;
    680 
    681 		if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID)
    682 		{
    683 			tmp = sampleTexture(sampler.index, uvwq, bias, dsx, dsy, offset, function);
    684 		}
    685 		else
    686 		{
    687 			Int index = As<Int>(Float(fetchRegister(sampler).x.x));
    688 
    689 			for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
    690 			{
    691 				if(shader->usesSampler(i))
    692 				{
    693 					If(index == i)
    694 					{
    695 						tmp = sampleTexture(i, uvwq, bias, dsx, dsy, offset, function);
    696 						// FIXME: When the sampler states are the same, we could use one sampler and just index the texture
    697 					}
    698 				}
    699 			}
    700 		}
    701 
    702 		Vector4f c;
    703 		c.x = tmp[(sampler.swizzle >> 0) & 0x3];
    704 		c.y = tmp[(sampler.swizzle >> 2) & 0x3];
    705 		c.z = tmp[(sampler.swizzle >> 4) & 0x3];
    706 		c.w = tmp[(sampler.swizzle >> 6) & 0x3];
    707 
    708 		return c;
    709 	}
    710 
    711 	Vector4f PixelProgram::sampleTexture(int samplerIndex, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
    712 	{
    713 		#if PERF_PROFILE
    714 			Long texTime = Ticks();
    715 		#endif
    716 
    717 		Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + samplerIndex * sizeof(Texture);
    718 		Vector4f c = SamplerCore(constants, state.sampler[samplerIndex]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, bias, dsx, dsy, offset, function);
    719 
    720 		#if PERF_PROFILE
    721 			cycles[PERF_TEX] += Ticks() - texTime;
    722 		#endif
    723 
    724 		return c;
    725 	}
    726 
    727 	void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS])
    728 	{
    729 		for(int index = 0; index < RENDERTARGETS; index++)
    730 		{
    731 			if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive()))
    732 			{
    733 				continue;
    734 			}
    735 
    736 			switch(state.targetFormat[index])
    737 			{
    738 			case FORMAT_NULL:
    739 				break;
    740 			case FORMAT_R5G6B5:
    741 			case FORMAT_A8R8G8B8:
    742 			case FORMAT_A8B8G8R8:
    743 			case FORMAT_X8R8G8B8:
    744 			case FORMAT_X8B8G8R8:
    745 			case FORMAT_SRGB8_X8:
    746 			case FORMAT_SRGB8_A8:
    747 			case FORMAT_G8R8:
    748 			case FORMAT_R8:
    749 			case FORMAT_A8:
    750 			case FORMAT_G16R16:
    751 			case FORMAT_A16B16G16R16:
    752 				oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f));
    753 				oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f));
    754 				oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f));
    755 				oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f));
    756 				break;
    757 			case FORMAT_R32F:
    758 			case FORMAT_G32R32F:
    759 			case FORMAT_X32B32G32R32F:
    760 			case FORMAT_A32B32G32R32F:
    761 			case FORMAT_R32I:
    762 			case FORMAT_G32R32I:
    763 			case FORMAT_A32B32G32R32I:
    764 			case FORMAT_R32UI:
    765 			case FORMAT_G32R32UI:
    766 			case FORMAT_A32B32G32R32UI:
    767 			case FORMAT_R16I:
    768 			case FORMAT_G16R16I:
    769 			case FORMAT_A16B16G16R16I:
    770 			case FORMAT_R16UI:
    771 			case FORMAT_G16R16UI:
    772 			case FORMAT_A16B16G16R16UI:
    773 			case FORMAT_R8I:
    774 			case FORMAT_G8R8I:
    775 			case FORMAT_A8B8G8R8I:
    776 			case FORMAT_R8UI:
    777 			case FORMAT_G8R8UI:
    778 			case FORMAT_A8B8G8R8UI:
    779 				break;
    780 			case FORMAT_X32B32G32R32F_UNSIGNED:
    781 				oC[index].x = Max(oC[index].x, Float4(0.0f));
    782 				oC[index].y = Max(oC[index].y, Float4(0.0f));
    783 				oC[index].z = Max(oC[index].z, Float4(0.0f));
    784 				oC[index].w = Max(oC[index].w, Float4(0.0f));
    785 				break;
    786 			default:
    787 				ASSERT(false);
    788 			}
    789 		}
    790 	}
    791 
    792 	Int4 PixelProgram::enableMask(const Shader::Instruction *instruction)
    793 	{
    794 		Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF);
    795 
    796 		if(!whileTest)
    797 		{
    798 			if(shader->containsBreakInstruction() && instruction->analysisBreak)
    799 			{
    800 				enable &= enableBreak;
    801 			}
    802 
    803 			if(shader->containsContinueInstruction() && instruction->analysisContinue)
    804 			{
    805 				enable &= enableContinue;
    806 			}
    807 
    808 			if(shader->containsLeaveInstruction() && instruction->analysisLeave)
    809 			{
    810 				enable &= enableLeave;
    811 			}
    812 		}
    813 
    814 		return enable;
    815 	}
    816 
    817 	Vector4f PixelProgram::fetchRegister(const Src &src, unsigned int offset)
    818 	{
    819 		Vector4f reg;
    820 		unsigned int i = src.index + offset;
    821 
    822 		switch(src.type)
    823 		{
    824 		case Shader::PARAMETER_TEMP:
    825 			if(src.rel.type == Shader::PARAMETER_VOID)
    826 			{
    827 				reg = r[i];
    828 			}
    829 			else
    830 			{
    831 				Int a = relativeAddress(src, src.bufferIndex);
    832 
    833 				reg = r[i + a];
    834 			}
    835 			break;
    836 		case Shader::PARAMETER_INPUT:
    837 			{
    838 				if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
    839 				{
    840 					reg = v[i];
    841 				}
    842 				else
    843 				{
    844 					Int a = relativeAddress(src, src.bufferIndex);
    845 
    846 					reg = v[i + a];
    847 				}
    848 			}
    849 			break;
    850 		case Shader::PARAMETER_CONST:
    851 			reg = readConstant(src, offset);
    852 			break;
    853 		case Shader::PARAMETER_TEXTURE:
    854 			reg = v[2 + i];
    855 			break;
    856 		case Shader::PARAMETER_MISCTYPE:
    857 			if(src.index == Shader::VPosIndex) reg = vPos;
    858 			if(src.index == Shader::VFaceIndex) reg = vFace;
    859 			break;
    860 		case Shader::PARAMETER_SAMPLER:
    861 			if(src.rel.type == Shader::PARAMETER_VOID)
    862 			{
    863 				reg.x = As<Float4>(Int4(i));
    864 			}
    865 			else if(src.rel.type == Shader::PARAMETER_TEMP)
    866 			{
    867 				reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x));
    868 			}
    869 			return reg;
    870 		case Shader::PARAMETER_PREDICATE:   return reg; // Dummy
    871 		case Shader::PARAMETER_VOID:        return reg; // Dummy
    872 		case Shader::PARAMETER_FLOAT4LITERAL:
    873 			reg.x = Float4(src.value[0]);
    874 			reg.y = Float4(src.value[1]);
    875 			reg.z = Float4(src.value[2]);
    876 			reg.w = Float4(src.value[3]);
    877 			break;
    878 		case Shader::PARAMETER_CONSTINT:    return reg; // Dummy
    879 		case Shader::PARAMETER_CONSTBOOL:   return reg; // Dummy
    880 		case Shader::PARAMETER_LOOP:        return reg; // Dummy
    881 		case Shader::PARAMETER_COLOROUT:
    882 			if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
    883 			{
    884 				reg = oC[i];
    885 			}
    886 			else
    887 			{
    888 				Int a = relativeAddress(src, src.bufferIndex);
    889 
    890 				reg = oC[i + a];
    891 			}
    892 			break;
    893 		case Shader::PARAMETER_DEPTHOUT:
    894 			reg.x = oDepth;
    895 			break;
    896 		default:
    897 			ASSERT(false);
    898 		}
    899 
    900 		const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
    901 		const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
    902 		const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
    903 		const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
    904 
    905 		Vector4f mod;
    906 
    907 		switch(src.modifier)
    908 		{
    909 		case Shader::MODIFIER_NONE:
    910 			mod.x = x;
    911 			mod.y = y;
    912 			mod.z = z;
    913 			mod.w = w;
    914 			break;
    915 		case Shader::MODIFIER_NEGATE:
    916 			mod.x = -x;
    917 			mod.y = -y;
    918 			mod.z = -z;
    919 			mod.w = -w;
    920 			break;
    921 		case Shader::MODIFIER_ABS:
    922 			mod.x = Abs(x);
    923 			mod.y = Abs(y);
    924 			mod.z = Abs(z);
    925 			mod.w = Abs(w);
    926 			break;
    927 		case Shader::MODIFIER_ABS_NEGATE:
    928 			mod.x = -Abs(x);
    929 			mod.y = -Abs(y);
    930 			mod.z = -Abs(z);
    931 			mod.w = -Abs(w);
    932 			break;
    933 		case Shader::MODIFIER_NOT:
    934 			mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
    935 			mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
    936 			mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
    937 			mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
    938 			break;
    939 		default:
    940 			ASSERT(false);
    941 		}
    942 
    943 		return mod;
    944 	}
    945 
    946 	RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index)
    947 	{
    948 		if(bufferIndex == -1)
    949 		{
    950 			return data + OFFSET(DrawData, ps.c[index]);
    951 		}
    952 		else
    953 		{
    954 			return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, ps.u[bufferIndex])) + index;
    955 		}
    956 	}
    957 
    958 	RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset)
    959 	{
    960 		return uniformAddress(bufferIndex, index) + offset * sizeof(float4);
    961 	}
    962 
    963 	Vector4f PixelProgram::readConstant(const Src &src, unsigned int offset)
    964 	{
    965 		Vector4f c;
    966 		unsigned int i = src.index + offset;
    967 
    968 		if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
    969 		{
    970 			c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i));
    971 
    972 			c.x = c.x.xxxx;
    973 			c.y = c.y.yyyy;
    974 			c.z = c.z.zzzz;
    975 			c.w = c.w.wwww;
    976 
    977 			if(shader->containsDefineInstruction())   // Constant may be known at compile time
    978 			{
    979 				for(size_t j = 0; j < shader->getLength(); j++)
    980 				{
    981 					const Shader::Instruction &instruction = *shader->getInstruction(j);
    982 
    983 					if(instruction.opcode == Shader::OPCODE_DEF)
    984 					{
    985 						if(instruction.dst.index == i)
    986 						{
    987 							c.x = Float4(instruction.src[0].value[0]);
    988 							c.y = Float4(instruction.src[0].value[1]);
    989 							c.z = Float4(instruction.src[0].value[2]);
    990 							c.w = Float4(instruction.src[0].value[3]);
    991 
    992 							break;
    993 						}
    994 					}
    995 				}
    996 			}
    997 		}
    998 		else if(src.rel.type == Shader::PARAMETER_LOOP)
    999 		{
   1000 			Int loopCounter = aL[loopDepth];
   1001 
   1002 			c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter));
   1003 
   1004 			c.x = c.x.xxxx;
   1005 			c.y = c.y.yyyy;
   1006 			c.z = c.z.zzzz;
   1007 			c.w = c.w.wwww;
   1008 		}
   1009 		else
   1010 		{
   1011 			Int a = relativeAddress(src, src.bufferIndex);
   1012 
   1013 			c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
   1014 
   1015 			c.x = c.x.xxxx;
   1016 			c.y = c.y.yyyy;
   1017 			c.z = c.z.zzzz;
   1018 			c.w = c.w.wwww;
   1019 		}
   1020 
   1021 		return c;
   1022 	}
   1023 
   1024 	Int PixelProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex)
   1025 	{
   1026 		ASSERT(var.rel.deterministic);
   1027 
   1028 		if(var.rel.type == Shader::PARAMETER_TEMP)
   1029 		{
   1030 			return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale;
   1031 		}
   1032 		else if(var.rel.type == Shader::PARAMETER_INPUT)
   1033 		{
   1034 			return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale;
   1035 		}
   1036 		else if(var.rel.type == Shader::PARAMETER_OUTPUT)
   1037 		{
   1038 			return As<Int>(Extract(oC[var.rel.index].x, 0)) * var.rel.scale;
   1039 		}
   1040 		else if(var.rel.type == Shader::PARAMETER_CONST)
   1041 		{
   1042 			return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale;
   1043 		}
   1044 		else if(var.rel.type == Shader::PARAMETER_LOOP)
   1045 		{
   1046 			return aL[loopDepth];
   1047 		}
   1048 		else ASSERT(false);
   1049 
   1050 		return 0;
   1051 	}
   1052 
   1053 	Float4 PixelProgram::linearToSRGB(const Float4 &x)   // Approximates x^(1.0/2.2)
   1054 	{
   1055 		Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
   1056 		Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f);
   1057 
   1058 		return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f));
   1059 	}
   1060 
   1061 	void PixelProgram::M3X2(Vector4f &dst, Vector4f &src0, const Src &src1)
   1062 	{
   1063 		Vector4f row0 = fetchRegister(src1, 0);
   1064 		Vector4f row1 = fetchRegister(src1, 1);
   1065 
   1066 		dst.x = dot3(src0, row0);
   1067 		dst.y = dot3(src0, row1);
   1068 	}
   1069 
   1070 	void PixelProgram::M3X3(Vector4f &dst, Vector4f &src0, const Src &src1)
   1071 	{
   1072 		Vector4f row0 = fetchRegister(src1, 0);
   1073 		Vector4f row1 = fetchRegister(src1, 1);
   1074 		Vector4f row2 = fetchRegister(src1, 2);
   1075 
   1076 		dst.x = dot3(src0, row0);
   1077 		dst.y = dot3(src0, row1);
   1078 		dst.z = dot3(src0, row2);
   1079 	}
   1080 
   1081 	void PixelProgram::M3X4(Vector4f &dst, Vector4f &src0, const Src &src1)
   1082 	{
   1083 		Vector4f row0 = fetchRegister(src1, 0);
   1084 		Vector4f row1 = fetchRegister(src1, 1);
   1085 		Vector4f row2 = fetchRegister(src1, 2);
   1086 		Vector4f row3 = fetchRegister(src1, 3);
   1087 
   1088 		dst.x = dot3(src0, row0);
   1089 		dst.y = dot3(src0, row1);
   1090 		dst.z = dot3(src0, row2);
   1091 		dst.w = dot3(src0, row3);
   1092 	}
   1093 
   1094 	void PixelProgram::M4X3(Vector4f &dst, Vector4f &src0, const Src &src1)
   1095 	{
   1096 		Vector4f row0 = fetchRegister(src1, 0);
   1097 		Vector4f row1 = fetchRegister(src1, 1);
   1098 		Vector4f row2 = fetchRegister(src1, 2);
   1099 
   1100 		dst.x = dot4(src0, row0);
   1101 		dst.y = dot4(src0, row1);
   1102 		dst.z = dot4(src0, row2);
   1103 	}
   1104 
   1105 	void PixelProgram::M4X4(Vector4f &dst, Vector4f &src0, const Src &src1)
   1106 	{
   1107 		Vector4f row0 = fetchRegister(src1, 0);
   1108 		Vector4f row1 = fetchRegister(src1, 1);
   1109 		Vector4f row2 = fetchRegister(src1, 2);
   1110 		Vector4f row3 = fetchRegister(src1, 3);
   1111 
   1112 		dst.x = dot4(src0, row0);
   1113 		dst.y = dot4(src0, row1);
   1114 		dst.z = dot4(src0, row2);
   1115 		dst.w = dot4(src0, row3);
   1116 	}
   1117 
   1118 	void PixelProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias)
   1119 	{
   1120 		if(project)
   1121 		{
   1122 			Vector4f proj;
   1123 			Float4 rw = reciprocal(src0.w);
   1124 			proj.x = src0.x * rw;
   1125 			proj.y = src0.y * rw;
   1126 			proj.z = src0.z * rw;
   1127 
   1128 			dst = sampleTexture(src1, proj, src0.x, (src0), (src0), (src0), Implicit);
   1129 		}
   1130 		else
   1131 		{
   1132 			dst = sampleTexture(src1, src0, src0.x, (src0), (src0), (src0), bias ? Bias : Implicit);
   1133 		}
   1134 	}
   1135 
   1136 	void PixelProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset)
   1137 	{
   1138 		dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), offset, {Implicit, Offset});
   1139 	}
   1140 
   1141 	void PixelProgram::TEXLODOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &lod)
   1142 	{
   1143 		dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Lod, Offset});
   1144 	}
   1145 
   1146 	void PixelProgram::TEXBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &bias)
   1147 	{
   1148 		dst = sampleTexture(src1, src0, bias, (src0), (src0), (src0), Bias);
   1149 	}
   1150 
   1151 	void PixelProgram::TEXOFFSETBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &bias)
   1152 	{
   1153 		dst = sampleTexture(src1, src0, bias, (src0), (src0), offset, {Bias, Offset});
   1154 	}
   1155 
   1156 	void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod)
   1157 	{
   1158 		dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Fetch);
   1159 	}
   1160 
   1161 	void PixelProgram::TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod)
   1162 	{
   1163 		dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Fetch, Offset});
   1164 	}
   1165 
   1166 	void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy)
   1167 	{
   1168 		dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, (src0), Grad);
   1169 	}
   1170 
   1171 	void PixelProgram::TEXGRADOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy, Vector4f &offset)
   1172 	{
   1173 		dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, offset, {Grad, Offset});
   1174 	}
   1175 
   1176 	void PixelProgram::TEXLOD(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &lod)
   1177 	{
   1178 		dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Lod);
   1179 	}
   1180 
   1181 	void PixelProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
   1182 	{
   1183 		Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + src1.index * sizeof(Texture);
   1184 		dst = SamplerCore::textureSize(texture, lod);
   1185 	}
   1186 
   1187 	void PixelProgram::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask)
   1188 	{
   1189 		Int kill = -1;
   1190 
   1191 		if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f)));
   1192 		if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f)));
   1193 		if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f)));
   1194 		if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f)));
   1195 
   1196 		// FIXME: Dynamic branching affects TEXKILL?
   1197 		//	if(shader->containsDynamicBranching())
   1198 		//	{
   1199 		//		kill = ~SignMask(enableMask());
   1200 		//	}
   1201 
   1202 		for(unsigned int q = 0; q < state.multiSample; q++)
   1203 		{
   1204 			cMask[q] &= kill;
   1205 		}
   1206 
   1207 		// FIXME: Branch to end of shader if all killed?
   1208 	}
   1209 
   1210 	void PixelProgram::DISCARD(Int cMask[4], const Shader::Instruction *instruction)
   1211 	{
   1212 		Int kill = 0;
   1213 
   1214 		if(shader->containsDynamicBranching())
   1215 		{
   1216 			kill = ~SignMask(enableMask(instruction));
   1217 		}
   1218 
   1219 		for(unsigned int q = 0; q < state.multiSample; q++)
   1220 		{
   1221 			cMask[q] &= kill;
   1222 		}
   1223 
   1224 		// FIXME: Branch to end of shader if all killed?
   1225 	}
   1226 
   1227 	void PixelProgram::DFDX(Vector4f &dst, Vector4f &src)
   1228 	{
   1229 		dst.x = src.x.yyww - src.x.xxzz;
   1230 		dst.y = src.y.yyww - src.y.xxzz;
   1231 		dst.z = src.z.yyww - src.z.xxzz;
   1232 		dst.w = src.w.yyww - src.w.xxzz;
   1233 	}
   1234 
   1235 	void PixelProgram::DFDY(Vector4f &dst, Vector4f &src)
   1236 	{
   1237 		dst.x = src.x.zwzw - src.x.xyxy;
   1238 		dst.y = src.y.zwzw - src.y.xyxy;
   1239 		dst.z = src.z.zwzw - src.z.xyxy;
   1240 		dst.w = src.w.zwzw - src.w.xyxy;
   1241 	}
   1242 
   1243 	void PixelProgram::FWIDTH(Vector4f &dst, Vector4f &src)
   1244 	{
   1245 		// abs(dFdx(src)) + abs(dFdy(src));
   1246 		dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy);
   1247 		dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy);
   1248 		dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy);
   1249 		dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy);
   1250 	}
   1251 
   1252 	void PixelProgram::BREAK()
   1253 	{
   1254 		enableBreak = enableBreak & ~enableStack[enableIndex];
   1255 	}
   1256 
   1257 	void PixelProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control)
   1258 	{
   1259 		Int4 condition;
   1260 
   1261 		switch(control)
   1262 		{
   1263 		case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
   1264 		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
   1265 		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
   1266 		case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
   1267 		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
   1268 		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
   1269 		default:
   1270 			ASSERT(false);
   1271 		}
   1272 
   1273 		BREAK(condition);
   1274 	}
   1275 
   1276 	void PixelProgram::BREAKP(const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
   1277 	{
   1278 		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
   1279 
   1280 		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
   1281 		{
   1282 			condition = ~condition;
   1283 		}
   1284 
   1285 		BREAK(condition);
   1286 	}
   1287 
   1288 	void PixelProgram::BREAK(Int4 &condition)
   1289 	{
   1290 		condition &= enableStack[enableIndex];
   1291 
   1292 		enableBreak = enableBreak & ~condition;
   1293 	}
   1294 
   1295 	void PixelProgram::CONTINUE()
   1296 	{
   1297 		enableContinue = enableContinue & ~enableStack[enableIndex];
   1298 	}
   1299 
   1300 	void PixelProgram::TEST()
   1301 	{
   1302 		whileTest = true;
   1303 	}
   1304 
   1305 	void PixelProgram::CALL(int labelIndex, int callSiteIndex)
   1306 	{
   1307 		if(!labelBlock[labelIndex])
   1308 		{
   1309 			labelBlock[labelIndex] = Nucleus::createBasicBlock();
   1310 		}
   1311 
   1312 		if(callRetBlock[labelIndex].size() > 1)
   1313 		{
   1314 			callStack[stackIndex++] = UInt(callSiteIndex);
   1315 		}
   1316 
   1317 		Int4 restoreLeave = enableLeave;
   1318 
   1319 		Nucleus::createBr(labelBlock[labelIndex]);
   1320 		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
   1321 
   1322 		enableLeave = restoreLeave;
   1323 	}
   1324 
   1325 	void PixelProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src)
   1326 	{
   1327 		if(src.type == Shader::PARAMETER_CONSTBOOL)
   1328 		{
   1329 			CALLNZb(labelIndex, callSiteIndex, src);
   1330 		}
   1331 		else if(src.type == Shader::PARAMETER_PREDICATE)
   1332 		{
   1333 			CALLNZp(labelIndex, callSiteIndex, src);
   1334 		}
   1335 		else ASSERT(false);
   1336 	}
   1337 
   1338 	void PixelProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister)
   1339 	{
   1340 		Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME
   1341 
   1342 		if(boolRegister.modifier == Shader::MODIFIER_NOT)
   1343 		{
   1344 			condition = !condition;
   1345 		}
   1346 
   1347 		if(!labelBlock[labelIndex])
   1348 		{
   1349 			labelBlock[labelIndex] = Nucleus::createBasicBlock();
   1350 		}
   1351 
   1352 		if(callRetBlock[labelIndex].size() > 1)
   1353 		{
   1354 			callStack[stackIndex++] = UInt(callSiteIndex);
   1355 		}
   1356 
   1357 		Int4 restoreLeave = enableLeave;
   1358 
   1359 		branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
   1360 		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
   1361 
   1362 		enableLeave = restoreLeave;
   1363 	}
   1364 
   1365 	void PixelProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)
   1366 	{
   1367 		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
   1368 
   1369 		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
   1370 		{
   1371 			condition = ~condition;
   1372 		}
   1373 
   1374 		condition &= enableStack[enableIndex];
   1375 
   1376 		if(!labelBlock[labelIndex])
   1377 		{
   1378 			labelBlock[labelIndex] = Nucleus::createBasicBlock();
   1379 		}
   1380 
   1381 		if(callRetBlock[labelIndex].size() > 1)
   1382 		{
   1383 			callStack[stackIndex++] = UInt(callSiteIndex);
   1384 		}
   1385 
   1386 		enableIndex++;
   1387 		enableStack[enableIndex] = condition;
   1388 		Int4 restoreLeave = enableLeave;
   1389 
   1390 		Bool notAllFalse = SignMask(condition) != 0;
   1391 		branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
   1392 		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
   1393 
   1394 		enableIndex--;
   1395 		enableLeave = restoreLeave;
   1396 	}
   1397 
   1398 	void PixelProgram::ELSE()
   1399 	{
   1400 		ifDepth--;
   1401 
   1402 		BasicBlock *falseBlock = ifFalseBlock[ifDepth];
   1403 		BasicBlock *endBlock = Nucleus::createBasicBlock();
   1404 
   1405 		if(isConditionalIf[ifDepth])
   1406 		{
   1407 			Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
   1408 			Bool notAllFalse = SignMask(condition) != 0;
   1409 
   1410 			branch(notAllFalse, falseBlock, endBlock);
   1411 
   1412 			enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
   1413 		}
   1414 		else
   1415 		{
   1416 			Nucleus::createBr(endBlock);
   1417 			Nucleus::setInsertBlock(falseBlock);
   1418 		}
   1419 
   1420 		ifFalseBlock[ifDepth] = endBlock;
   1421 
   1422 		ifDepth++;
   1423 	}
   1424 
   1425 	void PixelProgram::ENDIF()
   1426 	{
   1427 		ifDepth--;
   1428 
   1429 		BasicBlock *endBlock = ifFalseBlock[ifDepth];
   1430 
   1431 		Nucleus::createBr(endBlock);
   1432 		Nucleus::setInsertBlock(endBlock);
   1433 
   1434 		if(isConditionalIf[ifDepth])
   1435 		{
   1436 			enableIndex--;
   1437 		}
   1438 	}
   1439 
   1440 	void PixelProgram::ENDLOOP()
   1441 	{
   1442 		loopRepDepth--;
   1443 
   1444 		aL[loopDepth] = aL[loopDepth] + increment[loopDepth];   // FIXME: +=
   1445 
   1446 		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
   1447 		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
   1448 
   1449 		Nucleus::createBr(testBlock);
   1450 		Nucleus::setInsertBlock(endBlock);
   1451 
   1452 		loopDepth--;
   1453 		enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
   1454 	}
   1455 
   1456 	void PixelProgram::ENDREP()
   1457 	{
   1458 		loopRepDepth--;
   1459 
   1460 		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
   1461 		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
   1462 
   1463 		Nucleus::createBr(testBlock);
   1464 		Nucleus::setInsertBlock(endBlock);
   1465 
   1466 		loopDepth--;
   1467 		enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
   1468 	}
   1469 
   1470 	void PixelProgram::ENDWHILE()
   1471 	{
   1472 		loopRepDepth--;
   1473 
   1474 		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
   1475 		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
   1476 
   1477 		Nucleus::createBr(testBlock);
   1478 		Nucleus::setInsertBlock(endBlock);
   1479 
   1480 		enableIndex--;
   1481 		whileTest = false;
   1482 	}
   1483 
   1484 	void PixelProgram::ENDSWITCH()
   1485 	{
   1486 		loopRepDepth--;
   1487 
   1488 		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
   1489 
   1490 		Nucleus::createBr(endBlock);
   1491 		Nucleus::setInsertBlock(endBlock);
   1492 	}
   1493 
   1494 	void PixelProgram::IF(const Src &src)
   1495 	{
   1496 		if(src.type == Shader::PARAMETER_CONSTBOOL)
   1497 		{
   1498 			IFb(src);
   1499 		}
   1500 		else if(src.type == Shader::PARAMETER_PREDICATE)
   1501 		{
   1502 			IFp(src);
   1503 		}
   1504 		else
   1505 		{
   1506 			Int4 condition = As<Int4>(fetchRegister(src).x);
   1507 			IF(condition);
   1508 		}
   1509 	}
   1510 
   1511 	void PixelProgram::IFb(const Src &boolRegister)
   1512 	{
   1513 		ASSERT(ifDepth < 24 + 4);
   1514 
   1515 		Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME
   1516 
   1517 		if(boolRegister.modifier == Shader::MODIFIER_NOT)
   1518 		{
   1519 			condition = !condition;
   1520 		}
   1521 
   1522 		BasicBlock *trueBlock = Nucleus::createBasicBlock();
   1523 		BasicBlock *falseBlock = Nucleus::createBasicBlock();
   1524 
   1525 		branch(condition, trueBlock, falseBlock);
   1526 
   1527 		isConditionalIf[ifDepth] = false;
   1528 		ifFalseBlock[ifDepth] = falseBlock;
   1529 
   1530 		ifDepth++;
   1531 	}
   1532 
   1533 	void PixelProgram::IFp(const Src &predicateRegister)
   1534 	{
   1535 		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
   1536 
   1537 		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
   1538 		{
   1539 			condition = ~condition;
   1540 		}
   1541 
   1542 		IF(condition);
   1543 	}
   1544 
   1545 	void PixelProgram::IFC(Vector4f &src0, Vector4f &src1, Control control)
   1546 	{
   1547 		Int4 condition;
   1548 
   1549 		switch(control)
   1550 		{
   1551 		case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
   1552 		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
   1553 		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
   1554 		case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
   1555 		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
   1556 		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
   1557 		default:
   1558 			ASSERT(false);
   1559 		}
   1560 
   1561 		IF(condition);
   1562 	}
   1563 
   1564 	void PixelProgram::IF(Int4 &condition)
   1565 	{
   1566 		condition &= enableStack[enableIndex];
   1567 
   1568 		enableIndex++;
   1569 		enableStack[enableIndex] = condition;
   1570 
   1571 		BasicBlock *trueBlock = Nucleus::createBasicBlock();
   1572 		BasicBlock *falseBlock = Nucleus::createBasicBlock();
   1573 
   1574 		Bool notAllFalse = SignMask(condition) != 0;
   1575 
   1576 		branch(notAllFalse, trueBlock, falseBlock);
   1577 
   1578 		isConditionalIf[ifDepth] = true;
   1579 		ifFalseBlock[ifDepth] = falseBlock;
   1580 
   1581 		ifDepth++;
   1582 	}
   1583 
   1584 	void PixelProgram::LABEL(int labelIndex)
   1585 	{
   1586 		if(!labelBlock[labelIndex])
   1587 		{
   1588 			labelBlock[labelIndex] = Nucleus::createBasicBlock();
   1589 		}
   1590 
   1591 		Nucleus::setInsertBlock(labelBlock[labelIndex]);
   1592 		currentLabel = labelIndex;
   1593 	}
   1594 
   1595 	void PixelProgram::LOOP(const Src &integerRegister)
   1596 	{
   1597 		loopDepth++;
   1598 
   1599 		iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
   1600 		aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][1]));
   1601 		increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][2]));
   1602 
   1603 		//	If(increment[loopDepth] == 0)
   1604 		//	{
   1605 		//		increment[loopDepth] = 1;
   1606 		//	}
   1607 
   1608 		BasicBlock *loopBlock = Nucleus::createBasicBlock();
   1609 		BasicBlock *testBlock = Nucleus::createBasicBlock();
   1610 		BasicBlock *endBlock = Nucleus::createBasicBlock();
   1611 
   1612 		loopRepTestBlock[loopRepDepth] = testBlock;
   1613 		loopRepEndBlock[loopRepDepth] = endBlock;
   1614 
   1615 		// FIXME: jump(testBlock)
   1616 		Nucleus::createBr(testBlock);
   1617 		Nucleus::setInsertBlock(testBlock);
   1618 
   1619 		branch(iteration[loopDepth] > 0, loopBlock, endBlock);
   1620 		Nucleus::setInsertBlock(loopBlock);
   1621 
   1622 		iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
   1623 
   1624 		loopRepDepth++;
   1625 	}
   1626 
   1627 	void PixelProgram::REP(const Src &integerRegister)
   1628 	{
   1629 		loopDepth++;
   1630 
   1631 		iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
   1632 		aL[loopDepth] = aL[loopDepth - 1];
   1633 
   1634 		BasicBlock *loopBlock = Nucleus::createBasicBlock();
   1635 		BasicBlock *testBlock = Nucleus::createBasicBlock();
   1636 		BasicBlock *endBlock = Nucleus::createBasicBlock();
   1637 
   1638 		loopRepTestBlock[loopRepDepth] = testBlock;
   1639 		loopRepEndBlock[loopRepDepth] = endBlock;
   1640 
   1641 		// FIXME: jump(testBlock)
   1642 		Nucleus::createBr(testBlock);
   1643 		Nucleus::setInsertBlock(testBlock);
   1644 
   1645 		branch(iteration[loopDepth] > 0, loopBlock, endBlock);
   1646 		Nucleus::setInsertBlock(loopBlock);
   1647 
   1648 		iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
   1649 
   1650 		loopRepDepth++;
   1651 	}
   1652 
   1653 	void PixelProgram::WHILE(const Src &temporaryRegister)
   1654 	{
   1655 		enableIndex++;
   1656 
   1657 		BasicBlock *loopBlock = Nucleus::createBasicBlock();
   1658 		BasicBlock *testBlock = Nucleus::createBasicBlock();
   1659 		BasicBlock *endBlock = Nucleus::createBasicBlock();
   1660 
   1661 		loopRepTestBlock[loopRepDepth] = testBlock;
   1662 		loopRepEndBlock[loopRepDepth] = endBlock;
   1663 
   1664 		Int4 restoreBreak = enableBreak;
   1665 		Int4 restoreContinue = enableContinue;
   1666 
   1667 		// TODO: jump(testBlock)
   1668 		Nucleus::createBr(testBlock);
   1669 		Nucleus::setInsertBlock(testBlock);
   1670 		enableContinue = restoreContinue;
   1671 
   1672 		const Vector4f &src = fetchRegister(temporaryRegister);
   1673 		Int4 condition = As<Int4>(src.x);
   1674 		condition &= enableStack[enableIndex - 1];
   1675 		if(shader->containsLeaveInstruction()) condition &= enableLeave;
   1676 		if(shader->containsBreakInstruction()) condition &= enableBreak;
   1677 		enableStack[enableIndex] = condition;
   1678 
   1679 		Bool notAllFalse = SignMask(condition) != 0;
   1680 		branch(notAllFalse, loopBlock, endBlock);
   1681 
   1682 		Nucleus::setInsertBlock(endBlock);
   1683 		enableBreak = restoreBreak;
   1684 
   1685 		Nucleus::setInsertBlock(loopBlock);
   1686 
   1687 		loopRepDepth++;
   1688 	}
   1689 
   1690 	void PixelProgram::SWITCH()
   1691 	{
   1692 		BasicBlock *endBlock = Nucleus::createBasicBlock();
   1693 
   1694 		loopRepTestBlock[loopRepDepth] = nullptr;
   1695 		loopRepEndBlock[loopRepDepth] = endBlock;
   1696 
   1697 		Int4 restoreBreak = enableBreak;
   1698 
   1699 		BasicBlock *currentBlock = Nucleus::getInsertBlock();
   1700 
   1701 		Nucleus::setInsertBlock(endBlock);
   1702 		enableBreak = restoreBreak;
   1703 
   1704 		Nucleus::setInsertBlock(currentBlock);
   1705 
   1706 		loopRepDepth++;
   1707 	}
   1708 
   1709 	void PixelProgram::RET()
   1710 	{
   1711 		if(currentLabel == -1)
   1712 		{
   1713 			returnBlock = Nucleus::createBasicBlock();
   1714 			Nucleus::createBr(returnBlock);
   1715 		}
   1716 		else
   1717 		{
   1718 			BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
   1719 
   1720 			if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
   1721 			{
   1722 				// FIXME: Encapsulate
   1723 				UInt index = callStack[--stackIndex];
   1724 
   1725 				Value *value = index.loadValue();
   1726 				SwitchCases *switchCases = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
   1727 
   1728 				for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
   1729 				{
   1730 					Nucleus::addSwitchCase(switchCases, i, callRetBlock[currentLabel][i]);
   1731 				}
   1732 			}
   1733 			else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
   1734 			{
   1735 				Nucleus::createBr(callRetBlock[currentLabel][0]);
   1736 			}
   1737 			else   // Function isn't called
   1738 			{
   1739 				Nucleus::createBr(unreachableBlock);
   1740 			}
   1741 
   1742 			Nucleus::setInsertBlock(unreachableBlock);
   1743 			Nucleus::createUnreachable();
   1744 		}
   1745 	}
   1746 
   1747 	void PixelProgram::LEAVE()
   1748 	{
   1749 		enableLeave = enableLeave & ~enableStack[enableIndex];
   1750 
   1751 		// FIXME: Return from function if all instances left
   1752 		// FIXME: Use enableLeave in other control-flow constructs
   1753 	}
   1754 }
   1755