Home | History | Annotate | Download | only in pixelflinger2
      1 /**
      2  **
      3  ** Copyright 2011, The Android Open Source Project
      4  **
      5  ** Licensed under the Apache License, Version 2.0 (the "License");
      6  ** you may not use this file except in compliance with the License.
      7  ** You may obtain a copy of the License at
      8  **
      9  **     http://www.apache.org/licenses/LICENSE-2.0
     10  **
     11  ** Unless required by applicable law or agreed to in writing, software
     12  ** distributed under the License is distributed on an "AS IS" BASIS,
     13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  ** See the License for the specific language governing permissions and
     15  ** limitations under the License.
     16  */
     17 
     18 #include "src/pixelflinger2/pixelflinger2.h"
     19 #include "src/pixelflinger2/llvm_helper.h"
     20 #include "src/mesa/main/mtypes.h"
     21 
     22 #include <llvm/Module.h>
     23 
     24 //#undef LOGD
     25 //#define LOGD(...)
     26 
     27 using namespace llvm;
     28 
     29 static void StencilOp(IRBuilder<> &builder, const unsigned char op,
     30                       Value * sPtr, Value * sRef)
     31 {
     32    CondBranch condBranch(builder);
     33    Value * s = builder.CreateLoad(sPtr, "stenciOpS");
     34    switch (op) {
     35    case 0 : // GL_ZERO
     36       builder.CreateStore(builder.getInt8(0), sPtr);
     37       break;
     38    case 1 : // GL_KEEP
     39       builder.CreateStore(s, sPtr);
     40       break;
     41    case 2 : // GL_REPLACE
     42       builder.CreateStore(sRef, sPtr);
     43       break;
     44    case 3 : // GL_INCR
     45       condBranch.ifCond(builder.CreateICmpEQ(s, builder.getInt8(255)));
     46       builder.CreateStore(s, sPtr);
     47       condBranch.elseop();
     48       builder.CreateStore(builder.CreateAdd(s, builder.getInt8(1)), sPtr);
     49       condBranch.endif();
     50       break;
     51    case 4 : // GL_DECR
     52       condBranch.ifCond(builder.CreateICmpEQ(s, builder.getInt8(0)));
     53       builder.CreateStore(s, sPtr);
     54       condBranch.elseop();
     55       builder.CreateStore(builder.CreateSub(s, builder.getInt8(1)), sPtr);
     56       condBranch.endif();
     57       break;
     58    case 5 : // GL_INVERT
     59       builder.CreateStore(builder.CreateNot(s), sPtr);
     60       break;
     61    case 6 : // GL_INCR_WRAP
     62       builder.CreateStore(builder.CreateAdd(s, builder.getInt8(1)), sPtr);
     63       break;
     64    case 7 : // GL_DECR_WRAP
     65       builder.CreateStore(builder.CreateSub(s, builder.getInt8(1)), sPtr);
     66       break;
     67    default:
     68       assert(0);
     69       break;
     70    }
     71 }
     72 
     73 static Value * StencilOp(IRBuilder<> & builder, Value * face,
     74                          const unsigned char frontOp, const unsigned char backOp,
     75                          Value * sPtr, Value * sRef)
     76 {
     77    CondBranch condBranch(builder);
     78    if (frontOp != backOp)
     79       condBranch.ifCond(builder.CreateICmpEQ(face, builder.getInt8(0)));
     80 
     81    StencilOp(builder, frontOp, sPtr, sRef);
     82 
     83    if (frontOp != backOp) {
     84       condBranch.elseop();
     85       StencilOp(builder, backOp, sPtr, sRef);
     86       condBranch.endif();
     87    }
     88    return builder.CreateLoad(sPtr);
     89 }
     90 
     91 static void StencilFunc(IRBuilder<> & builder, const unsigned char func,
     92                         Value * s, Value * sRef, Value * sCmpPtr)
     93 {
     94    switch (func) {
     95    case GL_NEVER & 0x7:
     96       builder.CreateStore(builder.getFalse(), sCmpPtr);
     97       break;
     98    case GL_LESS & 0x7:
     99       builder.CreateStore(builder.CreateICmpULT(sRef, s), sCmpPtr);
    100       break;
    101    case GL_EQUAL & 0x7:
    102       builder.CreateStore(builder.CreateICmpEQ(sRef, s), sCmpPtr);
    103       break;
    104    case GL_LEQUAL & 0x7:
    105       builder.CreateStore(builder.CreateICmpULE(sRef, s), sCmpPtr);
    106       break;
    107    case GL_GREATER & 0x7:
    108       builder.CreateStore(builder.CreateICmpUGT(sRef, s), sCmpPtr);
    109       break;
    110    case GL_NOTEQUAL & 0x7:
    111       builder.CreateStore(builder.CreateICmpNE(sRef, s), sCmpPtr);
    112       break;
    113    case GL_GEQUAL & 0x7:
    114       builder.CreateStore(builder.CreateICmpUGE(sRef, s), sCmpPtr);
    115       break;
    116    case GL_ALWAYS & 0x7:
    117       builder.CreateStore(builder.getTrue(), sCmpPtr);
    118       break;
    119    default:
    120       assert(0);
    121       break;
    122    }
    123 }
    124 
    125 static Value * BlendFactor(const unsigned mode, Value * src, Value * dst,
    126                            Value * constant, Value * one, Value * zero,
    127                            Value * srcA, Value * dstA, Value * constantA,
    128                            Value * sOne, const bool isVector, IRBuilder<> & builder)
    129 {
    130    Value * factor = NULL;
    131    switch (mode) {
    132    case GGLBlendState::GGL_ZERO:
    133       factor = zero;
    134       break;
    135    case GGLBlendState::GGL_ONE:
    136       factor = one;
    137       break;
    138    case GGLBlendState::GGL_SRC_COLOR:
    139       factor = src;
    140       break;
    141    case GGLBlendState::GGL_ONE_MINUS_SRC_COLOR:
    142       factor = builder.CreateSub(one, src);
    143       break;
    144    case GGLBlendState::GGL_DST_COLOR:
    145       factor = dst;
    146       break;
    147    case GGLBlendState::GGL_ONE_MINUS_DST_COLOR:
    148       factor = builder.CreateSub(one, dst);
    149       break;
    150    case GGLBlendState::GGL_SRC_ALPHA:
    151       factor = srcA;
    152       if (isVector)
    153          factor = intVec(builder, factor, factor, factor, factor);
    154       break;
    155    case GGLBlendState::GGL_ONE_MINUS_SRC_ALPHA:
    156       factor = builder.CreateSub(sOne, srcA);
    157       if (isVector)
    158          factor = intVec(builder, factor, factor, factor, factor);
    159       break;
    160    case GGLBlendState::GGL_DST_ALPHA:
    161       factor = dstA;
    162       if (isVector)
    163          factor = intVec(builder, factor, factor, factor, factor);
    164       break;
    165    case GGLBlendState::GGL_ONE_MINUS_DST_ALPHA:
    166       factor = builder.CreateSub(sOne, dstA);
    167       if (isVector)
    168          factor = intVec(builder, factor, factor, factor, factor);
    169       break;
    170    case GGLBlendState::GGL_SRC_ALPHA_SATURATE:
    171       // valid only for source color and alpha
    172       factor = minIntScalar(builder, srcA, builder.CreateSub(sOne, dstA));
    173       if (isVector)
    174          factor = intVec(builder, factor, factor, factor, sOne);
    175       else
    176          factor = sOne; // when it's used for source alpha, it's just 1
    177       break;
    178    case GGLBlendState::GGL_CONSTANT_COLOR:
    179       factor = constant;
    180       break;
    181    case GGLBlendState::GGL_ONE_MINUS_CONSTANT_COLOR:
    182       factor = builder.CreateSub(one, constant);
    183       break;
    184    case GGLBlendState::GGL_CONSTANT_ALPHA:
    185       factor = constantA;
    186       if (isVector)
    187          factor = intVec(builder, factor, factor, factor, factor);
    188       break;
    189    case GGLBlendState::GGL_ONE_MINUS_CONSTANT_ALPHA:
    190       factor = builder.CreateSub(sOne, constantA);
    191       if (isVector)
    192          factor = intVec(builder, factor, factor, factor, factor);
    193       break;
    194    default:
    195       assert(0);
    196       break;
    197    }
    198    return factor;
    199 }
    200 
    201 static Value * Saturate(IRBuilder<> & builder, Value * intVector)
    202 {
    203    intVector = intVecMax(builder, intVector, constIntVec(builder, 0,0,0,0));
    204    return intVecMin(builder, intVector, constIntVec(builder, 255,255,255,255));
    205 }
    206 
    207 // src is int32x4 [0,255] rgba vector, and combines them into int32
    208 // RGB_565 channel order is weird
    209 static Value * IntVectorToScreenColor(IRBuilder<> & builder, const GGLPixelFormat format, Value * src)
    210 {
    211    if (GGL_PIXEL_FORMAT_RGBA_8888 == format) {
    212       src = builder.CreateShl(src, constIntVec(builder, 0, 8, 16, 24));
    213       std::vector<Value *> comps = extractVector(builder, src);
    214       comps[0] = builder.CreateOr(comps[0], comps[1]);
    215       comps[0] = builder.CreateOr(comps[0], comps[2]);
    216       comps[0] = builder.CreateOr(comps[0], comps[3]);
    217       return comps[0];
    218    } else if (GGL_PIXEL_FORMAT_RGB_565 == format) {
    219       src = builder.CreateAnd(src, constIntVec(builder, 0xf8, 0xfc, 0xf8, 0));
    220       std::vector<Value *> comps = extractVector(builder, src);
    221       // channel order is weird
    222       for (unsigned i = 0; i < 4; i++)
    223          comps[i] = builder.CreateTrunc(comps[i], builder.getInt16Ty());
    224       comps[2] = builder.CreateLShr(comps[2], 3);
    225       comps[1] = builder.CreateShl(comps[1], 3);
    226       comps[0] = builder.CreateShl(comps[0], 8);
    227 
    228       comps[0] = builder.CreateOr(comps[0], comps[1]);
    229       comps[0] = builder.CreateOr(comps[0], comps[2]);
    230       return comps[0];
    231    } else if (GGL_PIXEL_FORMAT_UNKNOWN == format)
    232       return builder.getInt32(0);
    233    else
    234       assert(0);
    235    return NULL;
    236 }
    237 
    238 // src is int32 or int16, return is int32x4 [0,255] rgba
    239 // RGB_565 channel order is weird
    240 static Value * ScreenColorToIntVector(IRBuilder<> & builder, const GGLPixelFormat format, Value * src)
    241 {
    242    src = builder.CreateZExt(src, builder.getInt32Ty());
    243    Value * dst = intVec(builder, src, src, src, src);
    244    if (GGL_PIXEL_FORMAT_RGBA_8888 == format) {
    245       dst = builder.CreateLShr(dst, constIntVec(builder, 0, 8, 16, 24));
    246       dst = builder.CreateAnd(dst, constIntVec(builder, 0xff, 0xff, 0xff, 0xff));
    247    } else if (GGL_PIXEL_FORMAT_RGB_565 == format) {
    248       // channel order is weird
    249       dst = builder.CreateAnd(dst, constIntVec(builder, 0xf800, 0x7e0, 0x1f, 0));
    250       dst = builder.CreateLShr(dst, constIntVec(builder, 8, 3, 0, 0));
    251       dst = builder.CreateShl(dst, constIntVec(builder, 0, 0, 3, 0));
    252       dst = builder.CreateOr(dst, constIntVec(builder, 0, 0, 0, 0xff));
    253    } else if (GGL_PIXEL_FORMAT_UNKNOWN == format)
    254       LOGD("pf2: ScreenColorToIntVector GGL_PIXEL_FORMAT_UNKNOWN"); // not set yet, do nothing
    255    else
    256       assert(0);
    257    return dst;
    258 }
    259 
    260 // src is <4 x float> approx [0,1]; dst is <4 x i32> [0,255] from frame buffer; return is i32
    261 Value * GenerateFSBlend(const GGLState * gglCtx, const GGLPixelFormat format, /*const RegDesc * regDesc,*/
    262                         IRBuilder<> & builder, Value * src, Value * dst)
    263 {
    264    Type * const intType = builder.getInt32Ty();
    265 
    266    // TODO cast the outputs pointer type to int for writing to minimize bandwidth
    267    if (!gglCtx->blendState.enable) {
    268 //        if (regDesc->IsInt32Color())
    269 //        {
    270 //            debug_printf("GenerateFixedFS dst is already scalar fixed0 \n");
    271 //            src = builder.CreateExtractElement(src, builder.getInt32(0));
    272 //            src = builder.CreateBitCast(src, intType); // it's already RGBA int32
    273 //        }
    274 //        else if (regDesc->IsVectorType(Float))
    275 //        {
    276       src = builder.CreateFMul(src, constFloatVec(builder,255,255,255,255));
    277       src = builder.CreateFPToSI(src, intVecType(builder));
    278       src = Saturate(builder, src);
    279       src = IntVectorToScreenColor(builder, format, src);
    280 //        }
    281 //        else if (regDesc->IsVectorType(Fixed8))
    282 //        {
    283 //            src = builder.CreateBitCast(src, instr->GetIntVectorType());
    284 //            src = Saturate(instr, src);
    285 //            src = IntVectorToColor(instr, storage, src);
    286 //        }
    287 //        else if (regDesc->IsVectorType(Fixed16))
    288 //        {
    289 //            src = builder.CreateBitCast(src, instr->GetIntVectorType());
    290 //            src = builder.CreateAShr(src, constIntVec(builder,8,8,8,8));
    291 //            src = Saturate(instr, src);
    292 //            src = IntVectorToColor(instr, storage, src);
    293 //        }
    294 //        else
    295 //            assert(0);
    296       return src;
    297    }
    298    // blending, so convert src to <4 x i32>
    299 //    if (regDesc->IsInt32Color())
    300 //    {
    301 //        src = builder.CreateExtractElement(src, builder.getInt32(0));
    302 //        src = builder.CreateBitCast(src, intType); // it's already RGBA int32
    303 //
    304 //        Value * channels = Constant::getNullValue(instr->GetIntVectorType());
    305 //        channels = builder.CreateInsertElement(channels, src, builder.getInt32(0));
    306 //        channels = builder.CreateInsertElement(channels, src, builder.getInt32(1));
    307 //        channels = builder.CreateInsertElement(channels, src, builder.getInt32(2));
    308 //        channels = builder.CreateInsertElement(channels, src, builder.getInt32(3));
    309 //        channels = builder.CreateLShr(channels, constIntVec(builder,0, 8, 16, 24));
    310 //        channels = builder.CreateAnd(channels, constIntVec(builder,0xff, 0xff, 0xff, 0xff));
    311 //        src = channels;
    312 //    }
    313 //    else if (regDesc->IsVectorType(Fixed8)) // it's already int32x4 RGBA
    314 //        src = builder.CreateBitCast(src, instr->GetIntVectorType());
    315 //    else if (regDesc->IsVectorType(Fixed16))
    316 //    {
    317 //        src = builder.CreateBitCast(src, instr->GetIntVectorType());
    318 //        // TODO DXL consider shl dst by 8 and ashr by 16 in the end for more precision
    319 //        src = builder.CreateAShr(src, constIntVec(builder,8,8,8,8));
    320 //    }
    321 //    else if (regDesc->IsVectorType(Float))
    322 //    {
    323    src = builder.CreateFMul(src, constFloatVec(builder,255,255,255,255));
    324    src = builder.CreateFPToSI(src, intVecType(builder));
    325 //    }
    326 //    else
    327 //        assert(0);
    328 
    329    Value * const one = constIntVec(builder,255,255,255,255);
    330    Value * const zero = constIntVec(builder,0,0,0,0);
    331    Value * const sOne = builder.getInt32(255);
    332    Value * const sZero = builder.getInt32(0);
    333 
    334 #if USE_LLVM_SCANLINE
    335    Value * constant = constIntVec(builder,gglCtx->blendState.color[0],
    336                                   gglCtx->blendState.color[1],
    337                                   gglCtx->blendState.color[2],
    338                                   gglCtx->blendState.color[3]);
    339 #else
    340    Value * constant = NULL;
    341    assert(0);
    342 #endif
    343 
    344    Value * srcA = extractVector(builder,src)[3];
    345    Value * dstA = extractVector(builder,dst)[3];
    346    Value * constantA = extractVector(builder,constant)[3];
    347 
    348    Value * sf = BlendFactor(gglCtx->blendState.scf, src, dst,
    349                             constant, one, zero, srcA, dstA,
    350                             constantA, sOne, true, builder);
    351    if (gglCtx->blendState.scf != gglCtx->blendState.saf) {
    352       Value * sfA = BlendFactor(gglCtx->blendState.saf, srcA, dstA,
    353                                 constantA, sOne, sZero, srcA, dstA,
    354                                 constantA, sOne, false, builder);
    355       sf = builder.CreateInsertElement(sf, sfA, builder.getInt32(3),
    356                                        name("sfAStore"));
    357    }
    358 
    359    Value * df = BlendFactor(gglCtx->blendState.dcf, src, dst,
    360                             constant, one, zero, srcA, dstA,
    361                             constantA, sOne, true, builder);
    362    if (gglCtx->blendState.dcf != gglCtx->blendState.daf) {
    363       Value * dfA = BlendFactor(gglCtx->blendState.daf, srcA, dstA,
    364                                 constantA, sOne, sZero, srcA, dstA,
    365                                 constantA, sOne, false, builder);
    366       df = builder.CreateInsertElement(df, dfA, builder.getInt32(3),
    367                                        name("dfAStore"));
    368    }
    369 
    370    // this is factor *= 256 / 255; factors have a chance of constant folding
    371    sf = builder.CreateAdd(sf, builder.CreateLShr(sf, constIntVec(builder,7,7,7,7)));
    372    df = builder.CreateAdd(df, builder.CreateLShr(df, constIntVec(builder,7,7,7,7)));
    373 
    374    src = builder.CreateMul(src, sf);
    375    dst = builder.CreateMul(dst, df);
    376 
    377    Value * res = NULL;
    378    switch (gglCtx->blendState.ce + GL_FUNC_ADD) {
    379    case GL_FUNC_ADD:
    380       res = builder.CreateAdd(src, dst);
    381       break;
    382    case GL_FUNC_SUBTRACT:
    383       res = builder.CreateSub(src, dst);
    384       break;
    385    case GL_FUNC_REVERSE_SUBTRACT:
    386       res = builder.CreateSub(dst, src);
    387       break;
    388    default:
    389       assert(0);
    390       break;
    391    }
    392    if (gglCtx->blendState.ce != gglCtx->blendState.ae) {
    393       srcA = extractVector(builder,src)[3];
    394       dstA = extractVector(builder,dst)[3];
    395       Value * resA = NULL;
    396       switch (gglCtx->blendState.ae + GL_FUNC_ADD) {
    397       case GL_FUNC_ADD:
    398          resA = builder.CreateAdd(srcA, dstA);
    399          break;
    400       case GL_FUNC_SUBTRACT:
    401          resA = builder.CreateSub(srcA, dstA);
    402          break;
    403       case GL_FUNC_REVERSE_SUBTRACT:
    404          resA = builder.CreateSub(dstA, srcA);
    405          break;
    406       default:
    407          assert(0);
    408          break;
    409       }
    410       res = builder.CreateInsertElement(res, resA, builder.getInt32(3),
    411                                         name("resAStore"));
    412    }
    413 
    414    res = builder.CreateAShr(res, constIntVec(builder,8,8,8,8));
    415    res = Saturate(builder, res);
    416    res = IntVectorToScreenColor(builder, format, res);
    417    return res;
    418 }
    419 
    420 static FunctionType * ScanLineFunctionType(IRBuilder<> & builder)
    421 {
    422    std::vector<Type*> funcArgs;
    423    VectorType * vectorType = floatVecType(builder);
    424    PointerType * vectorPtr = PointerType::get(vectorType, 0);
    425    Type * intType = builder.getInt32Ty();
    426    PointerType * intPointerType = PointerType::get(intType, 0);
    427    PointerType * bytePointerType = PointerType::get(builder.getInt8Ty(), 0);
    428 
    429    funcArgs.push_back(vectorPtr); // start
    430    funcArgs.push_back(vectorPtr); // step
    431    funcArgs.push_back(vectorPtr); // constants
    432    funcArgs.push_back(intPointerType); // frame
    433    funcArgs.push_back(intPointerType); // depth
    434    funcArgs.push_back(bytePointerType); // stencil
    435    funcArgs.push_back(bytePointerType); // stencil state
    436    funcArgs.push_back(intType); // count
    437 
    438    FunctionType *functionType = FunctionType::get(/*Result=*/builder.getVoidTy(),
    439                                                   llvm::ArrayRef<Type*>(funcArgs),
    440                                                   /*isVarArg=*/false);
    441 
    442    return functionType;
    443 }
    444 
    445 // generated scanline function parameters are VertexOutput * start, VertexOutput * step,
    446 // unsigned * frame, int * depth, unsigned char * stencil,
    447 // GGLActiveStencilState * stencilState, unsigned count
    448 void GenerateScanLine(const GGLState * gglCtx, const gl_shader_program * program, Module * mod,
    449                       const char * shaderName, const char * scanlineName)
    450 {
    451    IRBuilder<> builder(mod->getContext());
    452 //   debug_printf("GenerateScanLine %s \n", scanlineName);
    453 
    454    Type * intType = builder.getInt32Ty();
    455    PointerType * intPointerType = PointerType::get(intType, 0);
    456    Type * byteType = builder.getInt8Ty();
    457    PointerType * bytePointerType = PointerType::get(byteType, 0);
    458 
    459    Function * func = mod->getFunction(scanlineName);
    460    if (func)
    461       return;
    462 
    463    func = llvm::cast<Function>(mod->getOrInsertFunction(scanlineName,
    464                                ScanLineFunctionType(builder)));
    465 
    466    BasicBlock *label_entry = BasicBlock::Create(builder.getContext(), "entry", func, 0);
    467    builder.SetInsertPoint(label_entry);
    468    CondBranch condBranch(builder);
    469 
    470    Function::arg_iterator args = func->arg_begin();
    471    Value * start = args++;
    472    start->setName("start");
    473    Value * step = args++;
    474    step->setName("step");
    475    Value * constants = args++;
    476    constants->setName("constants");
    477 
    478    // need alloc to be able to assign to it by using store
    479    Value * framePtr = builder.CreateAlloca(intPointerType);
    480    builder.CreateStore(args++, framePtr);
    481    Value * depthPtr = builder.CreateAlloca(intPointerType);
    482    builder.CreateStore(args++, depthPtr);
    483    Value * stencilPtr = builder.CreateAlloca(bytePointerType);
    484    builder.CreateStore(args++, stencilPtr);
    485    Value * stencilState = args++;
    486    stencilState->setName("stencilState");
    487    Value * countPtr = builder.CreateAlloca(intType);
    488    builder.CreateStore(args++, countPtr);
    489 
    490    Value * sFace = NULL, * sRef = NULL, *sMask = NULL, * sFunc = NULL;
    491    if (gglCtx->bufferState.stencilTest) {
    492       sFace = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 0), "sFace");
    493       if (gglCtx->frontStencil.ref == gglCtx->backStencil.ref)
    494          sRef = builder.getInt8(gglCtx->frontStencil.ref);
    495       else
    496          sRef = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 1), "sRef");
    497       if (gglCtx->frontStencil.mask == gglCtx->backStencil.mask)
    498          sMask = builder.getInt8(gglCtx->frontStencil.mask);
    499       else
    500          sMask = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 2), "sMask");
    501       if (gglCtx->frontStencil.func == gglCtx->backStencil.func)
    502          sFunc = builder.getInt8(gglCtx->frontStencil.func);
    503       else
    504          sFunc = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 3), "sFunc");
    505    }
    506 
    507    condBranch.beginLoop(); // while (count > 0)
    508 
    509    assert(framePtr && gglCtx);
    510    // get values
    511    Value * frame = NULL;
    512    if (GGL_PIXEL_FORMAT_RGBA_8888 == gglCtx->bufferState.colorFormat)
    513       frame = builder.CreateLoad(framePtr);
    514    else if (GGL_PIXEL_FORMAT_RGB_565 == gglCtx->bufferState.colorFormat) {
    515       frame = builder.CreateLoad(framePtr);
    516       frame = builder.CreateBitCast(frame, PointerType::get(builder.getInt16Ty(), 0));
    517    } else if (GGL_PIXEL_FORMAT_UNKNOWN == gglCtx->bufferState.colorFormat)
    518       frame = builder.CreateLoad(framePtr); // color buffer not set yet
    519    else
    520       assert(0);
    521 
    522    frame->setName("frame");
    523    Value * depth = NULL, * stencil = NULL;
    524    if (gglCtx->bufferState.depthTest) {
    525       assert(GGL_PIXEL_FORMAT_Z_32 == gglCtx->bufferState.depthFormat);
    526       depth = builder.CreateLoad(depthPtr);
    527       depth->setName("depth");
    528    }
    529 
    530    Value * count = builder.CreateLoad(countPtr);
    531    count->setName("count");
    532 
    533    Value * cmp = builder.CreateICmpEQ(count, builder.getInt32(0));
    534    condBranch.ifCond(cmp, "if_break_loop"); // if (count == 0)
    535    condBranch.brk(); // break;
    536    condBranch.endif();
    537 
    538    Value * sCmpPtr = NULL, * sCmp = NULL, * sPtr = NULL, * s = NULL;
    539    if (gglCtx->bufferState.stencilTest) {
    540       stencil = builder.CreateLoad(stencilPtr);
    541       stencil->setName("stencil");
    542 
    543       // temporaries to load/store value
    544       sCmpPtr = builder.CreateAlloca(builder.getInt1Ty());
    545       sCmpPtr->setName("sCmpPtr");
    546       sPtr = builder.CreateAlloca(byteType);
    547       sPtr->setName("sPtr");
    548 
    549       s = builder.CreateLoad(stencil);
    550       s = builder.CreateAnd(s, sMask);
    551       builder.CreateStore(s, sPtr);
    552 
    553       if (gglCtx->frontStencil.func != gglCtx->backStencil.func)
    554          condBranch.ifCond(builder.CreateICmpEQ(sFace, builder.getInt8(0)));
    555 
    556       StencilFunc(builder, gglCtx->frontStencil.func, s, sRef, sCmpPtr);
    557 
    558       if (gglCtx->frontStencil.func != gglCtx->backStencil.func) {
    559          condBranch.elseop();
    560          StencilFunc(builder, gglCtx->backStencil.func, s, sRef, sCmpPtr);
    561          condBranch.endif();
    562       }
    563 
    564       sCmp = builder.CreateLoad(sCmpPtr);
    565    } else
    566       sCmp = ConstantInt::getTrue(mod->getContext());
    567    sCmp->setName("sCmp");
    568 
    569    Value * depthZ = NULL, * zPtr = NULL, * z = NULL, * zCmp = NULL;
    570    if (gglCtx->bufferState.depthTest) {
    571       depthZ  = builder.CreateLoad(depth, "depthZ"); // z stored in buffer
    572       zPtr = builder.CreateAlloca(intType); // temp store for modifying incoming z
    573       zPtr->setName("zPtr");
    574 
    575       // modified incoming z
    576       z = builder.CreateBitCast(start, intPointerType);
    577       z = builder.CreateConstInBoundsGEP1_32(z, (GGL_FS_INPUT_OFFSET +
    578                                              GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
    579       z = builder.CreateLoad(z, "z");
    580 
    581       builder.CreateStore(z, zPtr);
    582 
    583       Value * zNegative = builder.CreateICmpSLT(z, builder.getInt32(0));
    584       condBranch.ifCond(zNegative);
    585       // if (0x80000000 & z) z ^= 0x7fffffff since smaller -ve float means bigger -ve int
    586       z = builder.CreateXor(z, builder.getInt32(0x7fffffff));
    587       builder.CreateStore(z, zPtr);
    588 
    589       condBranch.endif();
    590 
    591       z = builder.CreateLoad(zPtr, "z");
    592 
    593       switch (0x200 | gglCtx->bufferState.depthFunc) {
    594       case GL_NEVER:
    595          zCmp = ConstantInt::getFalse(mod->getContext());
    596          break;
    597       case GL_LESS:
    598          zCmp = builder.CreateICmpSLT(z, depthZ);
    599          break;
    600       case GL_EQUAL:
    601          zCmp = builder.CreateICmpEQ(z, depthZ);
    602          break;
    603       case GL_LEQUAL:
    604          zCmp = builder.CreateICmpSLE(z, depthZ);
    605          break;
    606       case GL_GREATER:
    607          zCmp = builder.CreateICmpSGT(z, depthZ);
    608          break;
    609       case GL_NOTEQUAL:
    610          zCmp = builder.CreateICmpNE(z, depthZ);
    611          break;
    612       case GL_GEQUAL:
    613          zCmp = builder.CreateICmpSGE(z, depthZ);
    614          break;
    615       case GL_ALWAYS:
    616          zCmp = ConstantInt::getTrue(mod->getContext());
    617          break;
    618       default:
    619          assert(0);
    620          break;
    621       }
    622    } else // no depth test means always pass
    623       zCmp = ConstantInt::getTrue(mod->getContext());
    624    zCmp->setName("zCmp");
    625 
    626    condBranch.ifCond(sCmp, "if_sCmp", "sCmp_fail");
    627    condBranch.ifCond(zCmp, "if_zCmp", "zCmp_fail");
    628 
    629    Value * inputs = start;
    630    Value * outputs = start;
    631 
    632    Value * fsOutputs = builder.CreateConstInBoundsGEP1_32(start,
    633                        offsetof(VertexOutput,fragColor)/sizeof(Vector4));
    634 
    635    Function * fsFunction = mod->getFunction(shaderName);
    636    assert(fsFunction);
    637    CallInst *call = builder.CreateCall3(fsFunction,inputs, outputs, constants);
    638    call->setCallingConv(CallingConv::C);
    639    call->setTailCall(false);
    640 
    641    Value * dst = Constant::getNullValue(intVecType(builder));
    642    if (gglCtx->blendState.enable && (0 != gglCtx->blendState.dcf || 0 != gglCtx->blendState.daf)) {
    643       Value * frameColor = builder.CreateLoad(frame, "frameColor");
    644       dst = ScreenColorToIntVector(builder, gglCtx->bufferState.colorFormat, frameColor);
    645    }
    646 
    647    Value * src = builder.CreateConstInBoundsGEP1_32(fsOutputs, 0);
    648    src = builder.CreateLoad(src);
    649 
    650    Value * color = GenerateFSBlend(gglCtx, gglCtx->bufferState.colorFormat,/*&prog->outputRegDesc,*/ builder, src, dst);
    651    builder.CreateStore(color, frame);
    652    // TODO DXL depthmask check
    653    if (gglCtx->bufferState.depthTest) {
    654       z = builder.CreateBitCast(z, intType);
    655       builder.CreateStore(z, depth); // store z
    656    }
    657 
    658    if (gglCtx->bufferState.stencilTest)
    659       builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.dPass,
    660                                     gglCtx->backStencil.dPass, sPtr, sRef), stencil);
    661 
    662    condBranch.elseop(); // failed z test
    663 
    664    if (gglCtx->bufferState.stencilTest)
    665       builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.dFail,
    666                                     gglCtx->backStencil.dFail, sPtr, sRef), stencil);
    667    condBranch.endif();
    668    condBranch.elseop(); // failed s test
    669 
    670    if (gglCtx->bufferState.stencilTest)
    671       builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.sFail,
    672                                     gglCtx->backStencil.sFail, sPtr, sRef), stencil);
    673 
    674    condBranch.endif();
    675    assert(frame);
    676    frame = builder.CreateConstInBoundsGEP1_32(frame, 1); // frame++
    677    // frame may have been casted to short* from int*, so cast back
    678    frame = builder.CreateBitCast(frame, PointerType::get(builder.getInt32Ty(), 0));
    679    builder.CreateStore(frame, framePtr);
    680    if (gglCtx->bufferState.depthTest) {
    681       depth = builder.CreateConstInBoundsGEP1_32(depth, 1); // depth++
    682       builder.CreateStore(depth, depthPtr);
    683    }
    684    if (gglCtx->bufferState.stencilTest) {
    685       stencil = builder.CreateConstInBoundsGEP1_32(stencil, 1); // stencil++
    686       builder.CreateStore(stencil, stencilPtr);
    687    }
    688    Value * vPtr = NULL, * v = NULL, * dx = NULL;
    689    if (program->UsesFragCoord) {
    690       vPtr = builder.CreateConstInBoundsGEP1_32(start, GGL_FS_INPUT_OFFSET +
    691              GGL_FS_INPUT_FRAGCOORD_INDEX);
    692       v = builder.CreateLoad(vPtr);
    693       dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
    694                                               GGL_FS_INPUT_FRAGCOORD_INDEX);
    695       dx = builder.CreateLoad(dx);
    696       v = builder.CreateFAdd(v, dx);
    697       builder.CreateStore(v, vPtr);
    698    } else if (gglCtx->bufferState.depthTest) {
    699       Type * floatType = builder.getFloatTy();
    700       PointerType * floatPointerType = PointerType::get(floatType, 0);
    701       vPtr = builder.CreateBitCast(start, floatPointerType);
    702       vPtr = builder.CreateConstInBoundsGEP1_32(vPtr,
    703              (GGL_FS_INPUT_OFFSET + GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
    704       v = builder.CreateLoad(vPtr);
    705       dx = builder.CreateBitCast(step, floatPointerType);
    706       dx = builder.CreateConstInBoundsGEP1_32(dx,
    707                                               (GGL_FS_INPUT_OFFSET + GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
    708       dx = builder.CreateLoad(dx);
    709       v = builder.CreateFAdd(v, dx);
    710       builder.CreateStore(v, vPtr);
    711    }
    712 
    713    if (program->UsesPointCoord) {
    714       vPtr = builder.CreateConstInBoundsGEP1_32(start, GGL_FS_INPUT_OFFSET +
    715              GGL_FS_INPUT_FRONTFACINGPOINTCOORD_INDEX);
    716       v = builder.CreateLoad(vPtr);
    717       dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
    718                                               GGL_FS_INPUT_FRONTFACINGPOINTCOORD_INDEX);
    719       dx = builder.CreateLoad(dx);
    720       v = builder.CreateFAdd(v, dx);
    721       builder.CreateStore(v, vPtr);
    722    }
    723 
    724    for (unsigned i = 0; i < program->VaryingSlots; ++i) {
    725       vPtr = builder.CreateConstInBoundsGEP1_32(start, offsetof(VertexOutput,varyings)/sizeof(Vector4) + i);
    726       v = builder.CreateLoad(vPtr);
    727       dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
    728                                               GGL_FS_INPUT_VARYINGS_INDEX + i);
    729       dx = builder.CreateLoad(dx);
    730       v = builder.CreateFAdd(v, dx);
    731       builder.CreateStore(v, vPtr);
    732    }
    733 
    734    count = builder.CreateSub(count, builder.getInt32(1));
    735    builder.CreateStore(count, countPtr); // count--;
    736 
    737    condBranch.endLoop();
    738 
    739    builder.CreateRetVoid();
    740 }
    741