Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
      3  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
      4  * Copyright  2010 Intel Corporation
      5  * Copyright  2010 Luca Barbieri
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the "Software"),
      9  * to deal in the Software without restriction, including without limitation
     10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     11  * and/or sell copies of the Software, and to permit persons to whom the
     12  * Software is furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the next
     15  * paragraph) shall be included in all copies or substantial portions of the
     16  * Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     24  * DEALINGS IN THE SOFTWARE.
     25  */
     26 
     27 /**
     28  * \file ir_to_llvm.cpp
     29  *
     30  * Translates the IR to LLVM
     31  */
     32 
     33 /* this tends to get set as part of LLVM_CFLAGS, but we definitely want asserts */
     34 #ifdef NDEBUG
     35 #undef NDEBUG
     36 #endif
     37 
     38 #include "llvm/ADT/ArrayRef.h"
     39 #include "llvm/DerivedTypes.h"
     40 #include "llvm/LLVMContext.h"
     41 #include "llvm/Module.h"
     42 #include "llvm/Analysis/Verifier.h"
     43 #include "llvm/Support/IRBuilder.h"
     44 //#include "llvm/Intrinsics.h"
     45 
#include <stdint.h>
#include <stdio.h>
#include <map>
#include <string>
#include <vector>
     49 /*
     50 #ifdef _MSC_VER
     51 #include <unordered_map>
     52 #else
     53 #include <tr1/unordered_map>
     54 #endif
     55 // use C++0x/Microsoft convention
     56 namespace std
     57 {
     58 using namespace tr1;
     59 }
     60 //*/
     61 
     62 #include "ir.h"
     63 #include "ir_visitor.h"
     64 #include "glsl_types.h"
     65 #include "src/mesa/main/mtypes.h"
     66 
     67 // Helper function to convert array to llvm::ArrayRef
     68 template <typename T, size_t N>
     69 static inline llvm::ArrayRef<T> pack(T const (&array)[N]) {
     70    return llvm::ArrayRef<T>(array);
     71 }
     72 
     73 // Helper function to convert pointer + size to llvm::ArrayRef
     74 template <typename T>
     75 static inline llvm::ArrayRef<T> pack(T const *ptr, size_t n) {
     76    return llvm::ArrayRef<T>(ptr, n);
     77 }
     78 
     79 struct GGLState;
     80 
// Texture helpers that are only declared here; their definitions are not in
// this part of the file.
// NOTE(review): presumably these emit a 2D / cube-map texture lookup for the
// coordinate in1 on the given sampler index -- confirm against the definitions.
llvm::Value * tex2D(llvm::IRBuilder<> & builder, llvm::Value * in1, const unsigned sampler,
                     const GGLState * gglCtx);
llvm::Value * texCube(llvm::IRBuilder<> & builder, llvm::Value * in1, const unsigned sampler,
                     const GGLState * gglCtx);
     85 
     86 class ir_to_llvm_visitor : public ir_visitor {
     87    ir_to_llvm_visitor();
     88 public:
     89 
     90 
   // LLVM context taken from the module passed to the constructor.
   llvm::LLVMContext& ctx;
   // Module that receives the generated globals and functions.
   llvm::Module* mod;
   // Non-NULL selects the in-function path of llvm_variable(); NULL selects
   // its global-declaration path.
   llvm::Function* fun;
   // could easily support more loops, but GLSL doesn't support multiloop break/continue
   std::pair<llvm::BasicBlock*, llvm::BasicBlock*> loop;
   // Compared against the function's entry block by llvm_variable() to decide
   // how an alloca is placed.
   // NOTE(review): presumably the block currently being emitted -- confirm.
   llvm::BasicBlock* bb;
   // Scratch slot: llvm_value() clears it, runs accept(), then returns it.
   llvm::Value* result;
   // Instruction builder constructed on ctx.
   llvm::IRBuilder<> bld;

   const GGLState * gglCtx;
   // Appended to GLSL function names by llvm_function() to form LLVM symbols.
   const char * shaderSuffix;
   llvm::Value * inputsPtr, * outputsPtr, * constantsPtr; // internal globals to store inputs/outputs/constants pointers
   // Base pointers that llvm_variable() indexes by var->location for
   // in / out / uniform variables.
   llvm::Value * inputs, * outputs, * constants;
    104 
    105    ir_to_llvm_visitor(llvm::Module* p_mod, const GGLState * GGLCtx, const char * suffix)
    106    : ctx(p_mod->getContext()), mod(p_mod), fun(0), loop(std::make_pair((llvm::BasicBlock*)0,
    107       (llvm::BasicBlock*)0)), bb(0), bld(ctx), gglCtx(GGLCtx), shaderSuffix(suffix),
    108       inputsPtr(NULL), outputsPtr(NULL), constantsPtr(NULL),
    109       inputs(NULL), outputs(NULL), constants(NULL)
    110    {
    111       llvm::PointerType * const floatVecPtrType = llvm::PointerType::get(llvm::VectorType::get(bld.getFloatTy(),4), 0);
    112       llvm::Constant * const nullFloatVecPtr = llvm::Constant::getNullValue(floatVecPtrType);
    113       // make input, output and consts global pointers so they can be used in
    114       // different LLVM functions since the shader shares these "registers" across "functions"
    115 
    116       inputsPtr = new llvm::GlobalVariable(*mod, floatVecPtrType, false,
    117          llvm::GlobalValue::InternalLinkage, nullFloatVecPtr, "gl_inputPtr");
    118 
    119       outputsPtr = new llvm::GlobalVariable(*mod, floatVecPtrType, false,
    120          llvm::GlobalValue::InternalLinkage, nullFloatVecPtr, "gl_outputsPtr");
    121 
    122       constantsPtr = new llvm::GlobalVariable(*mod, floatVecPtrType, false,
    123          llvm::GlobalValue::InternalLinkage, nullFloatVecPtr, "gl_constantsPtr");
    124    }
    125 
    126    llvm::Type* llvm_base_type(unsigned base_type)
    127    {
    128       switch(base_type)
    129       {
    130       case GLSL_TYPE_VOID:
    131          return llvm::Type::getVoidTy(ctx);
    132       case GLSL_TYPE_UINT:
    133       case GLSL_TYPE_INT:
    134          return llvm::Type::getInt32Ty(ctx);
    135       case GLSL_TYPE_FLOAT:
    136          return llvm::Type::getFloatTy(ctx);
    137       case GLSL_TYPE_BOOL:
    138          return llvm::Type::getInt1Ty(ctx);
    139       case GLSL_TYPE_SAMPLER:
    140          return llvm::PointerType::getUnqual(llvm::Type::getVoidTy(ctx));
    141       default:
    142          assert(0);
    143          return 0;
    144       }
    145    }
    146 
    147    llvm::Type* llvm_vec_type(const glsl_type* type)
    148    {
    149       if (type->is_array())
    150          return llvm::ArrayType::get(llvm_type(type->fields.array), type->array_size());
    151 
    152       if (type->is_record())
    153       {
    154          std::vector<llvm::Type*> fields;
    155          for (unsigned i = 0; i < type->length; i++)
    156             fields.push_back(llvm_type(type->fields.structure[i].type));
    157          return llvm::StructType::get(ctx, llvm::ArrayRef<llvm::Type*>(
    158              fields));
    159       }
    160 
    161       llvm::Type* base_type = llvm_base_type(type->base_type);
    162       if (type->vector_elements <= 1) {
    163          return base_type;
    164       } else {
    165          return llvm::VectorType::get(base_type, type->vector_elements);
    166       }
    167    }
    168 
    169    llvm::Type* llvm_type(const glsl_type* type)
    170    {
    171       llvm::Type* vec_type = llvm_vec_type(type);
    172       if (type->matrix_columns <= 1) {
    173          return vec_type;
    174       } else {
    175          return llvm::ArrayType::get(vec_type, type->matrix_columns);
    176       }
    177    }
    178 
   // Cache from a GLSL IR variable to the LLVM value llvm_variable() created
   // for it, so each variable gets its storage exactly once.
   typedef std::map<ir_variable*, llvm::Value*> llvm_variables_t;
   //typedef std::unordered_map<ir_variable*, llvm::Value*> llvm_variables_t;
   llvm_variables_t llvm_variables;
    182 
    183    llvm::Value* llvm_variable(class ir_variable* var)
    184    {
    185       llvm_variables_t::iterator vari = llvm_variables.find(var);
    186       if (vari != llvm_variables.end()) {
    187          return vari->second;
    188       } else {
    189          llvm::Type* type = llvm_type(var->type);
    190 
    191          llvm::Value* v = NULL;
    192          if(fun) {
    193             if (ir_var_in == var->mode)
    194             {
    195                assert(var->location >= 0);
    196                v = bld.CreateConstGEP1_32(inputs, var->location);
    197                v = bld.CreateBitCast(v, llvm::PointerType::get(llvm_type(var->type), 0), var->name);
    198             }
    199             else if (ir_var_out == var->mode)
    200             {
    201                assert(var->location >= 0);
    202                v = bld.CreateConstGEP1_32(outputs, var->location);
    203                v = bld.CreateBitCast(v, llvm::PointerType::get(llvm_type(var->type), 0), var->name);
    204             }
    205             else if (ir_var_uniform == var->mode)
    206             {
    207                assert(var->location >= 0);
    208                v = bld.CreateConstGEP1_32(constants, var->location);
    209                v = bld.CreateBitCast(v, llvm::PointerType::get(llvm_type(var->type), 0), var->name);
    210             }
    211             else
    212             {
    213                if(bb == &fun->getEntryBlock())
    214                   v = bld.CreateAlloca(type, 0, var->name);
    215                else
    216                   v = new llvm::AllocaInst(type, 0, var->name, fun->getEntryBlock().getTerminator());
    217             }
    218          } else {
    219            // TODO: can anything global be non-constant in GLSL?; fix linkage
    220             //printf("var '%s' mode=%d location=%d \n", var->name, var->mode, var->location);
    221             switch(var->mode)
    222             {
    223                case ir_var_auto: // fall through
    224                case ir_var_temporary:
    225                {
    226                   llvm::Constant * init = llvm::UndefValue::get(llvm_type(var->type));
    227                   if(var->constant_value)
    228                      init = llvm_constant(var->constant_value);
    229                   v = new llvm::GlobalVariable(*mod, type, var->read_only, llvm::GlobalValue::InternalLinkage, init, var->name);
    230                   break;
    231                }
    232                case ir_var_in: // fall through
    233                case ir_var_out: // fall through
    234                case ir_var_uniform: // fall through
    235                   assert(var->location >= 0);
    236                   return NULL; // variable outside of function means declaration
    237                default:
    238                   assert(0);
    239             }
    240 
    241 //            llvm::Function::LinkageTypes linkage;
    242 //            if(var->mode == ir_var_auto || var->mode == ir_var_temporary)
    243 //               linkage = llvm::GlobalValue::InternalLinkage;
    244 //            else
    245 //               linkage = llvm::GlobalValue::ExternalLinkage;
    246 //            llvm::Constant* init = 0;
    247 //            if(var->constant_value)
    248 //            {
    249 //               init = llvm_constant(var->constant_value);
    250 //               // this constants need to be external (ie. written to output)
    251 //               if (llvm::GlobalValue::ExternalLinkage == linkage)
    252 //                  linkage = llvm::GlobalValue::AvailableExternallyLinkage;
    253 //            }
    254 //            else if(linkage == llvm::GlobalValue::InternalLinkage)
    255 //               init = llvm::UndefValue::get(llvm_type(var->type));
    256 //            v = new llvm::GlobalVariable(*mod, type, var->read_only, linkage, init, var->name);
    257          }
    258          assert(v);
    259          llvm_variables[var] = v;
    260          return v;
    261       }
    262    }
    263 
    264    //typedef std::map<ir_function_signature*, llvm::Function*> llvm_functions_t;
    265    //typedef std::unordered_map<ir_function_signature*, llvm::Function*> llvm_functions_t;
    266    //llvm_functions_t llvm_functions;
    267 
    268    llvm::Function* llvm_function(class ir_function_signature* sig)
    269    {
    270       const char* name = sig->function_name();
    271       char * functionName = (char *)malloc(strlen(name) + strlen(shaderSuffix) + 1);
    272       strcpy(functionName, name);
    273       strcat(functionName, shaderSuffix);
    274       llvm::Function * function = mod->getFunction(functionName);
    275       if (function)
    276       {
    277          free(functionName);
    278          return function;
    279       }
    280       else
    281       {
    282          llvm::Function::LinkageTypes linkage;
    283          std::vector<llvm::Type*> params;
    284          foreach_iter(exec_list_iterator, iter, sig->parameters) {
    285             ir_variable* arg = (ir_variable*)iter.get();
    286             params.push_back(llvm_type(arg->type));
    287          }
    288 
    289          if(!strcmp(name, "main") || !sig->is_defined)
    290          {
    291             linkage = llvm::Function::ExternalLinkage;
    292             llvm::PointerType * vecPtrTy = llvm::PointerType::get(llvm::VectorType::get(bld.getFloatTy(), 4), 0);
    293             assert(0 == params.size());
    294             params.push_back(vecPtrTy); // inputs
    295             params.push_back(vecPtrTy); // outputs
    296             params.push_back(vecPtrTy); // constants
    297          }
    298          else {
    299             linkage = llvm::Function::InternalLinkage;
    300          }
    301          llvm::FunctionType* ft = llvm::FunctionType::get(llvm_type(sig->return_type),
    302                                                           llvm::ArrayRef<llvm::Type*>(params),
    303                                                           false);
    304          function = llvm::Function::Create(ft, linkage, functionName, mod);
    305          free(functionName);
    306          return function;
    307       }
    308    }
    309 
    310    llvm::Value* llvm_value(class ir_instruction* ir)
    311    {
    312       result = 0;
    313       ir->accept(this);
    314       return result;
    315    }
    316 
    317    llvm::Constant* llvm_constant(class ir_instruction* ir)
    318    {
    319       return (llvm::Constant *)llvm_value(ir);
    320       //return &dynamic_cast<llvm::Constant&>(*llvm_value(ir));
    321    }
    322 
    323    llvm::Constant* llvm_int(unsigned v)
    324    {
    325       return llvm::ConstantInt::get(llvm::Type::getInt32Ty(ctx), v);
    326    }
    327 
    328    llvm::Value* llvm_pointer(class ir_rvalue* ir)
    329    {
    330       if(ir_dereference_variable* deref = ir->as_dereference_variable())
    331          return llvm_variable(deref->variable_referenced());
    332       else if(ir_dereference_array* deref = ir->as_dereference_array())
    333       {
    334          llvm::Value* gep[2] = {llvm_int(0), llvm_value(deref->array_index)};
    335          return bld.CreateInBoundsGEP(llvm_pointer(deref->array), gep, gep + 2);
    336          }
    337       else if(ir->as_dereference())
    338       {
    339          ir_dereference_record* deref = (ir_dereference_record*)ir;
    340          int idx = deref->record->type->field_index(deref->field);
    341          assert(idx >= 0);
    342          return bld.CreateConstInBoundsGEP2_32(llvm_pointer(deref->record), 0, idx);
    343       }
    344       else
    345       {
    346          assert(0);
    347          return 0;
    348       }
    349    }
    350 
    351 //   llvm::Value* llvm_intrinsic(llvm::Intrinsic::ID id, llvm::Value* a)
    352 //   {
    353 //      llvm::Type* types[1] = {a->getType()};
    354 //      return bld.CreateCall(llvm::Intrinsic::getDeclaration(mod, id, types, 1), a);
    355 //   }
    356 //
    357 //   llvm::Value* llvm_intrinsic(llvm::Intrinsic::ID id, llvm::Value* a, llvm::Value* b)
    358 //   {
    359 //      llvm::Type* types[2] = {a->getType(), b->getType()};
    360 //      /* only one type suffix is usually needed, so pass 1 here */
    361 //      return bld.CreateCall2(llvm::Intrinsic::getDeclaration(mod, id, types, 1), a, b);
    362 //   }
    363 
    364    llvm::Value* llvm_intrinsic_unop(ir_expression_operation op, llvm::Value * op0)
    365    {
    366       llvm::Type * floatType = llvm::Type::getFloatTy(ctx);
    367       const char * name = NULL;
    368       switch (op) {
    369       case ir_unop_sin:
    370          name = "sinf";
    371          break;
    372       case ir_unop_cos:
    373          name = "cosf";
    374          break;
    375       default:
    376          assert(0);
    377       }
    378 
    379       llvm::Function * function = mod->getFunction(name);
    380       if (!function) {
    381          // predeclare the intrinsic
    382          std::vector<llvm::Type*> args;
    383          args.push_back(floatType);
    384          llvm::FunctionType* type = llvm::FunctionType::get(floatType,
    385                                                             llvm::ArrayRef<llvm::Type*>(args),
    386                                                             false);
    387          function = llvm::Function::Create(type, llvm::Function::ExternalLinkage, name, mod);
    388          function->setCallingConv(llvm::CallingConv::C);
    389       }
    390 
    391       return bld.CreateCall(function, op0);
    392    }
    393 
    394    llvm::Value* llvm_intrinsic_binop(ir_expression_operation op, llvm::Value * op0, llvm::Value * op1)
    395    {
    396       llvm::Type * floatType = llvm::Type::getFloatTy(ctx);
    397       const char * name = NULL;
    398       switch (op) {
    399       case ir_binop_pow:
    400          name = "powf";
    401          break;
    402       default:
    403          assert(0);
    404       }
    405 
    406       llvm::Function * function = mod->getFunction(name);
    407       if (!function) {
    408          // predeclare the intrinsic
    409          std::vector<llvm::Type*> args;
    410          args.push_back(floatType);
    411          args.push_back(floatType);
    412          llvm::FunctionType* type = llvm::FunctionType::get(floatType,
    413                                                             llvm::ArrayRef<llvm::Type*>(args),
    414                                                             false);
    415          function = llvm::Function::Create(type, llvm::Function::ExternalLinkage, name, mod);
    416          function->setCallingConv(llvm::CallingConv::C);
    417       }
    418 
    419       return bld.CreateCall2(function, op0, op1);
    420    }
    421 
    422    llvm::Constant* llvm_imm(llvm::Type* type, double v)
    423    {
    424       if(type->isVectorTy())
    425       {
    426          std::vector<llvm::Constant*> values;
    427          values.push_back(llvm_imm(((llvm::VectorType*)type)->getElementType(), v));
    428          for(unsigned i = 1; i < ((llvm::VectorType*)type)->getNumElements(); ++i)
    429             values.push_back(values[0]);
    430          return llvm::ConstantVector::get(values);
    431       }
    432       else if(type->isIntegerTy())
    433          return llvm::ConstantInt::get(type, v);
    434       else if(type->isFloatingPointTy())
    435          return llvm::ConstantFP::get(type, v);
    436       else
    437       {
    438          assert(0);
    439          return 0;
    440       }
    441    }
    442 
    443    static llvm::Value* create_shuffle3(llvm::IRBuilder<>& bld, llvm::Value* v, unsigned a, unsigned b, unsigned c, const llvm::Twine& name = "")
    444    {
    445       llvm::Type* int_ty = llvm::Type::getInt32Ty(v->getContext());
    446       llvm::Constant* vals[3] = {llvm::ConstantInt::get(int_ty, a), llvm::ConstantInt::get(int_ty, b), llvm::ConstantInt::get(int_ty, c)};
    447       return bld.CreateShuffleVector(v, llvm::UndefValue::get(v->getType()), llvm::ConstantVector::get(pack(vals)), name);
    448    }
    449 
    450    llvm::Value* create_select(unsigned width, llvm::Value * cond, llvm::Value * tru, llvm::Value * fal, const char * name = "")
    451    {
    452       if (1 == width)
    453          return bld.CreateSelect(cond, tru, fal, name);
    454 
    455       llvm::Type * vectorType = tru->getType();
    456       llvm::Value * vector = llvm::Constant::getNullValue(vectorType);
    457       for (unsigned int i = 0; i < width; i++) {
    458          llvm::Value * c = bld.CreateExtractElement(cond, llvm_int(i));
    459          llvm::Value * t = bld.CreateExtractElement(tru, llvm_int(i));
    460          llvm::Value * f = bld.CreateExtractElement(fal, llvm_int(i));
    461          llvm::Value * v = bld.CreateSelect(c, t, f, name);
    462          vector = bld.CreateInsertElement(vector, v, llvm_int(i), "vslct");
    463       }
    464       return vector;
    465    }
    466 
    467    llvm::Value* create_dot_product(llvm::Value* ops0, llvm::Value* ops1, glsl_base_type type, unsigned width)
    468    {
    469       llvm::Value* prod;
    470       switch (type) {
    471       case GLSL_TYPE_UINT:
    472       case GLSL_TYPE_INT:
    473          prod = bld.CreateMul(ops0, ops1, "dot.mul");
    474          break;
    475       case GLSL_TYPE_FLOAT:
    476          prod = bld.CreateFMul(ops0, ops1, "dot.mul");
    477          break;
    478       default:
    479          assert(0);
    480       }
    481 
    482       if (width<= 1)
    483          return prod;
    484 
    485       llvm::Value* sum = 0;
    486       for (unsigned i = 0; i < width; ++i) {
    487          llvm::Value* elem = bld.CreateExtractElement(prod, llvm_int(i), "dot.elem");
    488          if (sum) {
    489             if (type == GLSL_TYPE_FLOAT)
    490                sum = bld.CreateFAdd(sum, elem, "dot.add");
    491             else
    492                sum = bld.CreateAdd(sum, elem, "dot.add");
    493          }
    494          else
    495             sum = elem;
    496       }
    497       return sum;
    498    }
    499 
    500    llvm::Value* llvm_expression(ir_expression* ir)
    501    {
    502       llvm::Value* ops[2];
    503       for(unsigned i = 0; i < ir->get_num_operands(); ++i)
    504          ops[i] = llvm_value(ir->operands[i]);
    505 
    506       if(ir->get_num_operands() == 2)
    507       {
    508          int vecidx = -1;
    509          int scaidx = -1;
    510          if(ir->operands[0]->type->vector_elements <= 1 && ir->operands[1]->type->vector_elements > 1)
    511          {
    512             scaidx = 0;
    513             vecidx = 1;
    514          }
    515          else if(ir->operands[0]->type->vector_elements > 1 && ir->operands[1]->type->vector_elements <= 1)
    516          {
    517             scaidx = 1;
    518             vecidx = 0;
    519          }
    520          else
    521             assert(ir->operands[0]->type->vector_elements == ir->operands[1]->type->vector_elements);
    522 
    523          if(scaidx >= 0)
    524          {
    525             llvm::Value* vec;
    526             vec = llvm::UndefValue::get(ops[vecidx]->getType());
    527             for(unsigned i = 0; i < ir->operands[vecidx]->type->vector_elements; ++i)
    528                vec = bld.CreateInsertElement(vec,  ops[scaidx], llvm_int(i), "sca2vec");
    529             ops[scaidx] = vec;
    530          }
    531       }
    532 
    533       switch (ir->operation) {
    534       case ir_unop_logic_not:
    535          return bld.CreateNot(ops[0]);
    536       case ir_unop_neg:
    537          switch (ir->operands[0]->type->base_type) {
    538          case GLSL_TYPE_UINT:
    539          case GLSL_TYPE_BOOL:
    540          case GLSL_TYPE_INT:
    541             return bld.CreateNeg(ops[0]);
    542          case GLSL_TYPE_FLOAT:
    543             return bld.CreateFNeg(ops[0]);
    544          default:
    545             assert(0);
    546          }
    547       case ir_unop_abs:
    548          switch (ir->operands[0]->type->base_type) {
    549          case GLSL_TYPE_UINT:
    550          case GLSL_TYPE_BOOL:
    551             return ops[0];
    552          case GLSL_TYPE_INT:
    553             return create_select(ir->operands[0]->type->vector_elements,
    554                                  bld.CreateICmpSGE(ops[0], llvm_imm(ops[0]->getType(), 0), "sabs.ge"),
    555                                  ops[0], bld.CreateNeg(ops[0], "sabs.neg"), "sabs.select");
    556          case GLSL_TYPE_FLOAT:
    557             return create_select(ir->operands[0]->type->vector_elements,
    558                                  bld.CreateFCmpUGE(ops[0], llvm_imm(ops[0]->getType(), 0), "fabs.ge"),
    559                                  ops[0], bld.CreateFNeg(ops[0], "fabs.neg"), "fabs.select");
    560          default:
    561             assert(0);
    562          }
    563       case ir_unop_sign:
    564          switch (ir->operands[0]->type->base_type) {
    565          case GLSL_TYPE_BOOL:
    566             return ops[0];
    567          case GLSL_TYPE_UINT:
    568             return bld.CreateZExt(bld.CreateICmpNE(ops[0], llvm_imm(ops[0]->getType(), 0), "usign.ne"), ops[0]->getType(), "usign.zext");
    569          case GLSL_TYPE_INT:
    570             return bld.CreateSelect(bld.CreateICmpNE(ops[0], llvm_imm(ops[0]->getType(), 0), "ssign.ne"),
    571                                     bld.CreateSelect(bld.CreateICmpSGE(ops[0], llvm_imm(ops[0]->getType(), 0), "ssign.ge"), llvm_imm(ops[0]->getType(), 1), llvm_imm(ops[0]->getType(), -1), "sabs.selects"),
    572                                     llvm_imm(ops[0]->getType(), 0), "sabs.select0");
    573          case GLSL_TYPE_FLOAT:
    574             return bld.CreateSelect(bld.CreateFCmpONE(ops[0], llvm_imm(ops[0]->getType(), 0), "fsign.ne"),
    575                                     bld.CreateSelect(bld.CreateFCmpUGE(ops[0], llvm_imm(ops[0]->getType(), 0), "fsign.ge"), llvm_imm(ops[0]->getType(), 1), llvm_imm(ops[0]->getType(), -1), "fabs.selects"),
    576                                     llvm_imm(ops[0]->getType(), 0), "fabs.select0");
    577          default:
    578             assert(0);
    579          }
    580       case ir_unop_rcp:
    581          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
    582          return bld.CreateFDiv(llvm_imm(ops[0]->getType(), 1), ops[0]);
    583       case ir_unop_exp: // fall through
    584       case ir_unop_exp2: // fall through
    585       case ir_unop_log: // fall through
    586       case ir_unop_log2: // fall through
    587       case ir_unop_sin: // fall through
    588       case ir_unop_cos:
    589          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
    590          return llvm_intrinsic_unop(ir->operation, ops[0]);
    591          // TODO: implement these somehow
    592       case ir_unop_dFdx:
    593          assert(0);
    594          //return llvm_intrinsic(llvm::Intrinsic::ddx, ops[0]);
    595       case ir_unop_dFdy:
    596          assert(0);
    597          //return llvm_intrinsic(llvm::Intrinsic::ddy, ops[0]);
    598       case ir_binop_add:
    599          switch(ir->operands[0]->type->base_type)
    600          {
    601          case GLSL_TYPE_BOOL:
    602          case GLSL_TYPE_UINT:
    603          case GLSL_TYPE_INT:
    604             return bld.CreateAdd(ops[0], ops[1]);
    605          case GLSL_TYPE_FLOAT:
    606             return bld.CreateFAdd(ops[0], ops[1]);
    607          default:
    608             assert(0);
    609          }
    610       case ir_binop_sub:
    611          switch(ir->operands[0]->type->base_type)
    612          {
    613          case GLSL_TYPE_BOOL:
    614          case GLSL_TYPE_UINT:
    615          case GLSL_TYPE_INT:
    616             return bld.CreateSub(ops[0], ops[1]);
    617          case GLSL_TYPE_FLOAT:
    618             return bld.CreateFSub(ops[0], ops[1]);
    619          default:
    620             assert(0);
    621          }
    622       case ir_binop_mul:
    623          if (ir->operands[0]->type->is_matrix() && ir->operands[1]->type->is_vector())
    624             assert(0);
    625          else if (ir->operands[0]->type->is_vector() && ir->operands[1]->type->is_matrix()) {
    626             assert(0); // matrix multiplication should have been lowered to vector ops
    627 			llvm::VectorType * vectorType = llvm::VectorType::get(llvm_base_type(ir->operands[1]->type->base_type), ir->operands[1]->type->matrix_columns);
    628             llvm::Value * vector = llvm::Constant::getNullValue(vectorType);
    629             for (unsigned int i = 0; i < ir->operands[1]->type->matrix_columns; i++) {
    630                llvm::Value * value = bld.CreateExtractValue(ops[1], i, "vec*mat_col");
    631                value = create_dot_product(value, ops[0], ir->operands[1]->type->base_type, ir->operands[1]->type->vector_elements);
    632                vector = bld.CreateInsertElement(vector, value, llvm_int(i), "vec*mat_res");
    633             }
    634             return vector;
    635          }
    636          else if (ir->operands[0]->type->is_matrix() && ir->operands[1]->type->is_matrix())
    637             assert(0);
    638 
    639          switch (ir->operands[0]->type->base_type) {
    640          case GLSL_TYPE_BOOL:
    641             return bld.CreateAnd(ops[0], ops[1]);
    642          case GLSL_TYPE_UINT:
    643          case GLSL_TYPE_INT:
    644             return bld.CreateMul(ops[0], ops[1]);
    645          case GLSL_TYPE_FLOAT:
    646             return bld.CreateFMul(ops[0], ops[1]);
    647          default:
    648             assert(0);
    649          }
    650          case ir_binop_div:
    651          switch(ir->operands[0]->type->base_type)
    652          {
    653          case GLSL_TYPE_BOOL:
    654          case GLSL_TYPE_UINT:
    655             return bld.CreateUDiv(ops[0], ops[1]);
    656          case GLSL_TYPE_INT:
    657             return bld.CreateSDiv(ops[0], ops[1]);
    658          case GLSL_TYPE_FLOAT:
    659             return bld.CreateFDiv(ops[0], ops[1]);
    660          default:
    661             assert(0);
    662          }
    663       case ir_binop_mod:
    664          switch(ir->operands[0]->type->base_type)
    665          {
    666          case GLSL_TYPE_BOOL:
    667          case GLSL_TYPE_UINT:
    668             return bld.CreateURem(ops[0], ops[1]);
    669          case GLSL_TYPE_INT:
    670             return bld.CreateSRem(ops[0], ops[1]);
    671          case GLSL_TYPE_FLOAT:
    672             return bld.CreateFRem(ops[0], ops[1]);
    673          default:
    674             assert(0);
    675          }
    676       case ir_binop_less:
    677          switch(ir->operands[0]->type->base_type)
    678          {
    679          case GLSL_TYPE_BOOL:
    680          case GLSL_TYPE_UINT:
    681             return bld.CreateICmpULT(ops[0], ops[1]);
    682          case GLSL_TYPE_INT:
    683             return bld.CreateICmpSLT(ops[0], ops[1]);
    684          case GLSL_TYPE_FLOAT:
    685             return bld.CreateFCmpOLT(ops[0], ops[1]);
    686          default:
    687             assert(0);
    688          }
    689       case ir_binop_greater:
    690          switch(ir->operands[0]->type->base_type)
    691          {
    692          case GLSL_TYPE_BOOL:
    693          case GLSL_TYPE_UINT:
    694             return bld.CreateICmpUGT(ops[0], ops[1]);
    695          case GLSL_TYPE_INT:
    696             return bld.CreateICmpSGT(ops[0], ops[1]);
    697          case GLSL_TYPE_FLOAT:
    698             return bld.CreateFCmpOGT(ops[0], ops[1]);
    699          default:
    700             assert(0);
    701          }
    702       case ir_binop_lequal:
    703          switch(ir->operands[0]->type->base_type)
    704          {
    705          case GLSL_TYPE_BOOL:
    706          case GLSL_TYPE_UINT:
    707             return bld.CreateICmpULE(ops[0], ops[1]);
    708          case GLSL_TYPE_INT:
    709             return bld.CreateICmpSLE(ops[0], ops[1]);
    710          case GLSL_TYPE_FLOAT:
    711             return bld.CreateFCmpOLE(ops[0], ops[1]);
    712          default:
    713             assert(0);
    714          }
    715       case ir_binop_gequal:
    716          switch(ir->operands[0]->type->base_type)
    717          {
    718          case GLSL_TYPE_BOOL:
    719          case GLSL_TYPE_UINT:
    720             return bld.CreateICmpUGE(ops[0], ops[1]);
    721          case GLSL_TYPE_INT:
    722             return bld.CreateICmpSGE(ops[0], ops[1]);
    723          case GLSL_TYPE_FLOAT:
    724             return bld.CreateFCmpOGE(ops[0], ops[1]);
    725          default:
    726             assert(0);
    727          }
    728       case ir_binop_equal: // fall through
    729       case ir_binop_all_equal: // TODO: check op same as ir_binop_equal
    730          switch (ir->operands[0]->type->base_type) {
    731          case GLSL_TYPE_BOOL:
    732          case GLSL_TYPE_UINT:
    733          case GLSL_TYPE_INT:
    734             return bld.CreateICmpEQ(ops[0], ops[1]);
    735          case GLSL_TYPE_FLOAT:
    736             return bld.CreateFCmpOEQ(ops[0], ops[1]);
    737          default:
    738             assert(0);
    739          }
    740       case ir_binop_nequal:
    741          switch(ir->operands[0]->type->base_type)
    742          {
    743          case GLSL_TYPE_BOOL:
    744          case GLSL_TYPE_UINT:
    745          case GLSL_TYPE_INT:
    746             return bld.CreateICmpNE(ops[0], ops[1]);
    747          case GLSL_TYPE_FLOAT:
    748             return bld.CreateFCmpONE(ops[0], ops[1]);
    749          default:
    750             assert(0);
    751          }
    752       case ir_binop_logic_xor:
    753          assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
    754          return bld.CreateICmpNE(ops[0], ops[1]);
    755       case ir_binop_logic_or:
    756          assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
    757          return bld.CreateOr(ops[0], ops[1]);
    758       case ir_binop_logic_and:
    759          assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
    760          return bld.CreateAnd(ops[0], ops[1]);
    761       case ir_binop_dot:
    762          return create_dot_product(ops[0], ops[1], ir->operands[0]->type->base_type, ir->operands[0]->type->vector_elements);
    763 //      case ir_binop_cross: this op does not exist in ir.h
    764 //         assert(ir->operands[0]->type->vector_elements == 3);
    765 //         switch(ir->operands[0]->type->base_type)
    766 //         {
    767 //         case GLSL_TYPE_UINT:
    768 //         case GLSL_TYPE_INT:
    769 //            return bld.CreateSub(
    770 //                  bld.CreateMul(create_shuffle3(bld, ops[0], 1, 2, 0, "cross.a120"), create_shuffle3(bld, ops[1], 2, 0, 1, "cross.a201"), "cross.ab"),
    771 //                  bld.CreateMul(create_shuffle3(bld, ops[1], 1, 2, 0, "cross.b120"), create_shuffle3(bld, ops[0], 2, 0, 1, "cross.b201"), "cross.ba"),
    772 //                  "cross.sub");
    773 //         case GLSL_TYPE_FLOAT:
    774 //            return bld.CreateFSub(
    775 //                  bld.CreateFMul(create_shuffle3(bld, ops[0], 1, 2, 0, "cross.a120"), create_shuffle3(bld, ops[1], 2, 0, 1, "cross.a201"), "cross.ab"),
    776 //                  bld.CreateFMul(create_shuffle3(bld, ops[1], 1, 2, 0, "cross.b120"), create_shuffle3(bld, ops[0], 2, 0, 1, "cross.b201"), "cross.ba"),
    777 //                  "cross.sub");
    778 //         default:
    779 //            assert(0);
    780 //         }
    781       case ir_unop_sqrt:
    782          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
    783          return llvm_intrinsic_unop(ir->operation, ops[0]);
    784       case ir_unop_rsq:
    785          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
    786          return bld.CreateFDiv(llvm_imm(ops[0]->getType(), 1), llvm_intrinsic_unop(ir_unop_sqrt, ops[0]), "rsqrt.rcp");
    787       case ir_unop_i2f:
    788          return bld.CreateSIToFP(ops[0], llvm_type(ir->type));
    789       case ir_unop_u2f:
    790       case ir_unop_b2f:
    791          return bld.CreateUIToFP(ops[0], llvm_type(ir->type));
    792       case ir_unop_b2i:
    793          return bld.CreateZExt(ops[0], llvm_type(ir->type));
    794       case ir_unop_f2i:
    795          return bld.CreateFPToSI(ops[0], llvm_type(ir->type));
    796       case ir_unop_f2b:
    797          return bld.CreateFCmpONE(ops[0], llvm_imm(ops[0]->getType(), 0));
    798       case ir_unop_i2b:
    799          return bld.CreateICmpNE(ops[0], llvm_imm(ops[0]->getType(), 0));
    800       case ir_unop_trunc:
    801       {
    802          if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT)
    803             return ops[0];
    804          glsl_type int_type = *ir->operands[0]->type;
    805          int_type.base_type = GLSL_TYPE_INT;
    806          return bld.CreateSIToFP(bld.CreateFPToSI(ops[0], llvm_type(&int_type), "trunc.fptosi"),ops[0]->getType(), "trunc.sitofp");
    807       }
    808       case ir_unop_floor:
    809       {
    810          if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT)
    811             return ops[0];
    812          llvm::Value* one = llvm_imm(ops[0]->getType(), 1);
    813          return bld.CreateFSub(ops[0], bld.CreateFRem(ops[0], one));
    814       }
    815       case ir_unop_ceil:
    816       {
    817          if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT)
    818             return ops[0];
    819          llvm::Value* one = llvm_imm(ops[0]->getType(), 1);
    820          return bld.CreateFAdd(bld.CreateFSub(ops[0], bld.CreateFRem(ops[0], one)), one);
    821       }
    822       case ir_unop_fract:
    823       {
    824          if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT)
    825             return llvm_imm(ops[0]->getType(), 0);
    826          llvm::Value* one = llvm_imm(ops[0]->getType(), 1);
    827          return bld.CreateFRem(ops[0], one);
    828       }
    829       // TODO: NaNs might be wrong in min/max, not sure how to fix it
    830       case ir_binop_min:
    831          switch(ir->operands[0]->type->base_type)
    832          {
    833          case GLSL_TYPE_BOOL:
    834             return bld.CreateAnd(ops[0], ops[1], "bmin");
    835          case GLSL_TYPE_UINT:
    836             return bld.CreateSelect(bld.CreateICmpULE(ops[0], ops[1], "umin.le"), ops[0], ops[1], "umin.select");
    837          case GLSL_TYPE_INT:
    838             return bld.CreateSelect(bld.CreateICmpSLE(ops[0], ops[1], "smin.le"), ops[0], ops[1], "smin.select");
    839          case GLSL_TYPE_FLOAT:
    840             return bld.CreateSelect(bld.CreateFCmpULE(ops[0], ops[1], "fmin.le"), ops[0], ops[1], "fmin.select");
    841          default:
    842             assert(0);
    843          }
    844       case ir_binop_max:
    845          switch(ir->operands[0]->type->base_type)
    846          {
    847          case GLSL_TYPE_BOOL:
    848             return bld.CreateOr(ops[0], ops[1], "bmax");
    849          case GLSL_TYPE_UINT:
    850             return bld.CreateSelect(bld.CreateICmpUGE(ops[0], ops[1], "umax.ge"), ops[0], ops[1], "umax.select");
    851          case GLSL_TYPE_INT:
    852             return bld.CreateSelect(bld.CreateICmpSGE(ops[0], ops[1], "smax.ge"), ops[0], ops[1], "smax.select");
    853          case GLSL_TYPE_FLOAT:
    854             return bld.CreateSelect(bld.CreateFCmpUGE(ops[0], ops[1], "fmax.ge"), ops[0], ops[1], "fmax.select");
    855          default:
    856             assert(0);
    857          }
    858       case ir_binop_pow:
    859          assert(GLSL_TYPE_FLOAT == ir->operands[0]->type->base_type);
    860          assert(GLSL_TYPE_FLOAT == ir->operands[1]->type->base_type);
    861          return llvm_intrinsic_binop(ir_binop_pow, ops[0], ops[1]);
    862       case ir_unop_bit_not:
    863          return bld.CreateNot(ops[0]);
    864       case ir_binop_bit_and:
    865          return bld.CreateAnd(ops[0], ops[1]);
    866       case ir_binop_bit_xor:
    867          return bld.CreateXor(ops[0], ops[1]);
    868       case ir_binop_bit_or:
    869          return bld.CreateOr(ops[0], ops[1]);
    870       case ir_binop_lshift:
    871          switch(ir->operands[0]->type->base_type)
    872          {
    873          case GLSL_TYPE_BOOL:
    874          case GLSL_TYPE_UINT:
    875          case GLSL_TYPE_INT:
    876             return bld.CreateLShr(ops[0], ops[1]);
    877          default:
    878             assert(0);
    879          }
    880       case ir_binop_rshift:
    881          switch(ir->operands[0]->type->base_type)
    882          {
    883          case GLSL_TYPE_BOOL:
    884          case GLSL_TYPE_UINT:
    885             return bld.CreateLShr(ops[0], ops[1]);
    886          case GLSL_TYPE_INT:
    887             return bld.CreateAShr(ops[0], ops[1]);
    888          default:
    889             assert(0);
    890             return 0;
    891          }
    892       default:
    893          printf("ir->operation=%d \n", ir->operation);
    894          assert(0);
    895          return 0;
    896       }
    897    }
    898 
    899    virtual void visit(class ir_expression * ir)
    900    {
    901       result = llvm_expression(ir);
    902    }
    903 
    904    virtual void visit(class ir_dereference_array *ir)
    905    {
    906       result = bld.CreateLoad(llvm_pointer(ir));
    907    }
    908 
    909    virtual void visit(class ir_dereference_record *ir)
    910    {
    911       result = bld.CreateLoad(llvm_pointer(ir));
    912    }
    913 
    914    virtual void visit(class ir_dereference_variable *ir)
    915    {
    916       result = bld.CreateLoad(llvm_pointer(ir), ir->variable_referenced()->name);
    917    }
    918 
    919    virtual void visit(class ir_texture * ir)
    920    {
    921       llvm::Value * coordinate = llvm_value(ir->coordinate);
    922       if (ir->projector)
    923       {
    924          llvm::Value * proj = llvm_value(ir->projector);
    925          unsigned width = ((llvm::VectorType*)coordinate->getType())->getNumElements();
    926          llvm::Value * div = llvm::Constant::getNullValue(coordinate->getType());
    927          for (unsigned i = 0; i < width; i++)
    928             div = bld.CreateInsertElement(div, proj, bld.getInt32(i), "texProjDup");
    929          coordinate = bld.CreateFDiv(coordinate, div, "texProj");
    930       }
    931 
    932       ir_variable * sampler = NULL;
    933       if(ir_dereference_variable* deref = ir->sampler->as_dereference_variable())
    934          sampler = deref->variable_referenced();
    935       else if(ir_dereference_array* deref = ir->sampler->as_dereference_array())
    936       {
    937          assert(0); // not implemented
    938          return;
    939          deref->array_index;
    940          deref->array;
    941       }
    942       else if(ir->sampler->as_dereference())
    943       {
    944          assert(0); // not implemented
    945          ir_dereference_record* deref = (ir_dereference_record*)ir->sampler;
    946          int idx = deref->record->type->field_index(deref->field);
    947          assert(idx >= 0);
    948       }
    949       else
    950          assert(0);
    951 
    952       assert(sampler->location >= 0 && sampler->location < 64); // TODO: proper limit
    953 
    954       // ESSL texture LOD is only for 2D texture in vert shader, and it's explicit
    955       // bias used only in frag shader, and added to computed LOD
    956       assert(ir_tex == ir->op);
    957 
    958       assert(GLSL_TYPE_FLOAT == sampler->type->sampler_type);
    959       printf("sampler '%s' location=%d dim=%d type=%d proj=%d lod=%d \n", sampler->name, sampler->location,
    960          sampler->type->sampler_dimensionality, sampler->type->sampler_type,
    961          ir->projector ? 1 : 0, ir->lod_info.lod ? 1 : 0);
    962       if (GLSL_SAMPLER_DIM_CUBE == sampler->type->sampler_dimensionality)
    963          result = texCube(bld, coordinate, sampler->location, gglCtx);
    964       else if (GLSL_SAMPLER_DIM_2D == sampler->type->sampler_dimensionality)
    965          result = tex2D(bld, coordinate, sampler->location, gglCtx);
    966       else
    967          assert(0);
    968    }
    969 
    970    virtual void visit(class ir_discard * ir)
    971    {
    972       llvm::BasicBlock* discard = llvm::BasicBlock::Create(ctx, "discard", fun);
    973       llvm::BasicBlock* after;
    974       if(ir->condition)
    975       {
    976          after = llvm::BasicBlock::Create(ctx, "discard.survived", fun);
    977          bld.CreateCondBr(llvm_value(ir->condition), discard, after);
    978       }
    979       else
    980       {
    981          after = llvm::BasicBlock::Create(ctx, "dead_code.discard", fun);
    982          bld.CreateBr(discard);
    983       }
    984 
    985       bld.SetInsertPoint(discard);
    986       bld.CreateUnwind();
    987 
    988       bb = after;
    989       bld.SetInsertPoint(bb);
    990    }
    991 
    992    virtual void visit(class ir_loop_jump *ir)
    993    {
    994       llvm::BasicBlock* target;
    995       if(ir->mode == ir_loop_jump::jump_continue)
    996          target = loop.first;
    997       else if(ir->mode == ir_loop_jump::jump_break)
    998          target = loop.second;
    999       assert(target);
   1000 
   1001       bld.CreateBr(target);
   1002 
   1003       bb = llvm::BasicBlock::Create(ctx, "dead_code.jump", fun);
   1004       bld.SetInsertPoint(bb);
   1005    }
   1006 
   1007    virtual void visit(class ir_loop * ir)
   1008    {
   1009       llvm::BasicBlock* body = llvm::BasicBlock::Create(ctx, "loop", fun);
   1010       llvm::BasicBlock* header = body;
   1011       llvm::BasicBlock* after = llvm::BasicBlock::Create(ctx, "loop.after", fun);
   1012       llvm::Value* ctr;
   1013 
   1014       if(ir->counter)
   1015       {
   1016          ctr = llvm_variable(ir->counter);
   1017          if(ir->from)
   1018             bld.CreateStore(llvm_value(ir->from), ctr);
   1019          if(ir->to)
   1020             header = llvm::BasicBlock::Create(ctx, "loop.header", fun);
   1021       }
   1022 
   1023       bld.CreateBr(header);
   1024 
   1025       if(ir->counter && ir->to)
   1026       {
   1027          bld.SetInsertPoint(header);
   1028          llvm::Value* cond;
   1029          llvm::Value* load = bld.CreateLoad(ctr);
   1030          llvm::Value* to = llvm_value(ir->to);
   1031          switch(ir->counter->type->base_type)
   1032          {
   1033          case GLSL_TYPE_BOOL:
   1034          case GLSL_TYPE_UINT:
   1035             cond = bld.CreateICmpULT(load, to);
   1036             break;
   1037          case GLSL_TYPE_INT:
   1038             cond = bld.CreateICmpSLT(load, to);
   1039             break;
   1040          case GLSL_TYPE_FLOAT:
   1041             cond = bld.CreateFCmpOLT(load, to);
   1042             break;
   1043          }
   1044          bld.CreateCondBr(cond, body, after);
   1045       }
   1046 
   1047       bld.SetInsertPoint(body);
   1048 
   1049       std::pair<llvm::BasicBlock*, llvm::BasicBlock*> saved_loop = loop;
   1050       loop = std::make_pair(header, after);
   1051       visit_exec_list(&ir->body_instructions, this);
   1052       loop = saved_loop;
   1053 
   1054       if(ir->counter && ir->increment)
   1055       {
   1056          switch(ir->counter->type->base_type)
   1057          {
   1058          case GLSL_TYPE_BOOL:
   1059          case GLSL_TYPE_UINT:
   1060          case GLSL_TYPE_INT:
   1061             bld.CreateStore(bld.CreateAdd(bld.CreateLoad(ctr), llvm_value(ir->increment)), ctr);
   1062             break;
   1063          case GLSL_TYPE_FLOAT:
   1064             bld.CreateStore(bld.CreateFAdd(bld.CreateLoad(ctr), llvm_value(ir->increment)), ctr);
   1065             break;
   1066          }
   1067       }
   1068       bld.CreateBr(header);
   1069 
   1070       bb = after;
   1071       bld.SetInsertPoint(bb);
   1072    }
   1073 
   1074    virtual void visit(class ir_if *ir)
   1075    {
   1076       llvm::BasicBlock* bbt = llvm::BasicBlock::Create(ctx, "if", fun);
   1077       llvm::BasicBlock* bbf = llvm::BasicBlock::Create(ctx, "else", fun);
   1078       llvm::BasicBlock* bbe = llvm::BasicBlock::Create(ctx, "endif", fun);
   1079       bld.CreateCondBr(llvm_value(ir->condition), bbt, bbf);
   1080 
   1081       bld.SetInsertPoint(bbt);
   1082       visit_exec_list(&ir->then_instructions, this);
   1083       bld.CreateBr(bbe);
   1084 
   1085       bld.SetInsertPoint(bbf);
   1086       visit_exec_list(&ir->else_instructions, this);
   1087       bld.CreateBr(bbe);
   1088 
   1089       bb = bbe;
   1090       bld.SetInsertPoint(bb);
   1091    }
   1092 
   1093    virtual void visit(class ir_return * ir)
   1094    {
   1095       if(!ir->value)
   1096          bld.CreateRetVoid();
   1097       else
   1098          bld.CreateRet(llvm_value(ir->value));
   1099 
   1100       bb = llvm::BasicBlock::Create(ctx, "dead_code.return", fun);
   1101       bld.SetInsertPoint(bb);
   1102    }
   1103 
   1104    virtual void visit(class ir_call * ir)
   1105    {
   1106       std::vector<llvm::Value*> args;
   1107 
   1108       foreach_iter(exec_list_iterator, iter, *ir)
   1109       {
   1110          ir_rvalue *arg = (ir_constant *)iter.get();
   1111          args.push_back(llvm_value(arg));
   1112       }
   1113 
   1114       result = bld.CreateCall(llvm_function(ir->get_callee()), llvm::ArrayRef<llvm::Value*>(args));
   1115 
   1116       llvm::AttrListPtr attr;
   1117       ((llvm::CallInst*)result)->setAttributes(attr);
   1118    }
   1119 
   1120    virtual void visit(class ir_constant * ir)
   1121    {
   1122       if (ir->type->base_type == GLSL_TYPE_STRUCT) {
   1123          std::vector<llvm::Constant*> fields;
   1124          foreach_iter(exec_list_iterator, iter, ir->components) {
   1125             ir_constant *field = (ir_constant *)iter.get();
   1126             fields.push_back(llvm_constant(field));
   1127          }
   1128          result = llvm::ConstantStruct::get((llvm::StructType*)llvm_type(ir->type), fields);
   1129       }
   1130       else if (ir->type->base_type == GLSL_TYPE_ARRAY) {
   1131          std::vector<llvm::Constant*> elems;
   1132          for (unsigned i = 0; i < ir->type->length; i++)
   1133             elems.push_back(llvm_constant(ir->array_elements[i]));
   1134          result = llvm::ConstantArray::get((llvm::ArrayType*)llvm_type(ir->type), elems);
   1135       }
   1136       else
   1137       {
   1138          llvm::Type* base_type = llvm_base_type(ir->type->base_type);
   1139          llvm::Type* vec_type = llvm_vec_type(ir->type);
   1140          llvm::Type* type = llvm_type(ir->type);
   1141 
   1142          std::vector<llvm::Constant*> vecs;
   1143          unsigned idx = 0;
   1144          for (unsigned i = 0; i < ir->type->matrix_columns; ++i) {
   1145             std::vector<llvm::Constant*> elems;
   1146             for (unsigned j = 0; j < ir->type->vector_elements; ++j) {
   1147                llvm::Constant* elem;
   1148                switch(ir->type->base_type)
   1149                {
   1150                case GLSL_TYPE_FLOAT:
   1151                   elem = llvm::ConstantFP::get(base_type, ir->value.f[idx]);
   1152                   break;
   1153                case GLSL_TYPE_UINT:
   1154                   elem = llvm::ConstantInt::get(base_type, ir->value.u[idx]);
   1155                   break;
   1156                case GLSL_TYPE_INT:
   1157                   elem = llvm::ConstantInt::get(base_type, ir->value.i[idx]);
   1158                   break;
   1159                case GLSL_TYPE_BOOL:
   1160                   elem = llvm::ConstantInt::get(base_type, ir->value.b[idx]);
   1161                   break;
   1162                }
   1163                elems.push_back(elem);
   1164                ++idx;
   1165             }
   1166 
   1167             llvm::Constant* vec;
   1168             if(ir->type->vector_elements > 1) {
   1169                llvm::ArrayRef<llvm::Constant*> ConstantArray(elems);
   1170                vec = llvm::ConstantVector::get(ConstantArray);
   1171             } else {
   1172                vec = elems[0];
   1173             }
   1174             vecs.push_back(vec);
   1175          }
   1176 
   1177          if(ir->type->matrix_columns > 1)
   1178             result = llvm::ConstantArray::get((llvm::ArrayType*)type, vecs);
   1179          else
   1180             result = vecs[0];
   1181       }
   1182    }
   1183 
   1184    llvm::Value* llvm_shuffle(llvm::Value* val, int* shuffle_mask, unsigned res_width, const llvm::Twine &name = "")
   1185    {
   1186       llvm::Type* elem_type = val->getType();
   1187       llvm::Type* res_type = elem_type;;
   1188       unsigned val_width = 1;
   1189       if(val->getType()->isVectorTy())
   1190       {
   1191          val_width = ((llvm::VectorType*)val->getType())->getNumElements();
   1192          elem_type = ((llvm::VectorType*)val->getType())->getElementType();
   1193       }
   1194       if(res_width > 1)
   1195          res_type = llvm::VectorType::get(elem_type, res_width);
   1196 
   1197       llvm::Constant* shuffle_mask_values[4];
   1198       assert(res_width <= 4);
   1199       bool any_def = false;
   1200       for(unsigned i = 0; i < res_width; ++i)
   1201       {
   1202          if(shuffle_mask[i] < 0)
   1203             shuffle_mask_values[i] = llvm::UndefValue::get(llvm::Type::getInt32Ty(ctx));
   1204          else
   1205          {
   1206             any_def = true;
   1207             shuffle_mask_values[i] = llvm_int(shuffle_mask[i]);
   1208          }
   1209       }
   1210 
   1211       llvm::Value* undef = llvm::UndefValue::get(res_type);
   1212       if(!any_def)
   1213          return undef;
   1214 
   1215       if(val_width > 1)
   1216       {
   1217          if(res_width > 1)
   1218          {
   1219             if(val_width == res_width)
   1220             {
   1221                bool nontrivial = false;
   1222                for(unsigned i = 0; i < val_width; ++i)
   1223                {
   1224                   if(shuffle_mask[i] != (int)i)
   1225                      nontrivial = true;
   1226                }
   1227                if(!nontrivial)
   1228                   return val;
   1229             }
   1230 
   1231             return bld.CreateShuffleVector(val, llvm::UndefValue::get(val->getType()), llvm::ConstantVector::get(pack(shuffle_mask_values, res_width)), name);
   1232          }
   1233          else
   1234             return bld.CreateExtractElement(val, llvm_int(shuffle_mask[0]), name);
   1235       }
   1236       else
   1237       {
   1238          if(res_width > 1)
   1239          {
   1240             llvm::Value* tmp = undef;
   1241             for(unsigned i = 0; i < res_width; ++i)
   1242             {
   1243                if(shuffle_mask[i] >= 0)
   1244                tmp = bld.CreateInsertElement(tmp, val, llvm_int(i), name);
   1245             }
   1246             return tmp;
   1247          }
   1248          else if(shuffle_mask[0] >= 0)
   1249             return val;
   1250          else
   1251             return undef;
   1252       }
   1253    }
   1254 
   1255 
   1256    virtual void visit(class ir_swizzle * swz)
   1257    {
   1258       llvm::Value* val = llvm_value(swz->val);
   1259       int mask[4] = {swz->mask.x, swz->mask.y, swz->mask.z, swz->mask.w};
   1260       result = llvm_shuffle(val, mask, swz->mask.num_components, "swizzle");
   1261    }
   1262 
   1263    virtual void visit(class ir_assignment * ir)
   1264    {
   1265       llvm::Value* lhs = llvm_pointer(ir->lhs);
   1266       llvm::Value* rhs = llvm_value(ir->rhs);
   1267       unsigned width = ir->lhs->type->vector_elements;
   1268       unsigned mask = (1 << width) - 1;
   1269       assert(rhs);
   1270 
   1271       // TODO: masking for matrix assignment
   1272       if (ir->rhs->type->is_matrix()) {
   1273          bld.CreateStore(rhs, lhs, "mat_str");
   1274          return;
   1275       }
   1276 
   1277       if (!(ir->write_mask & mask))
   1278          return;
   1279 
   1280       if (ir->rhs->type->vector_elements < width) {
   1281          int expand_mask[4] = {-1, -1, -1, -1};
   1282          for (unsigned i = 0; i < ir->lhs->type->vector_elements; ++i)
   1283             expand_mask[i] = i;
   1284 //         printf("ve: %u w %u issw: %i\n", ir->rhs->type->vector_elements, width, !!ir->rhs->as_swizzle());
   1285          rhs = llvm_shuffle(rhs, expand_mask, width, "assign.expand");
   1286       }
   1287 
   1288       if (width > 1 && (ir->write_mask & mask) != mask) {
   1289          llvm::Constant* blend_mask[4];
   1290          // refer to ir.h: ir_assignment::write_mask
   1291          // A partially-set write mask means that each enabled channel gets
   1292          // the value from a consecutive channel of the rhs.
   1293          unsigned rhsChannel = 0;
   1294          for (unsigned i = 0; i < width; ++i) {
   1295             if (ir->write_mask & (1 << i))
   1296                blend_mask[i] = llvm_int(width + rhsChannel++);
   1297             else
   1298                blend_mask[i] = llvm_int(i);
   1299          }
   1300          rhs = bld.CreateShuffleVector(bld.CreateLoad(lhs), rhs, llvm::ConstantVector::get(pack(blend_mask, width)), "assign.writemask");
   1301       }
   1302 
   1303       if(ir->condition)
   1304          rhs = bld.CreateSelect(llvm_value(ir->condition), rhs, bld.CreateLoad(lhs), "assign.conditional");
   1305 
   1306       bld.CreateStore(rhs, lhs);
   1307    }
   1308 
   1309    virtual void visit(class ir_variable * var)
   1310    {
   1311       llvm_variable(var);
   1312    }
   1313 
   1314    virtual void visit(ir_function_signature *sig)
   1315    {
   1316       if(!sig->is_defined)
   1317          return;
   1318 
   1319       assert(!fun);
   1320       fun = llvm_function(sig);
   1321 
   1322       bb = llvm::BasicBlock::Create(ctx, "entry", fun);
   1323       bld.SetInsertPoint(bb);
   1324 
   1325       llvm::Function::arg_iterator ai = fun->arg_begin();
   1326       if (!strcmp("main",sig->function_name()))
   1327       {
   1328          assert(3 == fun->arg_size());
   1329          bld.CreateStore(ai, inputsPtr);
   1330          inputs = ai;
   1331          ai++;
   1332          bld.CreateStore(ai, outputsPtr);
   1333          outputs = ai;
   1334          ai++;
   1335          bld.CreateStore(ai, constantsPtr);
   1336          constants = ai;
   1337          ai++;
   1338       }
   1339       else
   1340       {
   1341          foreach_iter(exec_list_iterator, iter, sig->parameters) {
   1342             ir_variable* arg = (ir_variable*)iter.get();
   1343             ai->setName(arg->name);
   1344             bld.CreateStore(ai, llvm_variable(arg));
   1345             ++ai;
   1346          }
   1347          inputs = bld.CreateLoad(inputsPtr);
   1348          outputs = bld.CreateLoad(outputsPtr);
   1349          constants = bld.CreateLoad(constantsPtr);
   1350       }
   1351       inputs->setName("gl_inputs");
   1352       outputs->setName("gl_outputs");
   1353       constants->setName("gl_constants");
   1354 
   1355 
   1356 
   1357       foreach_iter(exec_list_iterator, iter, sig->body) {
   1358          ir_instruction *ir = (ir_instruction *)iter.get();
   1359 
   1360          ir->accept(this);
   1361       }
   1362 
   1363       if(fun->getReturnType()->isVoidTy())
   1364          bld.CreateRetVoid();
   1365       else
   1366          bld.CreateRet(llvm::UndefValue::get(fun->getReturnType()));
   1367 
   1368       bb = NULL;
   1369       fun = NULL;
   1370    }
   1371 
   1372    virtual void visit(class ir_function * funs)
   1373    {
   1374       foreach_iter(exec_list_iterator, iter, *funs)
   1375       {
   1376          ir_function_signature* sig = (ir_function_signature*)iter.get();
   1377          sig->accept(this);
   1378       }
   1379    }
   1380 };
   1381 
   1382 struct llvm::Module *
   1383 glsl_ir_to_llvm_module(struct exec_list *ir, llvm::Module * mod,
   1384                         const struct GGLState * gglCtx, const char * shaderSuffix)
   1385 {
   1386    ir_to_llvm_visitor v(mod, gglCtx, shaderSuffix);
   1387 
   1388    visit_exec_list(ir, &v);
   1389 
   1390 //   mod->dump();
   1391    if(llvm::verifyModule(*mod, llvm::PrintMessageAction, 0))
   1392    {
   1393       puts("**\n module verification failed **\n");
   1394       mod->dump();
   1395       assert(0);
   1396       return NULL;
   1397    }
   1398 
   1399    return mod;
   1400    //v.ir_to_llvm_emit_op1(NULL, OPCODE_END, ir_to_llvm_undef_dst, ir_to_llvm_undef);
   1401 }
   1402