1 /* 2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. 3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 4 * Copyright 2010 Intel Corporation 5 * Copyright 2010 Luca Barbieri 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 * DEALINGS IN THE SOFTWARE. 25 */ 26 27 /** 28 * \file ir_to_llvm.cpp 29 * 30 * Translates the IR to LLVM 31 */ 32 33 /* this tends to get set as part of LLVM_CFLAGS, but we definitely want asserts */ 34 #ifdef NDEBUG 35 #undef NDEBUG 36 #endif 37 38 #include "llvm/ADT/ArrayRef.h" 39 #include "llvm/DerivedTypes.h" 40 #include "llvm/LLVMContext.h" 41 #include "llvm/Module.h" 42 #include "llvm/Analysis/Verifier.h" 43 #include "llvm/Support/IRBuilder.h" 44 //#include "llvm/Intrinsics.h" 45 46 #include <vector> 47 #include <stdio.h> 48 #include <map> 49 /* 50 #ifdef _MSC_VER 51 #include <unordered_map> 52 #else 53 #include <tr1/unordered_map> 54 #endif 55 // use C++0x/Microsoft convention 56 namespace std 57 { 58 using namespace tr1; 59 } 60 //*/ 61 62 #include "ir.h" 63 #include "ir_visitor.h" 64 #include "glsl_types.h" 65 #include "src/mesa/main/mtypes.h" 66 67 // Helper function to convert array to llvm::ArrayRef 68 template <typename T, size_t N> 69 static inline llvm::ArrayRef<T> pack(T const (&array)[N]) { 70 return llvm::ArrayRef<T>(array); 71 } 72 73 // Helper function to convert pointer + size to llvm::ArrayRef 74 template <typename T> 75 static inline llvm::ArrayRef<T> pack(T const *ptr, size_t n) { 76 return llvm::ArrayRef<T>(ptr, n); 77 } 78 79 struct GGLState; 80 81 llvm::Value * tex2D(llvm::IRBuilder<> & builder, llvm::Value * in1, const unsigned sampler, 82 const GGLState * gglCtx); 83 llvm::Value * texCube(llvm::IRBuilder<> & builder, llvm::Value * in1, const unsigned sampler, 84 const GGLState * gglCtx); 85 86 class ir_to_llvm_visitor : public ir_visitor { 87 ir_to_llvm_visitor(); 88 public: 89 90 91 llvm::LLVMContext& ctx; 92 llvm::Module* mod; 93 llvm::Function* fun; 94 // could easily support more loops, but GLSL doesn't support multiloop break/continue 95 std::pair<llvm::BasicBlock*, llvm::BasicBlock*> loop; 96 llvm::BasicBlock* bb; 97 llvm::Value* result; 98 llvm::IRBuilder<> bld; 99 100 const GGLState * gglCtx; 101 const char * shaderSuffix; 102 llvm::Value * inputsPtr, * outputsPtr, * constantsPtr; // internal globals to store inputs/outputs/constants pointers 103 llvm::Value * inputs, * outputs, * constants; 104 105 ir_to_llvm_visitor(llvm::Module* p_mod, const GGLState * GGLCtx, const char * suffix) 106 : ctx(p_mod->getContext()), mod(p_mod), fun(0), loop(std::make_pair((llvm::BasicBlock*)0, 107 (llvm::BasicBlock*)0)), bb(0), bld(ctx), gglCtx(GGLCtx), shaderSuffix(suffix), 108 inputsPtr(NULL), outputsPtr(NULL), constantsPtr(NULL), 109 inputs(NULL), outputs(NULL), constants(NULL) 110 { 111 llvm::PointerType * const floatVecPtrType = llvm::PointerType::get(llvm::VectorType::get(bld.getFloatTy(),4), 0); 112 llvm::Constant * const nullFloatVecPtr = llvm::Constant::getNullValue(floatVecPtrType); 113 // make input, output and consts global pointers so they can be used in 114 // different LLVM functions since the shader shares these "registers" across "functions" 115 116 inputsPtr = new llvm::GlobalVariable(*mod, floatVecPtrType, false, 117 llvm::GlobalValue::InternalLinkage, nullFloatVecPtr, "gl_inputPtr"); 118 119 outputsPtr = new llvm::GlobalVariable(*mod, floatVecPtrType, false, 120 llvm::GlobalValue::InternalLinkage, nullFloatVecPtr, "gl_outputsPtr"); 121 122 constantsPtr = new llvm::GlobalVariable(*mod, floatVecPtrType, false, 123 llvm::GlobalValue::InternalLinkage, nullFloatVecPtr, "gl_constantsPtr"); 124 } 125 126 llvm::Type* llvm_base_type(unsigned base_type) 127 { 128 switch(base_type) 129 { 130 case GLSL_TYPE_VOID: 131 return llvm::Type::getVoidTy(ctx); 132 case GLSL_TYPE_UINT: 133 case GLSL_TYPE_INT: 134 return llvm::Type::getInt32Ty(ctx); 135 case GLSL_TYPE_FLOAT: 136 return llvm::Type::getFloatTy(ctx); 137 case GLSL_TYPE_BOOL: 138 return llvm::Type::getInt1Ty(ctx); 139 case GLSL_TYPE_SAMPLER: 140 return llvm::PointerType::getUnqual(llvm::Type::getVoidTy(ctx)); 141 default: 142 assert(0); 143 return 0; 144 } 145 } 146 147 llvm::Type* llvm_vec_type(const glsl_type* type) 148 { 149 if (type->is_array()) 150 return llvm::ArrayType::get(llvm_type(type->fields.array), type->array_size()); 151 152 if (type->is_record()) 153 { 154 std::vector<llvm::Type*> fields; 155 for (unsigned i = 0; i < type->length; i++) 156 fields.push_back(llvm_type(type->fields.structure[i].type)); 157 return llvm::StructType::get(ctx, llvm::ArrayRef<llvm::Type*>( 158 fields)); 159 } 160 161 llvm::Type* base_type = llvm_base_type(type->base_type); 162 if (type->vector_elements <= 1) { 163 return base_type; 164 } else { 165 return llvm::VectorType::get(base_type, type->vector_elements); 166 } 167 } 168 169 llvm::Type* llvm_type(const glsl_type* type) 170 { 171 llvm::Type* vec_type = llvm_vec_type(type); 172 if (type->matrix_columns <= 1) { 173 return vec_type; 174 } else { 175 return llvm::ArrayType::get(vec_type, type->matrix_columns); 176 } 177 } 178 179 typedef std::map<ir_variable*, llvm::Value*> llvm_variables_t; 180 //typedef std::unordered_map<ir_variable*, llvm::Value*> llvm_variables_t; 181 llvm_variables_t llvm_variables; 182 183 llvm::Value* llvm_variable(class ir_variable* var) 184 { 185 llvm_variables_t::iterator vari = llvm_variables.find(var); 186 if (vari != llvm_variables.end()) { 187 return vari->second; 188 } else { 189 llvm::Type* type = llvm_type(var->type); 190 191 llvm::Value* v = NULL; 192 if(fun) { 193 if (ir_var_in == var->mode) 194 { 195 assert(var->location >= 0); 196 v = bld.CreateConstGEP1_32(inputs, var->location); 197 v = bld.CreateBitCast(v, llvm::PointerType::get(llvm_type(var->type), 0), var->name); 198 } 199 else if (ir_var_out == var->mode) 200 { 201 assert(var->location >= 0); 202 v = bld.CreateConstGEP1_32(outputs, var->location); 203 v = bld.CreateBitCast(v, llvm::PointerType::get(llvm_type(var->type), 0), var->name); 204 } 205 else if (ir_var_uniform == var->mode) 206 { 207 assert(var->location >= 0); 208 v = bld.CreateConstGEP1_32(constants, var->location); 209 v = bld.CreateBitCast(v, llvm::PointerType::get(llvm_type(var->type), 0), var->name); 210 } 211 else 212 { 213 if(bb == &fun->getEntryBlock()) 214 v = bld.CreateAlloca(type, 0, var->name); 215 else 216 v = new llvm::AllocaInst(type, 0, var->name, fun->getEntryBlock().getTerminator()); 217 } 218 } else { 219 // TODO: can anything global be non-constant in GLSL?; fix linkage 220 //printf("var '%s' mode=%d location=%d \n", var->name, var->mode, var->location); 221 switch(var->mode) 222 { 223 case ir_var_auto: // fall through 224 case ir_var_temporary: 225 { 226 llvm::Constant * init = llvm::UndefValue::get(llvm_type(var->type)); 227 if(var->constant_value) 228 init = llvm_constant(var->constant_value); 229 v = new llvm::GlobalVariable(*mod, type, var->read_only, llvm::GlobalValue::InternalLinkage, init, var->name); 230 break; 231 } 232 case ir_var_in: // fall through 233 case ir_var_out: // fall through 234 case ir_var_uniform: // fall through 235 assert(var->location >= 0); 236 return NULL; // variable outside of function means declaration 237 default: 238 assert(0); 239 } 240 241 // llvm::Function::LinkageTypes linkage; 242 // if(var->mode == ir_var_auto || var->mode == ir_var_temporary) 243 // linkage = llvm::GlobalValue::InternalLinkage; 244 // else 245 // linkage = llvm::GlobalValue::ExternalLinkage; 246 // llvm::Constant* init = 0; 247 // if(var->constant_value) 248 // { 249 // init = llvm_constant(var->constant_value); 250 // // this constants need to be external (ie. written to output) 251 // if (llvm::GlobalValue::ExternalLinkage == linkage) 252 // linkage = llvm::GlobalValue::AvailableExternallyLinkage; 253 // } 254 // else if(linkage == llvm::GlobalValue::InternalLinkage) 255 // init = llvm::UndefValue::get(llvm_type(var->type)); 256 // v = new llvm::GlobalVariable(*mod, type, var->read_only, linkage, init, var->name); 257 } 258 assert(v); 259 llvm_variables[var] = v; 260 return v; 261 } 262 } 263 264 //typedef std::map<ir_function_signature*, llvm::Function*> llvm_functions_t; 265 //typedef std::unordered_map<ir_function_signature*, llvm::Function*> llvm_functions_t; 266 //llvm_functions_t llvm_functions; 267 268 llvm::Function* llvm_function(class ir_function_signature* sig) 269 { 270 const char* name = sig->function_name(); 271 char * functionName = (char *)malloc(strlen(name) + strlen(shaderSuffix) + 1); 272 strcpy(functionName, name); 273 strcat(functionName, shaderSuffix); 274 llvm::Function * function = mod->getFunction(functionName); 275 if (function) 276 { 277 free(functionName); 278 return function; 279 } 280 else 281 { 282 llvm::Function::LinkageTypes linkage; 283 std::vector<llvm::Type*> params; 284 foreach_iter(exec_list_iterator, iter, sig->parameters) { 285 ir_variable* arg = (ir_variable*)iter.get(); 286 params.push_back(llvm_type(arg->type)); 287 } 288 289 if(!strcmp(name, "main") || !sig->is_defined) 290 { 291 linkage = llvm::Function::ExternalLinkage; 292 llvm::PointerType * vecPtrTy = llvm::PointerType::get(llvm::VectorType::get(bld.getFloatTy(), 4), 0); 293 assert(0 == params.size()); 294 params.push_back(vecPtrTy); // inputs 295 params.push_back(vecPtrTy); // outputs 296 params.push_back(vecPtrTy); // constants 297 } 298 else { 299 linkage = llvm::Function::InternalLinkage; 300 } 301 llvm::FunctionType* ft = llvm::FunctionType::get(llvm_type(sig->return_type), 302 llvm::ArrayRef<llvm::Type*>(params), 303 false); 304 function = llvm::Function::Create(ft, linkage, functionName, mod); 305 free(functionName); 306 return function; 307 } 308 } 309 310 llvm::Value* llvm_value(class ir_instruction* ir) 311 { 312 result = 0; 313 ir->accept(this); 314 return result; 315 } 316 317 llvm::Constant* llvm_constant(class ir_instruction* ir) 318 { 319 return (llvm::Constant *)llvm_value(ir); 320 //return &dynamic_cast<llvm::Constant&>(*llvm_value(ir)); 321 } 322 323 llvm::Constant* llvm_int(unsigned v) 324 { 325 return llvm::ConstantInt::get(llvm::Type::getInt32Ty(ctx), v); 326 } 327 328 llvm::Value* llvm_pointer(class ir_rvalue* ir) 329 { 330 if(ir_dereference_variable* deref = ir->as_dereference_variable()) 331 return llvm_variable(deref->variable_referenced()); 332 else if(ir_dereference_array* deref = ir->as_dereference_array()) 333 { 334 llvm::Value* gep[2] = {llvm_int(0), llvm_value(deref->array_index)}; 335 return bld.CreateInBoundsGEP(llvm_pointer(deref->array), gep); 336 } 337 else if(ir->as_dereference()) 338 { 339 ir_dereference_record* deref = (ir_dereference_record*)ir; 340 int idx = deref->record->type->field_index(deref->field); 341 assert(idx >= 0); 342 return bld.CreateConstInBoundsGEP2_32(llvm_pointer(deref->record), 0, idx); 343 } 344 else 345 { 346 assert(0); 347 return 0; 348 } 349 } 350 351 // llvm::Value* llvm_intrinsic(llvm::Intrinsic::ID id, llvm::Value* a) 352 // { 353 // llvm::Type* types[1] = {a->getType()}; 354 // return bld.CreateCall(llvm::Intrinsic::getDeclaration(mod, id, types, 1), a); 355 // } 356 // 357 // llvm::Value* llvm_intrinsic(llvm::Intrinsic::ID id, llvm::Value* a, llvm::Value* b) 358 // { 359 // llvm::Type* types[2] = {a->getType(), b->getType()}; 360 // /* only one type suffix is usually needed, so pass 1 here */ 361 // return bld.CreateCall2(llvm::Intrinsic::getDeclaration(mod, id, types, 1), a, b); 362 // } 363 364 llvm::Value* llvm_intrinsic_unop(ir_expression_operation op, llvm::Value * op0) 365 { 366 llvm::Type * floatType = llvm::Type::getFloatTy(ctx); 367 const char * name = NULL; 368 switch (op) { 369 case ir_unop_sin: 370 name = "sinf"; 371 break; 372 case ir_unop_cos: 373 name = "cosf"; 374 break; 375 default: 376 assert(0); 377 } 378 379 llvm::Function * function = mod->getFunction(name); 380 if (!function) { 381 // predeclare the intrinsic 382 std::vector<llvm::Type*> args; 383 args.push_back(floatType); 384 llvm::FunctionType* type = llvm::FunctionType::get(floatType, 385 llvm::ArrayRef<llvm::Type*>(args), 386 false); 387 function = llvm::Function::Create(type, llvm::Function::ExternalLinkage, name, mod); 388 function->setCallingConv(llvm::CallingConv::C); 389 } 390 391 return bld.CreateCall(function, op0); 392 } 393 394 llvm::Value* llvm_intrinsic_binop(ir_expression_operation op, llvm::Value * op0, llvm::Value * op1) 395 { 396 llvm::Type * floatType = llvm::Type::getFloatTy(ctx); 397 const char * name = NULL; 398 switch (op) { 399 case ir_binop_pow: 400 name = "powf"; 401 break; 402 default: 403 assert(0); 404 } 405 406 llvm::Function * function = mod->getFunction(name); 407 if (!function) { 408 // predeclare the intrinsic 409 std::vector<llvm::Type*> args; 410 args.push_back(floatType); 411 args.push_back(floatType); 412 llvm::FunctionType* type = llvm::FunctionType::get(floatType, 413 llvm::ArrayRef<llvm::Type*>(args), 414 false); 415 function = llvm::Function::Create(type, llvm::Function::ExternalLinkage, name, mod); 416 function->setCallingConv(llvm::CallingConv::C); 417 } 418 419 return bld.CreateCall2(function, op0, op1); 420 } 421 422 llvm::Constant* llvm_imm(llvm::Type* type, double v) 423 { 424 if(type->isVectorTy()) 425 { 426 std::vector<llvm::Constant*> values; 427 values.push_back(llvm_imm(((llvm::VectorType*)type)->getElementType(), v)); 428 for(unsigned i = 1; i < ((llvm::VectorType*)type)->getNumElements(); ++i) 429 values.push_back(values[0]); 430 return llvm::ConstantVector::get(values); 431 } 432 else if(type->isIntegerTy()) 433 return llvm::ConstantInt::get(type, v); 434 else if(type->isFloatingPointTy()) 435 return llvm::ConstantFP::get(type, v); 436 else 437 { 438 assert(0); 439 return 0; 440 } 441 } 442 443 static llvm::Value* create_shuffle3(llvm::IRBuilder<>& bld, llvm::Value* v, unsigned a, unsigned b, unsigned c, const llvm::Twine& name = "") 444 { 445 llvm::Type* int_ty = llvm::Type::getInt32Ty(v->getContext()); 446 llvm::Constant* vals[3] = {llvm::ConstantInt::get(int_ty, a), llvm::ConstantInt::get(int_ty, b), llvm::ConstantInt::get(int_ty, c)}; 447 return bld.CreateShuffleVector(v, llvm::UndefValue::get(v->getType()), llvm::ConstantVector::get(pack(vals)), name); 448 } 449 450 llvm::Value* create_select(unsigned width, llvm::Value * cond, llvm::Value * tru, llvm::Value * fal, const char * name = "") 451 { 452 if (1 == width) 453 return bld.CreateSelect(cond, tru, fal, name); 454 455 llvm::Type * vectorType = tru->getType(); 456 llvm::Value * vector = llvm::Constant::getNullValue(vectorType); 457 for (unsigned int i = 0; i < width; i++) { 458 llvm::Value * c = bld.CreateExtractElement(cond, llvm_int(i)); 459 llvm::Value * t = bld.CreateExtractElement(tru, llvm_int(i)); 460 llvm::Value * f = bld.CreateExtractElement(fal, llvm_int(i)); 461 llvm::Value * v = bld.CreateSelect(c, t, f, name); 462 vector = bld.CreateInsertElement(vector, v, llvm_int(i), "vslct"); 463 } 464 return vector; 465 } 466 467 llvm::Value* create_dot_product(llvm::Value* ops0, llvm::Value* ops1, glsl_base_type type, unsigned width) 468 { 469 llvm::Value* prod; 470 switch (type) { 471 case GLSL_TYPE_UINT: 472 case GLSL_TYPE_INT: 473 prod = bld.CreateMul(ops0, ops1, "dot.mul"); 474 break; 475 case GLSL_TYPE_FLOAT: 476 prod = bld.CreateFMul(ops0, ops1, "dot.mul"); 477 break; 478 default: 479 assert(0); 480 } 481 482 if (width<= 1) 483 return prod; 484 485 llvm::Value* sum = 0; 486 for (unsigned i = 0; i < width; ++i) { 487 llvm::Value* elem = bld.CreateExtractElement(prod, llvm_int(i), "dot.elem"); 488 if (sum) { 489 if (type == GLSL_TYPE_FLOAT) 490 sum = bld.CreateFAdd(sum, elem, "dot.add"); 491 else 492 sum = bld.CreateAdd(sum, elem, "dot.add"); 493 } 494 else 495 sum = elem; 496 } 497 return sum; 498 } 499 500 llvm::Value* llvm_expression(ir_expression* ir) 501 { 502 llvm::Value* ops[2]; 503 for(unsigned i = 0; i < ir->get_num_operands(); ++i) 504 ops[i] = llvm_value(ir->operands[i]); 505 506 if(ir->get_num_operands() == 2) 507 { 508 int vecidx = -1; 509 int scaidx = -1; 510 if(ir->operands[0]->type->vector_elements <= 1 && ir->operands[1]->type->vector_elements > 1) 511 { 512 scaidx = 0; 513 vecidx = 1; 514 } 515 else if(ir->operands[0]->type->vector_elements > 1 && ir->operands[1]->type->vector_elements <= 1) 516 { 517 scaidx = 1; 518 vecidx = 0; 519 } 520 else 521 assert(ir->operands[0]->type->vector_elements == ir->operands[1]->type->vector_elements); 522 523 if(scaidx >= 0) 524 { 525 llvm::Value* vec; 526 vec = llvm::UndefValue::get(ops[vecidx]->getType()); 527 for(unsigned i = 0; i < ir->operands[vecidx]->type->vector_elements; ++i) 528 vec = bld.CreateInsertElement(vec, ops[scaidx], llvm_int(i), "sca2vec"); 529 ops[scaidx] = vec; 530 } 531 } 532 533 switch (ir->operation) { 534 case ir_unop_logic_not: 535 return bld.CreateNot(ops[0]); 536 case ir_unop_neg: 537 switch (ir->operands[0]->type->base_type) { 538 case GLSL_TYPE_UINT: 539 case GLSL_TYPE_BOOL: 540 case GLSL_TYPE_INT: 541 return bld.CreateNeg(ops[0]); 542 case GLSL_TYPE_FLOAT: 543 return bld.CreateFNeg(ops[0]); 544 default: 545 assert(0); 546 } 547 case ir_unop_abs: 548 switch (ir->operands[0]->type->base_type) { 549 case GLSL_TYPE_UINT: 550 case GLSL_TYPE_BOOL: 551 return ops[0]; 552 case GLSL_TYPE_INT: 553 return create_select(ir->operands[0]->type->vector_elements, 554 bld.CreateICmpSGE(ops[0], llvm_imm(ops[0]->getType(), 0), "sabs.ge"), 555 ops[0], bld.CreateNeg(ops[0], "sabs.neg"), "sabs.select"); 556 case GLSL_TYPE_FLOAT: 557 return create_select(ir->operands[0]->type->vector_elements, 558 bld.CreateFCmpUGE(ops[0], llvm_imm(ops[0]->getType(), 0), "fabs.ge"), 559 ops[0], bld.CreateFNeg(ops[0], "fabs.neg"), "fabs.select"); 560 default: 561 assert(0); 562 } 563 case ir_unop_sign: 564 switch (ir->operands[0]->type->base_type) { 565 case GLSL_TYPE_BOOL: 566 return ops[0]; 567 case GLSL_TYPE_UINT: 568 return bld.CreateZExt(bld.CreateICmpNE(ops[0], llvm_imm(ops[0]->getType(), 0), "usign.ne"), ops[0]->getType(), "usign.zext"); 569 case GLSL_TYPE_INT: 570 return bld.CreateSelect(bld.CreateICmpNE(ops[0], llvm_imm(ops[0]->getType(), 0), "ssign.ne"), 571 bld.CreateSelect(bld.CreateICmpSGE(ops[0], llvm_imm(ops[0]->getType(), 0), "ssign.ge"), llvm_imm(ops[0]->getType(), 1), llvm_imm(ops[0]->getType(), -1), "sabs.selects"), 572 llvm_imm(ops[0]->getType(), 0), "sabs.select0"); 573 case GLSL_TYPE_FLOAT: 574 return bld.CreateSelect(bld.CreateFCmpONE(ops[0], llvm_imm(ops[0]->getType(), 0), "fsign.ne"), 575 bld.CreateSelect(bld.CreateFCmpUGE(ops[0], llvm_imm(ops[0]->getType(), 0), "fsign.ge"), llvm_imm(ops[0]->getType(), 1), llvm_imm(ops[0]->getType(), -1), "fabs.selects"), 576 llvm_imm(ops[0]->getType(), 0), "fabs.select0"); 577 default: 578 assert(0); 579 } 580 case ir_unop_rcp: 581 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); 582 return bld.CreateFDiv(llvm_imm(ops[0]->getType(), 1), ops[0]); 583 case ir_unop_exp: // fall through 584 case ir_unop_exp2: // fall through 585 case ir_unop_log: // fall through 586 case ir_unop_log2: // fall through 587 case ir_unop_sin: // fall through 588 case ir_unop_cos: 589 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); 590 return llvm_intrinsic_unop(ir->operation, ops[0]); 591 // TODO: implement these somehow 592 case ir_unop_dFdx: 593 assert(0); 594 //return llvm_intrinsic(llvm::Intrinsic::ddx, ops[0]); 595 case ir_unop_dFdy: 596 assert(0); 597 //return llvm_intrinsic(llvm::Intrinsic::ddy, ops[0]); 598 case ir_binop_add: 599 switch(ir->operands[0]->type->base_type) 600 { 601 case GLSL_TYPE_BOOL: 602 case GLSL_TYPE_UINT: 603 case GLSL_TYPE_INT: 604 return bld.CreateAdd(ops[0], ops[1]); 605 case GLSL_TYPE_FLOAT: 606 return bld.CreateFAdd(ops[0], ops[1]); 607 default: 608 assert(0); 609 } 610 case ir_binop_sub: 611 switch(ir->operands[0]->type->base_type) 612 { 613 case GLSL_TYPE_BOOL: 614 case GLSL_TYPE_UINT: 615 case GLSL_TYPE_INT: 616 return bld.CreateSub(ops[0], ops[1]); 617 case GLSL_TYPE_FLOAT: 618 return bld.CreateFSub(ops[0], ops[1]); 619 default: 620 assert(0); 621 } 622 case ir_binop_mul: 623 if (ir->operands[0]->type->is_matrix() && ir->operands[1]->type->is_vector()) 624 assert(0); 625 else if (ir->operands[0]->type->is_vector() && ir->operands[1]->type->is_matrix()) { 626 assert(0); // matrix multiplication should have been lowered to vector ops 627 llvm::VectorType * vectorType = llvm::VectorType::get(llvm_base_type(ir->operands[1]->type->base_type), ir->operands[1]->type->matrix_columns); 628 llvm::Value * vector = llvm::Constant::getNullValue(vectorType); 629 for (unsigned int i = 0; i < ir->operands[1]->type->matrix_columns; i++) { 630 llvm::Value * value = bld.CreateExtractValue(ops[1], i, "vec*mat_col"); 631 value = create_dot_product(value, ops[0], ir->operands[1]->type->base_type, ir->operands[1]->type->vector_elements); 632 vector = bld.CreateInsertElement(vector, value, llvm_int(i), "vec*mat_res"); 633 } 634 return vector; 635 } 636 else if (ir->operands[0]->type->is_matrix() && ir->operands[1]->type->is_matrix()) 637 assert(0); 638 639 switch (ir->operands[0]->type->base_type) { 640 case GLSL_TYPE_BOOL: 641 return bld.CreateAnd(ops[0], ops[1]); 642 case GLSL_TYPE_UINT: 643 case GLSL_TYPE_INT: 644 return bld.CreateMul(ops[0], ops[1]); 645 case GLSL_TYPE_FLOAT: 646 return bld.CreateFMul(ops[0], ops[1]); 647 default: 648 assert(0); 649 } 650 case ir_binop_div: 651 switch(ir->operands[0]->type->base_type) 652 { 653 case GLSL_TYPE_BOOL: 654 case GLSL_TYPE_UINT: 655 return bld.CreateUDiv(ops[0], ops[1]); 656 case GLSL_TYPE_INT: 657 return bld.CreateSDiv(ops[0], ops[1]); 658 case GLSL_TYPE_FLOAT: 659 return bld.CreateFDiv(ops[0], ops[1]); 660 default: 661 assert(0); 662 } 663 case ir_binop_mod: 664 switch(ir->operands[0]->type->base_type) 665 { 666 case GLSL_TYPE_BOOL: 667 case GLSL_TYPE_UINT: 668 return bld.CreateURem(ops[0], ops[1]); 669 case GLSL_TYPE_INT: 670 return bld.CreateSRem(ops[0], ops[1]); 671 case GLSL_TYPE_FLOAT: 672 return bld.CreateFRem(ops[0], ops[1]); 673 default: 674 assert(0); 675 } 676 case ir_binop_less: 677 switch(ir->operands[0]->type->base_type) 678 { 679 case GLSL_TYPE_BOOL: 680 case GLSL_TYPE_UINT: 681 return bld.CreateICmpULT(ops[0], ops[1]); 682 case GLSL_TYPE_INT: 683 return bld.CreateICmpSLT(ops[0], ops[1]); 684 case GLSL_TYPE_FLOAT: 685 return bld.CreateFCmpOLT(ops[0], ops[1]); 686 default: 687 assert(0); 688 } 689 case ir_binop_greater: 690 switch(ir->operands[0]->type->base_type) 691 { 692 case GLSL_TYPE_BOOL: 693 case GLSL_TYPE_UINT: 694 return bld.CreateICmpUGT(ops[0], ops[1]); 695 case GLSL_TYPE_INT: 696 return bld.CreateICmpSGT(ops[0], ops[1]); 697 case GLSL_TYPE_FLOAT: 698 return bld.CreateFCmpOGT(ops[0], ops[1]); 699 default: 700 assert(0); 701 } 702 case ir_binop_lequal: 703 switch(ir->operands[0]->type->base_type) 704 { 705 case GLSL_TYPE_BOOL: 706 case GLSL_TYPE_UINT: 707 return bld.CreateICmpULE(ops[0], ops[1]); 708 case GLSL_TYPE_INT: 709 return bld.CreateICmpSLE(ops[0], ops[1]); 710 case GLSL_TYPE_FLOAT: 711 return bld.CreateFCmpOLE(ops[0], ops[1]); 712 default: 713 assert(0); 714 } 715 case ir_binop_gequal: 716 switch(ir->operands[0]->type->base_type) 717 { 718 case GLSL_TYPE_BOOL: 719 case GLSL_TYPE_UINT: 720 return bld.CreateICmpUGE(ops[0], ops[1]); 721 case GLSL_TYPE_INT: 722 return bld.CreateICmpSGE(ops[0], ops[1]); 723 case GLSL_TYPE_FLOAT: 724 return bld.CreateFCmpOGE(ops[0], ops[1]); 725 default: 726 assert(0); 727 } 728 case ir_binop_equal: // fall through 729 case ir_binop_all_equal: // TODO: check op same as ir_binop_equal 730 switch (ir->operands[0]->type->base_type) { 731 case GLSL_TYPE_BOOL: 732 case GLSL_TYPE_UINT: 733 case GLSL_TYPE_INT: 734 return bld.CreateICmpEQ(ops[0], ops[1]); 735 case GLSL_TYPE_FLOAT: 736 return bld.CreateFCmpOEQ(ops[0], ops[1]); 737 default: 738 assert(0); 739 } 740 case ir_binop_nequal: 741 switch(ir->operands[0]->type->base_type) 742 { 743 case GLSL_TYPE_BOOL: 744 case GLSL_TYPE_UINT: 745 case GLSL_TYPE_INT: 746 return bld.CreateICmpNE(ops[0], ops[1]); 747 case GLSL_TYPE_FLOAT: 748 return bld.CreateFCmpONE(ops[0], ops[1]); 749 default: 750 assert(0); 751 } 752 case ir_binop_logic_xor: 753 assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); 754 return bld.CreateICmpNE(ops[0], ops[1]); 755 case ir_binop_logic_or: 756 assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); 757 return bld.CreateOr(ops[0], ops[1]); 758 case ir_binop_logic_and: 759 assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); 760 return bld.CreateAnd(ops[0], ops[1]); 761 case ir_binop_dot: 762 return create_dot_product(ops[0], ops[1], ir->operands[0]->type->base_type, ir->operands[0]->type->vector_elements); 763 // case ir_binop_cross: this op does not exist in ir.h 764 // assert(ir->operands[0]->type->vector_elements == 3); 765 // switch(ir->operands[0]->type->base_type) 766 // { 767 // case GLSL_TYPE_UINT: 768 // case GLSL_TYPE_INT: 769 // return bld.CreateSub( 770 // bld.CreateMul(create_shuffle3(bld, ops[0], 1, 2, 0, "cross.a120"), create_shuffle3(bld, ops[1], 2, 0, 1, "cross.a201"), "cross.ab"), 771 // bld.CreateMul(create_shuffle3(bld, ops[1], 1, 2, 0, "cross.b120"), create_shuffle3(bld, ops[0], 2, 0, 1, "cross.b201"), "cross.ba"), 772 // "cross.sub"); 773 // case GLSL_TYPE_FLOAT: 774 // return bld.CreateFSub( 775 // bld.CreateFMul(create_shuffle3(bld, ops[0], 1, 2, 0, "cross.a120"), create_shuffle3(bld, ops[1], 2, 0, 1, "cross.a201"), "cross.ab"), 776 // bld.CreateFMul(create_shuffle3(bld, ops[1], 1, 2, 0, "cross.b120"), create_shuffle3(bld, ops[0], 2, 0, 1, "cross.b201"), "cross.ba"), 777 // "cross.sub"); 778 // default: 779 // assert(0); 780 // } 781 case ir_unop_sqrt: 782 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); 783 return llvm_intrinsic_unop(ir->operation, ops[0]); 784 case ir_unop_rsq: 785 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); 786 return bld.CreateFDiv(llvm_imm(ops[0]->getType(), 1), llvm_intrinsic_unop(ir_unop_sqrt, ops[0]), "rsqrt.rcp"); 787 case ir_unop_i2f: 788 return bld.CreateSIToFP(ops[0], llvm_type(ir->type)); 789 case ir_unop_u2f: 790 case ir_unop_b2f: 791 return bld.CreateUIToFP(ops[0], llvm_type(ir->type)); 792 case ir_unop_b2i: 793 return bld.CreateZExt(ops[0], llvm_type(ir->type)); 794 case ir_unop_f2i: 795 return bld.CreateFPToSI(ops[0], llvm_type(ir->type)); 796 case ir_unop_f2b: 797 return bld.CreateFCmpONE(ops[0], llvm_imm(ops[0]->getType(), 0)); 798 case ir_unop_i2b: 799 return bld.CreateICmpNE(ops[0], llvm_imm(ops[0]->getType(), 0)); 800 case ir_unop_trunc: 801 { 802 if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT) 803 return ops[0]; 804 glsl_type int_type = *ir->operands[0]->type; 805 int_type.base_type = GLSL_TYPE_INT; 806 return bld.CreateSIToFP(bld.CreateFPToSI(ops[0], llvm_type(&int_type), "trunc.fptosi"),ops[0]->getType(), "trunc.sitofp"); 807 } 808 case ir_unop_floor: 809 { 810 if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT) 811 return ops[0]; 812 llvm::Value* one = llvm_imm(ops[0]->getType(), 1); 813 return bld.CreateFSub(ops[0], bld.CreateFRem(ops[0], one)); 814 } 815 case ir_unop_ceil: 816 { 817 if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT) 818 return ops[0]; 819 llvm::Value* one = llvm_imm(ops[0]->getType(), 1); 820 return bld.CreateFAdd(bld.CreateFSub(ops[0], bld.CreateFRem(ops[0], one)), one); 821 } 822 case ir_unop_fract: 823 { 824 if(ir->operands[0]->type->base_type != GLSL_TYPE_FLOAT) 825 return llvm_imm(ops[0]->getType(), 0); 826 llvm::Value* one = llvm_imm(ops[0]->getType(), 1); 827 return bld.CreateFRem(ops[0], one); 828 } 829 // TODO: NaNs might be wrong in min/max, not sure how to fix it 830 case ir_binop_min: 831 switch(ir->operands[0]->type->base_type) 832 { 833 case GLSL_TYPE_BOOL: 834 return bld.CreateAnd(ops[0], ops[1], "bmin"); 835 case GLSL_TYPE_UINT: 836 return bld.CreateSelect(bld.CreateICmpULE(ops[0], ops[1], "umin.le"), ops[0], ops[1], "umin.select"); 837 case GLSL_TYPE_INT: 838 return bld.CreateSelect(bld.CreateICmpSLE(ops[0], ops[1], "smin.le"), ops[0], ops[1], "smin.select"); 839 case GLSL_TYPE_FLOAT: 840 return bld.CreateSelect(bld.CreateFCmpULE(ops[0], ops[1], "fmin.le"), ops[0], ops[1], "fmin.select"); 841 default: 842 assert(0); 843 } 844 case ir_binop_max: 845 switch(ir->operands[0]->type->base_type) 846 { 847 case GLSL_TYPE_BOOL: 848 return bld.CreateOr(ops[0], ops[1], "bmax"); 849 case GLSL_TYPE_UINT: 850 return bld.CreateSelect(bld.CreateICmpUGE(ops[0], ops[1], "umax.ge"), ops[0], ops[1], "umax.select"); 851 case GLSL_TYPE_INT: 852 return bld.CreateSelect(bld.CreateICmpSGE(ops[0], ops[1], "smax.ge"), ops[0], ops[1], "smax.select"); 853 case GLSL_TYPE_FLOAT: 854 return bld.CreateSelect(bld.CreateFCmpUGE(ops[0], ops[1], "fmax.ge"), ops[0], ops[1], "fmax.select"); 855 default: 856 assert(0); 857 } 858 case ir_binop_pow: 859 assert(GLSL_TYPE_FLOAT == ir->operands[0]->type->base_type); 860 assert(GLSL_TYPE_FLOAT == ir->operands[1]->type->base_type); 861 return llvm_intrinsic_binop(ir_binop_pow, ops[0], ops[1]); 862 case ir_unop_bit_not: 863 return bld.CreateNot(ops[0]); 864 case ir_binop_bit_and: 865 return bld.CreateAnd(ops[0], ops[1]); 866 case ir_binop_bit_xor: 867 return bld.CreateXor(ops[0], ops[1]); 868 case ir_binop_bit_or: 869 return bld.CreateOr(ops[0], ops[1]); 870 case ir_binop_lshift: 871 switch(ir->operands[0]->type->base_type) 872 { 873 case GLSL_TYPE_BOOL: 874 case GLSL_TYPE_UINT: 875 case GLSL_TYPE_INT: 876 return bld.CreateLShr(ops[0], ops[1]); 877 default: 878 assert(0); 879 } 880 case ir_binop_rshift: 881 switch(ir->operands[0]->type->base_type) 882 { 883 case GLSL_TYPE_BOOL: 884 case GLSL_TYPE_UINT: 885 return bld.CreateLShr(ops[0], ops[1]); 886 case GLSL_TYPE_INT: 887 return bld.CreateAShr(ops[0], ops[1]); 888 default: 889 assert(0); 890 return 0; 891 } 892 default: 893 printf("ir->operation=%d \n", ir->operation); 894 assert(0); 895 return 0; 896 } 897 } 898 899 virtual void visit(class ir_expression * ir) 900 { 901 result = llvm_expression(ir); 902 } 903 904 virtual void visit(class ir_dereference_array *ir) 905 { 906 result = bld.CreateLoad(llvm_pointer(ir)); 907 } 908 909 virtual void visit(class ir_dereference_record *ir) 910 { 911 result = bld.CreateLoad(llvm_pointer(ir)); 912 } 913 914 virtual void visit(class ir_dereference_variable *ir) 915 { 916 result = bld.CreateLoad(llvm_pointer(ir), ir->variable_referenced()->name); 917 } 918 919 virtual void visit(class ir_texture * ir) 920 { 921 llvm::Value * coordinate = llvm_value(ir->coordinate); 922 if (ir->projector) 923 { 924 llvm::Value * proj = llvm_value(ir->projector); 925 unsigned width = ((llvm::VectorType*)coordinate->getType())->getNumElements(); 926 llvm::Value * div = llvm::Constant::getNullValue(coordinate->getType()); 927 for (unsigned i = 0; i < width; i++) 928 div = bld.CreateInsertElement(div, proj, bld.getInt32(i), "texProjDup"); 929 coordinate = bld.CreateFDiv(coordinate, div, "texProj"); 930 } 931 932 ir_variable * sampler = NULL; 933 if(ir_dereference_variable* deref = ir->sampler->as_dereference_variable()) 934 sampler = deref->variable_referenced(); 935 else if(ir_dereference_array* deref = ir->sampler->as_dereference_array()) 936 { 937 assert(0); // not implemented 938 return; 939 deref->array_index; 940 deref->array; 941 } 942 else if(ir->sampler->as_dereference()) 943 { 944 assert(0); // not implemented 945 ir_dereference_record* deref = (ir_dereference_record*)ir->sampler; 946 int idx = deref->record->type->field_index(deref->field); 947 assert(idx >= 0); 948 } 949 else 950 assert(0); 951 952 assert(sampler->location >= 0 && sampler->location < 64); // TODO: proper limit 953 954 // ESSL texture LOD is only for 2D texture in vert shader, and it's explicit 955 // bias used only in frag shader, and added to computed LOD 956 assert(ir_tex == ir->op); 957 958 assert(GLSL_TYPE_FLOAT == sampler->type->sampler_type); 959 printf("sampler '%s' location=%d dim=%d type=%d proj=%d lod=%d \n", sampler->name, sampler->location, 960 sampler->type->sampler_dimensionality, sampler->type->sampler_type, 961 ir->projector ? 1 : 0, ir->lod_info.lod ? 1 : 0); 962 if (GLSL_SAMPLER_DIM_CUBE == sampler->type->sampler_dimensionality) 963 result = texCube(bld, coordinate, sampler->location, gglCtx); 964 else if (GLSL_SAMPLER_DIM_2D == sampler->type->sampler_dimensionality) 965 result = tex2D(bld, coordinate, sampler->location, gglCtx); 966 else 967 assert(0); 968 } 969 970 virtual void visit(class ir_discard * ir) 971 { 972 llvm::BasicBlock* discard = llvm::BasicBlock::Create(ctx, "discard", fun); 973 llvm::BasicBlock* after; 974 if(ir->condition) 975 { 976 after = llvm::BasicBlock::Create(ctx, "discard.survived", fun); 977 bld.CreateCondBr(llvm_value(ir->condition), discard, after); 978 } 979 else 980 { 981 after = llvm::BasicBlock::Create(ctx, "dead_code.discard", fun); 982 bld.CreateBr(discard); 983 } 984 985 bld.SetInsertPoint(discard); 986 987 // FIXME: According to the LLVM mailing list, UnwindInst should not 988 // be used by the frontend since LLVM 3.0, and 'CreateUnwind' 989 // method has been removed from the IRBuilder. Here's the 990 // temporary workaround. But it would be better to remove 991 // this in the future. 992 // 993 // A solution after LLVM 3.0: To add a global boolean in the shader to 994 // store whether it was discarded or not and just continue on normally, 995 // and handle the discard outside the shader, in the scanline function. 996 // The discard instruction is not used frequently, so it should be okay 997 // performance wise. 998 new llvm::UnwindInst(ctx, discard); /// Deprecated 999 1000 bb = after; 1001 bld.SetInsertPoint(bb); 1002 } 1003 1004 virtual void visit(class ir_loop_jump *ir) 1005 { 1006 llvm::BasicBlock* target; 1007 if(ir->mode == ir_loop_jump::jump_continue) 1008 target = loop.first; 1009 else if(ir->mode == ir_loop_jump::jump_break) 1010 target = loop.second; 1011 assert(target); 1012 1013 bld.CreateBr(target); 1014 1015 bb = llvm::BasicBlock::Create(ctx, "dead_code.jump", fun); 1016 bld.SetInsertPoint(bb); 1017 } 1018 1019 virtual void visit(class ir_loop * ir) 1020 { 1021 llvm::BasicBlock* body = llvm::BasicBlock::Create(ctx, "loop", fun); 1022 llvm::BasicBlock* header = body; 1023 llvm::BasicBlock* after = llvm::BasicBlock::Create(ctx, "loop.after", fun); 1024 llvm::Value* ctr; 1025 1026 if(ir->counter) 1027 { 1028 ctr = llvm_variable(ir->counter); 1029 if(ir->from) 1030 bld.CreateStore(llvm_value(ir->from), ctr); 1031 if(ir->to) 1032 header = llvm::BasicBlock::Create(ctx, "loop.header", fun); 1033 } 1034 1035 bld.CreateBr(header); 1036 1037 if(ir->counter && ir->to) 1038 { 1039 bld.SetInsertPoint(header); 1040 llvm::Value* cond; 1041 llvm::Value* load = bld.CreateLoad(ctr); 1042 llvm::Value* to = llvm_value(ir->to); 1043 switch(ir->counter->type->base_type) 1044 { 1045 case GLSL_TYPE_BOOL: 1046 case GLSL_TYPE_UINT: 1047 cond = bld.CreateICmpULT(load, to); 1048 break; 1049 case GLSL_TYPE_INT: 1050 cond = bld.CreateICmpSLT(load, to); 1051 break; 1052 case GLSL_TYPE_FLOAT: 1053 cond = bld.CreateFCmpOLT(load, to); 1054 break; 1055 } 1056 bld.CreateCondBr(cond, body, after); 1057 } 1058 1059 bld.SetInsertPoint(body); 1060 1061 std::pair<llvm::BasicBlock*, llvm::BasicBlock*> saved_loop = loop; 1062 loop = std::make_pair(header, after); 1063 visit_exec_list(&ir->body_instructions, this); 1064 loop = saved_loop; 1065 1066 if(ir->counter && ir->increment) 1067 { 1068 switch(ir->counter->type->base_type) 1069 { 1070 case GLSL_TYPE_BOOL: 1071 case GLSL_TYPE_UINT: 1072 case GLSL_TYPE_INT: 1073 bld.CreateStore(bld.CreateAdd(bld.CreateLoad(ctr), llvm_value(ir->increment)), ctr); 1074 break; 1075 case GLSL_TYPE_FLOAT: 1076 bld.CreateStore(bld.CreateFAdd(bld.CreateLoad(ctr), llvm_value(ir->increment)), ctr); 1077 break; 1078 } 1079 } 1080 bld.CreateBr(header); 1081 1082 bb = after; 1083 bld.SetInsertPoint(bb); 1084 } 1085 1086 virtual void visit(class ir_if *ir) 1087 { 1088 llvm::BasicBlock* bbt = llvm::BasicBlock::Create(ctx, "if", fun); 1089 llvm::BasicBlock* bbf = llvm::BasicBlock::Create(ctx, "else", fun); 1090 llvm::BasicBlock* bbe = llvm::BasicBlock::Create(ctx, "endif", fun); 1091 bld.CreateCondBr(llvm_value(ir->condition), bbt, bbf); 1092 1093 bld.SetInsertPoint(bbt); 1094 visit_exec_list(&ir->then_instructions, this); 1095 bld.CreateBr(bbe); 1096 1097 bld.SetInsertPoint(bbf); 1098 visit_exec_list(&ir->else_instructions, this); 1099 bld.CreateBr(bbe); 1100 1101 bb = bbe; 1102 bld.SetInsertPoint(bb); 1103 } 1104 1105 virtual void visit(class ir_return * ir) 1106 { 1107 if(!ir->value) 1108 bld.CreateRetVoid(); 1109 else 1110 bld.CreateRet(llvm_value(ir->value)); 1111 1112 bb = llvm::BasicBlock::Create(ctx, "dead_code.return", fun); 1113 bld.SetInsertPoint(bb); 1114 } 1115 1116 virtual void visit(class ir_call * ir) 1117 { 1118 std::vector<llvm::Value*> args; 1119 1120 foreach_iter(exec_list_iterator, iter, *ir) 1121 { 1122 ir_rvalue *arg = (ir_constant *)iter.get(); 1123 args.push_back(llvm_value(arg)); 1124 } 1125 1126 result = bld.CreateCall(llvm_function(ir->get_callee()), llvm::ArrayRef<llvm::Value*>(args)); 1127 1128 llvm::AttrListPtr attr; 1129 ((llvm::CallInst*)result)->setAttributes(attr); 1130 } 1131 1132 virtual void visit(class ir_constant * ir) 1133 { 1134 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 1135 std::vector<llvm::Constant*> fields; 1136 foreach_iter(exec_list_iterator, iter, ir->components) { 1137 ir_constant *field = (ir_constant *)iter.get(); 1138 fields.push_back(llvm_constant(field)); 1139 } 1140 result = llvm::ConstantStruct::get((llvm::StructType*)llvm_type(ir->type), fields); 1141 } 1142 else if (ir->type->base_type == GLSL_TYPE_ARRAY) { 1143 std::vector<llvm::Constant*> elems; 1144 for (unsigned i = 0; i < ir->type->length; i++) 1145 elems.push_back(llvm_constant(ir->array_elements[i])); 1146 result = llvm::ConstantArray::get((llvm::ArrayType*)llvm_type(ir->type), elems); 1147 } 1148 else 1149 { 1150 llvm::Type* base_type = llvm_base_type(ir->type->base_type); 1151 llvm::Type* vec_type = llvm_vec_type(ir->type); 1152 llvm::Type* type = llvm_type(ir->type); 1153 1154 std::vector<llvm::Constant*> vecs; 1155 unsigned idx = 0; 1156 for (unsigned i = 0; i < ir->type->matrix_columns; ++i) { 1157 std::vector<llvm::Constant*> elems; 1158 for (unsigned j = 0; j < ir->type->vector_elements; ++j) { 1159 llvm::Constant* elem; 1160 switch(ir->type->base_type) 1161 { 1162 case GLSL_TYPE_FLOAT: 1163 elem = llvm::ConstantFP::get(base_type, ir->value.f[idx]); 1164 break; 1165 case GLSL_TYPE_UINT: 1166 elem = llvm::ConstantInt::get(base_type, ir->value.u[idx]); 1167 break; 1168 case GLSL_TYPE_INT: 1169 elem = llvm::ConstantInt::get(base_type, ir->value.i[idx]); 1170 break; 1171 case GLSL_TYPE_BOOL: 1172 elem = llvm::ConstantInt::get(base_type, ir->value.b[idx]); 1173 break; 1174 } 1175 elems.push_back(elem); 1176 ++idx; 1177 } 1178 1179 llvm::Constant* vec; 1180 if(ir->type->vector_elements > 1) { 1181 llvm::ArrayRef<llvm::Constant*> ConstantArray(elems); 1182 vec = llvm::ConstantVector::get(ConstantArray); 1183 } else { 1184 vec = elems[0]; 1185 } 1186 vecs.push_back(vec); 1187 } 1188 1189 if(ir->type->matrix_columns > 1) 1190 result = llvm::ConstantArray::get((llvm::ArrayType*)type, vecs); 1191 else 1192 result = vecs[0]; 1193 } 1194 } 1195 1196 llvm::Value* llvm_shuffle(llvm::Value* val, int* shuffle_mask, unsigned res_width, const llvm::Twine &name = "") 1197 { 1198 llvm::Type* elem_type = val->getType(); 1199 llvm::Type* res_type = elem_type;; 1200 unsigned val_width = 1; 1201 if(val->getType()->isVectorTy()) 1202 { 1203 val_width = ((llvm::VectorType*)val->getType())->getNumElements(); 1204 elem_type = ((llvm::VectorType*)val->getType())->getElementType(); 1205 } 1206 if(res_width > 1) 1207 res_type = llvm::VectorType::get(elem_type, res_width); 1208 1209 llvm::Constant* shuffle_mask_values[4]; 1210 assert(res_width <= 4); 1211 bool any_def = false; 1212 for(unsigned i = 0; i < res_width; ++i) 1213 { 1214 if(shuffle_mask[i] < 0) 1215 shuffle_mask_values[i] = llvm::UndefValue::get(llvm::Type::getInt32Ty(ctx)); 1216 else 1217 { 1218 any_def = true; 1219 shuffle_mask_values[i] = llvm_int(shuffle_mask[i]); 1220 } 1221 } 1222 1223 llvm::Value* undef = llvm::UndefValue::get(res_type); 1224 if(!any_def) 1225 return undef; 1226 1227 if(val_width > 1) 1228 { 1229 if(res_width > 1) 1230 { 1231 if(val_width == res_width) 1232 { 1233 bool nontrivial = false; 1234 for(unsigned i = 0; i < val_width; ++i) 1235 { 1236 if(shuffle_mask[i] != (int)i) 1237 nontrivial = true; 1238 } 1239 if(!nontrivial) 1240 return val; 1241 } 1242 1243 return bld.CreateShuffleVector(val, llvm::UndefValue::get(val->getType()), llvm::ConstantVector::get(pack(shuffle_mask_values, res_width)), name); 1244 } 1245 else 1246 return bld.CreateExtractElement(val, llvm_int(shuffle_mask[0]), name); 1247 } 1248 else 1249 { 1250 if(res_width > 1) 1251 { 1252 llvm::Value* tmp = undef; 1253 for(unsigned i = 0; i < res_width; ++i) 1254 { 1255 if(shuffle_mask[i] >= 0) 1256 tmp = bld.CreateInsertElement(tmp, val, llvm_int(i), name); 1257 } 1258 return tmp; 1259 } 1260 else if(shuffle_mask[0] >= 0) 1261 return val; 1262 else 1263 return undef; 1264 } 1265 } 1266 1267 1268 virtual void visit(class ir_swizzle * swz) 1269 { 1270 llvm::Value* val = llvm_value(swz->val); 1271 int mask[4] = {swz->mask.x, swz->mask.y, swz->mask.z, swz->mask.w}; 1272 result = llvm_shuffle(val, mask, swz->mask.num_components, "swizzle"); 1273 } 1274 1275 virtual void visit(class ir_assignment * ir) 1276 { 1277 llvm::Value* lhs = llvm_pointer(ir->lhs); 1278 llvm::Value* rhs = llvm_value(ir->rhs); 1279 unsigned width = ir->lhs->type->vector_elements; 1280 unsigned mask = (1 << width) - 1; 1281 assert(rhs); 1282 1283 // TODO: masking for matrix assignment 1284 if (ir->rhs->type->is_matrix()) { 1285 bld.CreateStore(rhs, lhs, "mat_str"); 1286 return; 1287 } 1288 1289 if (!(ir->write_mask & mask)) 1290 return; 1291 1292 if (ir->rhs->type->vector_elements < width) { 1293 int expand_mask[4] = {-1, -1, -1, -1}; 1294 for (unsigned i = 0; i < ir->lhs->type->vector_elements; ++i) 1295 expand_mask[i] = i; 1296 // printf("ve: %u w %u issw: %i\n", ir->rhs->type->vector_elements, width, !!ir->rhs->as_swizzle()); 1297 rhs = llvm_shuffle(rhs, expand_mask, width, "assign.expand"); 1298 } 1299 1300 if (width > 1 && (ir->write_mask & mask) != mask) { 1301 llvm::Constant* blend_mask[4]; 1302 // refer to ir.h: ir_assignment::write_mask 1303 // A partially-set write mask means that each enabled channel gets 1304 // the value from a consecutive channel of the rhs. 1305 unsigned rhsChannel = 0; 1306 for (unsigned i = 0; i < width; ++i) { 1307 if (ir->write_mask & (1 << i)) 1308 blend_mask[i] = llvm_int(width + rhsChannel++); 1309 else 1310 blend_mask[i] = llvm_int(i); 1311 } 1312 rhs = bld.CreateShuffleVector(bld.CreateLoad(lhs), rhs, llvm::ConstantVector::get(pack(blend_mask, width)), "assign.writemask"); 1313 } 1314 1315 if(ir->condition) 1316 rhs = bld.CreateSelect(llvm_value(ir->condition), rhs, bld.CreateLoad(lhs), "assign.conditional"); 1317 1318 bld.CreateStore(rhs, lhs); 1319 } 1320 1321 virtual void visit(class ir_variable * var) 1322 { 1323 llvm_variable(var); 1324 } 1325 1326 virtual void visit(ir_function_signature *sig) 1327 { 1328 if(!sig->is_defined) 1329 return; 1330 1331 assert(!fun); 1332 fun = llvm_function(sig); 1333 1334 bb = llvm::BasicBlock::Create(ctx, "entry", fun); 1335 bld.SetInsertPoint(bb); 1336 1337 llvm::Function::arg_iterator ai = fun->arg_begin(); 1338 if (!strcmp("main",sig->function_name())) 1339 { 1340 assert(3 == fun->arg_size()); 1341 bld.CreateStore(ai, inputsPtr); 1342 inputs = ai; 1343 ai++; 1344 bld.CreateStore(ai, outputsPtr); 1345 outputs = ai; 1346 ai++; 1347 bld.CreateStore(ai, constantsPtr); 1348 constants = ai; 1349 ai++; 1350 } 1351 else 1352 { 1353 foreach_iter(exec_list_iterator, iter, sig->parameters) { 1354 ir_variable* arg = (ir_variable*)iter.get(); 1355 ai->setName(arg->name); 1356 bld.CreateStore(ai, llvm_variable(arg)); 1357 ++ai; 1358 } 1359 inputs = bld.CreateLoad(inputsPtr); 1360 outputs = bld.CreateLoad(outputsPtr); 1361 constants = bld.CreateLoad(constantsPtr); 1362 } 1363 inputs->setName("gl_inputs"); 1364 outputs->setName("gl_outputs"); 1365 constants->setName("gl_constants"); 1366 1367 1368 1369 foreach_iter(exec_list_iterator, iter, sig->body) { 1370 ir_instruction *ir = (ir_instruction *)iter.get(); 1371 1372 ir->accept(this); 1373 } 1374 1375 if(fun->getReturnType()->isVoidTy()) 1376 bld.CreateRetVoid(); 1377 else 1378 bld.CreateRet(llvm::UndefValue::get(fun->getReturnType())); 1379 1380 bb = NULL; 1381 fun = NULL; 1382 } 1383 1384 virtual void visit(class ir_function * funs) 1385 { 1386 foreach_iter(exec_list_iterator, iter, *funs) 1387 { 1388 ir_function_signature* sig = (ir_function_signature*)iter.get(); 1389 sig->accept(this); 1390 } 1391 } 1392 }; 1393 1394 struct llvm::Module * 1395 glsl_ir_to_llvm_module(struct exec_list *ir, llvm::Module * mod, 1396 const struct GGLState * gglCtx, const char * shaderSuffix) 1397 { 1398 ir_to_llvm_visitor v(mod, gglCtx, shaderSuffix); 1399 1400 visit_exec_list(ir, &v); 1401 1402 // mod->dump(); 1403 if(llvm::verifyModule(*mod, llvm::PrintMessageAction, 0)) 1404 { 1405 puts("**\n module verification failed **\n"); 1406 mod->dump(); 1407 assert(0); 1408 return NULL; 1409 } 1410 1411 return mod; 1412 //v.ir_to_llvm_emit_op1(NULL, OPCODE_END, ir_to_llvm_undef_dst, ir_to_llvm_undef); 1413 } 1414