1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Reactor.hpp" 16 17 #include "llvm/Support/IRBuilder.h" 18 #include "llvm/Function.h" 19 #include "llvm/GlobalVariable.h" 20 #include "llvm/Module.h" 21 #include "llvm/LLVMContext.h" 22 #include "llvm/Constants.h" 23 #include "llvm/Intrinsics.h" 24 #include "llvm/PassManager.h" 25 #include "llvm/Analysis/LoopPass.h" 26 #include "llvm/Transforms/Scalar.h" 27 #include "llvm/Target/TargetData.h" 28 #include "llvm/Target/TargetOptions.h" 29 #include "llvm/Support/TargetSelect.h" 30 #include "../lib/ExecutionEngine/JIT/JIT.h" 31 32 #include "LLVMRoutine.hpp" 33 #include "LLVMRoutineManager.hpp" 34 #include "x86.hpp" 35 #include "CPUID.hpp" 36 #include "Thread.hpp" 37 #include "Memory.hpp" 38 #include "MutexLock.hpp" 39 40 #include <fstream> 41 42 #if defined(__i386__) || defined(__x86_64__) 43 #include <xmmintrin.h> 44 #endif 45 46 #if defined(__x86_64__) && defined(_WIN32) 47 extern "C" void X86CompilationCallback() 48 { 49 assert(false); // UNIMPLEMENTED 50 } 51 #endif 52 53 extern "C" 54 { 55 bool (*CodeAnalystInitialize)() = 0; 56 void (*CodeAnalystCompleteJITLog)() = 0; 57 bool (*CodeAnalystLogJITCode)(const void *jitCodeStartAddr, unsigned int jitCodeSize, const wchar_t *functionName) = 0; 58 } 59 60 namespace llvm 61 { 62 extern bool JITEmitDebugInfo; 63 } 64 65 namespace 66 { 67 sw::LLVMRoutineManager *routineManager = nullptr; 68 llvm::ExecutionEngine *executionEngine = nullptr; 69 llvm::IRBuilder<> *builder = nullptr; 70 llvm::LLVMContext *context = nullptr; 71 llvm::Module *module = nullptr; 72 llvm::Function *function = nullptr; 73 74 sw::MutexLock codegenMutex; 75 } 76 77 namespace sw 78 { 79 Optimization optimization[10] = {InstructionCombining, Disabled}; 80 81 enum EmulatedType 82 { 83 Type_v2i32, 84 Type_v4i16, 85 Type_v2i16, 86 Type_v8i8, 87 Type_v4i8, 88 Type_v2f32, 89 EmulatedTypeCount 90 }; 91 92 class Value : public llvm::Value {}; 93 class SwitchCases : public llvm::SwitchInst {}; 94 class BasicBlock : public llvm::BasicBlock {}; 95 96 llvm::Type *T(Type *t) 97 { 98 uintptr_t type = reinterpret_cast<uintptr_t>(t); 99 if(type < EmulatedTypeCount) 100 { 101 // Use 128-bit vectors to implement logically shorter ones. 102 switch(type) 103 { 104 case Type_v2i32: return T(Int4::getType()); 105 case Type_v4i16: return T(Short8::getType()); 106 case Type_v2i16: return T(Short8::getType()); 107 case Type_v8i8: return T(Byte16::getType()); 108 case Type_v4i8: return T(Byte16::getType()); 109 case Type_v2f32: return T(Float4::getType()); 110 default: assert(false); 111 } 112 } 113 114 return reinterpret_cast<llvm::Type*>(t); 115 } 116 117 inline Type *T(llvm::Type *t) 118 { 119 return reinterpret_cast<Type*>(t); 120 } 121 122 Type *T(EmulatedType t) 123 { 124 return reinterpret_cast<Type*>(t); 125 } 126 127 inline Value *V(llvm::Value *t) 128 { 129 return reinterpret_cast<Value*>(t); 130 } 131 132 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t) 133 { 134 return reinterpret_cast<std::vector<llvm::Type*>&>(t); 135 } 136 137 inline BasicBlock *B(llvm::BasicBlock *t) 138 { 139 return reinterpret_cast<BasicBlock*>(t); 140 } 141 142 static size_t typeSize(Type *type) 143 { 144 uintptr_t t = reinterpret_cast<uintptr_t>(type); 145 if(t < EmulatedTypeCount) 146 { 147 switch(t) 148 { 149 case Type_v2i32: return 8; 150 case Type_v4i16: return 8; 151 case Type_v2i16: return 4; 152 case Type_v8i8: return 8; 153 case Type_v4i8: return 4; 154 case Type_v2f32: return 8; 155 default: assert(false); 156 } 157 } 158 159 return T(type)->getPrimitiveSizeInBits() / 8; 160 } 161 162 static unsigned int elementCount(Type *type) 163 { 164 uintptr_t t = reinterpret_cast<uintptr_t>(type); 165 if(t < EmulatedTypeCount) 166 { 167 switch(t) 168 { 169 case Type_v2i32: return 2; 170 case Type_v4i16: return 4; 171 case Type_v2i16: return 2; 172 case Type_v8i8: return 8; 173 case Type_v4i8: return 4; 174 case Type_v2f32: return 2; 175 default: assert(false); 176 } 177 } 178 179 return llvm::cast<llvm::VectorType>(T(type))->getNumElements(); 180 } 181 182 Nucleus::Nucleus() 183 { 184 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe 185 186 llvm::InitializeNativeTarget(); 187 llvm::JITEmitDebugInfo = false; 188 189 if(!::context) 190 { 191 ::context = new llvm::LLVMContext(); 192 } 193 194 ::module = new llvm::Module("", *::context); 195 ::routineManager = new LLVMRoutineManager(); 196 197 #if defined(__x86_64__) 198 const char *architecture = "x86-64"; 199 #else 200 const char *architecture = "x86"; 201 #endif 202 203 llvm::SmallVector<std::string, 1> MAttrs; 204 MAttrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx"); 205 MAttrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov"); 206 MAttrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse"); 207 MAttrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2"); 208 MAttrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3"); 209 MAttrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3"); 210 MAttrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41"); 211 212 std::string error; 213 llvm::TargetMachine *targetMachine = llvm::EngineBuilder::selectTarget(::module, architecture, "", MAttrs, llvm::Reloc::Default, llvm::CodeModel::JITDefault, &error); 214 ::executionEngine = llvm::JIT::createJIT(::module, 0, ::routineManager, llvm::CodeGenOpt::Aggressive, true, targetMachine); 215 216 if(!::builder) 217 { 218 ::builder = new llvm::IRBuilder<>(*::context); 219 220 #if defined(_WIN32) 221 HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll"); 222 if(CodeAnalyst) 223 { 224 CodeAnalystInitialize = (bool(*)())GetProcAddress(CodeAnalyst, "CAJIT_Initialize"); 225 CodeAnalystCompleteJITLog = (void(*)())GetProcAddress(CodeAnalyst, "CAJIT_CompleteJITLog"); 226 CodeAnalystLogJITCode = (bool(*)(const void*, unsigned int, const wchar_t*))GetProcAddress(CodeAnalyst, "CAJIT_LogJITCode"); 227 228 CodeAnalystInitialize(); 229 } 230 #endif 231 } 232 } 233 234 Nucleus::~Nucleus() 235 { 236 delete ::executionEngine; 237 ::executionEngine = nullptr; 238 239 ::routineManager = nullptr; 240 ::function = nullptr; 241 ::module = nullptr; 242 243 ::codegenMutex.unlock(); 244 } 245 246 Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations) 247 { 248 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator()) 249 { 250 llvm::Type *type = ::function->getReturnType(); 251 252 if(type->isVoidTy()) 253 { 254 createRetVoid(); 255 } 256 else 257 { 258 createRet(V(llvm::UndefValue::get(type))); 259 } 260 } 261 262 if(false) 263 { 264 std::string error; 265 llvm::raw_fd_ostream file("llvm-dump-unopt.txt", error); 266 ::module->print(file, 0); 267 } 268 269 if(runOptimizations) 270 { 271 optimize(); 272 } 273 274 if(false) 275 { 276 std::string error; 277 llvm::raw_fd_ostream file("llvm-dump-opt.txt", error); 278 ::module->print(file, 0); 279 } 280 281 void *entry = ::executionEngine->getPointerToFunction(::function); 282 LLVMRoutine *routine = ::routineManager->acquireRoutine(entry); 283 284 if(CodeAnalystLogJITCode) 285 { 286 CodeAnalystLogJITCode(routine->getEntry(), routine->getCodeSize(), name); 287 } 288 289 return routine; 290 } 291 292 void Nucleus::optimize() 293 { 294 static llvm::PassManager *passManager = nullptr; 295 296 if(!passManager) 297 { 298 passManager = new llvm::PassManager(); 299 300 llvm::UnsafeFPMath = true; 301 // llvm::NoInfsFPMath = true; 302 // llvm::NoNaNsFPMath = true; 303 304 passManager->add(new llvm::TargetData(*::executionEngine->getTargetData())); 305 passManager->add(llvm::createScalarReplAggregatesPass()); 306 307 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++) 308 { 309 switch(optimization[pass]) 310 { 311 case Disabled: break; 312 case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break; 313 case LICM: passManager->add(llvm::createLICMPass()); break; 314 case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break; 315 case GVN: passManager->add(llvm::createGVNPass()); break; 316 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break; 317 case Reassociate: passManager->add(llvm::createReassociatePass()); break; 318 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break; 319 case SCCP: passManager->add(llvm::createSCCPPass()); break; 320 case ScalarReplAggregates: passManager->add(llvm::createScalarReplAggregatesPass()); break; 321 default: 322 assert(false); 323 } 324 } 325 } 326 327 passManager->run(*::module); 328 } 329 330 Value *Nucleus::allocateStackVariable(Type *type, int arraySize) 331 { 332 // Need to allocate it in the entry block for mem2reg to work 333 llvm::BasicBlock &entryBlock = ::function->getEntryBlock(); 334 335 llvm::Instruction *declaration; 336 337 if(arraySize) 338 { 339 declaration = new llvm::AllocaInst(T(type), Nucleus::createConstantInt(arraySize)); 340 } 341 else 342 { 343 declaration = new llvm::AllocaInst(T(type), (Value*)nullptr); 344 } 345 346 entryBlock.getInstList().push_front(declaration); 347 348 return V(declaration); 349 } 350 351 BasicBlock *Nucleus::createBasicBlock() 352 { 353 return B(BasicBlock::Create(*::context, "", ::function)); 354 } 355 356 BasicBlock *Nucleus::getInsertBlock() 357 { 358 return B(::builder->GetInsertBlock()); 359 } 360 361 void Nucleus::setInsertBlock(BasicBlock *basicBlock) 362 { 363 // assert(::builder->GetInsertBlock()->back().isTerminator()); 364 return ::builder->SetInsertPoint(basicBlock); 365 } 366 367 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params) 368 { 369 llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false); 370 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module); 371 ::function->setCallingConv(llvm::CallingConv::C); 372 373 ::builder->SetInsertPoint(BasicBlock::Create(*::context, "", ::function)); 374 } 375 376 Value *Nucleus::getArgument(unsigned int index) 377 { 378 llvm::Function::arg_iterator args = ::function->arg_begin(); 379 380 while(index) 381 { 382 args++; 383 index--; 384 } 385 386 return V(&*args); 387 } 388 389 void Nucleus::createRetVoid() 390 { 391 ::builder->CreateRetVoid(); 392 } 393 394 void Nucleus::createRet(Value *v) 395 { 396 ::builder->CreateRet(v); 397 } 398 399 void Nucleus::createBr(BasicBlock *dest) 400 { 401 ::builder->CreateBr(dest); 402 } 403 404 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse) 405 { 406 ::builder->CreateCondBr(cond, ifTrue, ifFalse); 407 } 408 409 Value *Nucleus::createAdd(Value *lhs, Value *rhs) 410 { 411 return V(::builder->CreateAdd(lhs, rhs)); 412 } 413 414 Value *Nucleus::createSub(Value *lhs, Value *rhs) 415 { 416 return V(::builder->CreateSub(lhs, rhs)); 417 } 418 419 Value *Nucleus::createMul(Value *lhs, Value *rhs) 420 { 421 return V(::builder->CreateMul(lhs, rhs)); 422 } 423 424 Value *Nucleus::createUDiv(Value *lhs, Value *rhs) 425 { 426 return V(::builder->CreateUDiv(lhs, rhs)); 427 } 428 429 Value *Nucleus::createSDiv(Value *lhs, Value *rhs) 430 { 431 return V(::builder->CreateSDiv(lhs, rhs)); 432 } 433 434 Value *Nucleus::createFAdd(Value *lhs, Value *rhs) 435 { 436 return V(::builder->CreateFAdd(lhs, rhs)); 437 } 438 439 Value *Nucleus::createFSub(Value *lhs, Value *rhs) 440 { 441 return V(::builder->CreateFSub(lhs, rhs)); 442 } 443 444 Value *Nucleus::createFMul(Value *lhs, Value *rhs) 445 { 446 return V(::builder->CreateFMul(lhs, rhs)); 447 } 448 449 Value *Nucleus::createFDiv(Value *lhs, Value *rhs) 450 { 451 return V(::builder->CreateFDiv(lhs, rhs)); 452 } 453 454 Value *Nucleus::createURem(Value *lhs, Value *rhs) 455 { 456 return V(::builder->CreateURem(lhs, rhs)); 457 } 458 459 Value *Nucleus::createSRem(Value *lhs, Value *rhs) 460 { 461 return V(::builder->CreateSRem(lhs, rhs)); 462 } 463 464 Value *Nucleus::createFRem(Value *lhs, Value *rhs) 465 { 466 return V(::builder->CreateFRem(lhs, rhs)); 467 } 468 469 Value *Nucleus::createShl(Value *lhs, Value *rhs) 470 { 471 return V(::builder->CreateShl(lhs, rhs)); 472 } 473 474 Value *Nucleus::createLShr(Value *lhs, Value *rhs) 475 { 476 return V(::builder->CreateLShr(lhs, rhs)); 477 } 478 479 Value *Nucleus::createAShr(Value *lhs, Value *rhs) 480 { 481 return V(::builder->CreateAShr(lhs, rhs)); 482 } 483 484 Value *Nucleus::createAnd(Value *lhs, Value *rhs) 485 { 486 return V(::builder->CreateAnd(lhs, rhs)); 487 } 488 489 Value *Nucleus::createOr(Value *lhs, Value *rhs) 490 { 491 return V(::builder->CreateOr(lhs, rhs)); 492 } 493 494 Value *Nucleus::createXor(Value *lhs, Value *rhs) 495 { 496 return V(::builder->CreateXor(lhs, rhs)); 497 } 498 499 Value *Nucleus::createNeg(Value *v) 500 { 501 return V(::builder->CreateNeg(v)); 502 } 503 504 Value *Nucleus::createFNeg(Value *v) 505 { 506 return V(::builder->CreateFNeg(v)); 507 } 508 509 Value *Nucleus::createNot(Value *v) 510 { 511 return V(::builder->CreateNot(v)); 512 } 513 514 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment) 515 { 516 uintptr_t t = reinterpret_cast<uintptr_t>(type); 517 if(t < EmulatedTypeCount) 518 { 519 switch(t) 520 { 521 case Type_v2i32: 522 case Type_v4i16: 523 case Type_v8i8: 524 case Type_v2f32: 525 return createBitCast(createInsertElement(V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))), createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment), 0), T(T(type))); 526 case Type_v2i16: 527 case Type_v4i8: 528 if(alignment != 0) // Not a local variable (all vectors are 128-bit). 529 { 530 Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))); 531 Value *i = V(createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment)); 532 i = createZExt(i, Long::getType()); 533 Value *v = V(createInsertElement(u, i, 0)); 534 return createBitCast(v, T(T(type))); 535 } 536 break; 537 default: 538 assert(false); 539 } 540 } 541 542 assert(ptr->getType()->getContainedType(0) == T(type)); 543 return V(::builder->Insert(new llvm::LoadInst(ptr, "", isVolatile, alignment))); 544 } 545 546 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment) 547 { 548 uintptr_t t = reinterpret_cast<uintptr_t>(type); 549 if(t < EmulatedTypeCount) 550 { 551 switch(t) 552 { 553 case Type_v2i32: 554 case Type_v4i16: 555 case Type_v8i8: 556 case Type_v2f32: 557 createStore(createExtractElement(createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0), createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment); 558 return value; 559 case Type_v2i16: 560 case Type_v4i8: 561 if(alignment != 0) // Not a local variable (all vectors are 128-bit). 562 { 563 createStore(createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0), createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment); 564 return value; 565 } 566 break; 567 default: 568 assert(false); 569 } 570 } 571 572 assert(ptr->getType()->getContainedType(0) == T(type)); 573 ::builder->Insert(new llvm::StoreInst(value, ptr, isVolatile, alignment)); 574 return value; 575 } 576 577 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex) 578 { 579 if(sizeof(void*) == 8) 580 { 581 if(unsignedIndex) 582 { 583 index = createZExt(index, Long::getType()); 584 } 585 else 586 { 587 index = createSExt(index, Long::getType()); 588 } 589 590 index = createMul(index, createConstantLong((int64_t)typeSize(type))); 591 } 592 else 593 { 594 index = createMul(index, createConstantInt((int)typeSize(type))); 595 } 596 597 assert(ptr->getType()->getContainedType(0) == T(type)); 598 return createBitCast(V(::builder->CreateGEP(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0))), index)), T(llvm::PointerType::get(T(type), 0))); 599 } 600 601 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value) 602 { 603 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, ptr, value, llvm::SequentiallyConsistent)); 604 } 605 606 Value *Nucleus::createTrunc(Value *v, Type *destType) 607 { 608 return V(::builder->CreateTrunc(v, T(destType))); 609 } 610 611 Value *Nucleus::createZExt(Value *v, Type *destType) 612 { 613 return V(::builder->CreateZExt(v, T(destType))); 614 } 615 616 Value *Nucleus::createSExt(Value *v, Type *destType) 617 { 618 return V(::builder->CreateSExt(v, T(destType))); 619 } 620 621 Value *Nucleus::createFPToSI(Value *v, Type *destType) 622 { 623 return V(::builder->CreateFPToSI(v, T(destType))); 624 } 625 626 Value *Nucleus::createSIToFP(Value *v, Type *destType) 627 { 628 return V(::builder->CreateSIToFP(v, T(destType))); 629 } 630 631 Value *Nucleus::createFPTrunc(Value *v, Type *destType) 632 { 633 return V(::builder->CreateFPTrunc(v, T(destType))); 634 } 635 636 Value *Nucleus::createFPExt(Value *v, Type *destType) 637 { 638 return V(::builder->CreateFPExt(v, T(destType))); 639 } 640 641 Value *Nucleus::createBitCast(Value *v, Type *destType) 642 { 643 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need 644 // support for casting between scalars and wide vectors. Emulate them by writing to the stack and 645 // reading back as the destination type. 646 if(!v->getType()->isVectorTy() && T(destType)->isVectorTy()) 647 { 648 Value *readAddress = allocateStackVariable(destType); 649 Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(v->getType(), 0))); 650 createStore(v, writeAddress, T(v->getType())); 651 return createLoad(readAddress, destType); 652 } 653 else if(v->getType()->isVectorTy() && !T(destType)->isVectorTy()) 654 { 655 Value *writeAddress = allocateStackVariable(T(v->getType())); 656 createStore(v, writeAddress, T(v->getType())); 657 Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0))); 658 return createLoad(readAddress, destType); 659 } 660 661 return V(::builder->CreateBitCast(v, T(destType))); 662 } 663 664 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs) 665 { 666 return V(::builder->CreateICmpEQ(lhs, rhs)); 667 } 668 669 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs) 670 { 671 return V(::builder->CreateICmpNE(lhs, rhs)); 672 } 673 674 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs) 675 { 676 return V(::builder->CreateICmpUGT(lhs, rhs)); 677 } 678 679 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs) 680 { 681 return V(::builder->CreateICmpUGE(lhs, rhs)); 682 } 683 684 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs) 685 { 686 return V(::builder->CreateICmpULT(lhs, rhs)); 687 } 688 689 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs) 690 { 691 return V(::builder->CreateICmpULE(lhs, rhs)); 692 } 693 694 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs) 695 { 696 return V(::builder->CreateICmpSGT(lhs, rhs)); 697 } 698 699 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs) 700 { 701 return V(::builder->CreateICmpSGE(lhs, rhs)); 702 } 703 704 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs) 705 { 706 return V(::builder->CreateICmpSLT(lhs, rhs)); 707 } 708 709 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs) 710 { 711 return V(::builder->CreateICmpSLE(lhs, rhs)); 712 } 713 714 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs) 715 { 716 return V(::builder->CreateFCmpOEQ(lhs, rhs)); 717 } 718 719 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs) 720 { 721 return V(::builder->CreateFCmpOGT(lhs, rhs)); 722 } 723 724 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs) 725 { 726 return V(::builder->CreateFCmpOGE(lhs, rhs)); 727 } 728 729 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs) 730 { 731 return V(::builder->CreateFCmpOLT(lhs, rhs)); 732 } 733 734 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs) 735 { 736 return V(::builder->CreateFCmpOLE(lhs, rhs)); 737 } 738 739 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs) 740 { 741 return V(::builder->CreateFCmpONE(lhs, rhs)); 742 } 743 744 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs) 745 { 746 return V(::builder->CreateFCmpORD(lhs, rhs)); 747 } 748 749 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs) 750 { 751 return V(::builder->CreateFCmpUNO(lhs, rhs)); 752 } 753 754 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs) 755 { 756 return V(::builder->CreateFCmpUEQ(lhs, rhs)); 757 } 758 759 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs) 760 { 761 return V(::builder->CreateFCmpUGT(lhs, rhs)); 762 } 763 764 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs) 765 { 766 return V(::builder->CreateFCmpUGE(lhs, rhs)); 767 } 768 769 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs) 770 { 771 return V(::builder->CreateFCmpULT(lhs, rhs)); 772 } 773 774 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs) 775 { 776 return V(::builder->CreateFCmpULE(lhs, rhs)); 777 } 778 779 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs) 780 { 781 return V(::builder->CreateFCmpULE(lhs, rhs)); 782 } 783 784 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index) 785 { 786 assert(vector->getType()->getContainedType(0) == T(type)); 787 return V(::builder->CreateExtractElement(vector, createConstantInt(index))); 788 } 789 790 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index) 791 { 792 return V(::builder->CreateInsertElement(vector, element, createConstantInt(index))); 793 } 794 795 Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select) 796 { 797 int size = llvm::cast<llvm::VectorType>(V1->getType())->getNumElements(); 798 const int maxSize = 16; 799 llvm::Constant *swizzle[maxSize]; 800 assert(size <= maxSize); 801 802 for(int i = 0; i < size; i++) 803 { 804 swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]); 805 } 806 807 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size)); 808 809 return V(::builder->CreateShuffleVector(V1, V2, shuffle)); 810 } 811 812 Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse) 813 { 814 return V(::builder->CreateSelect(C, ifTrue, ifFalse)); 815 } 816 817 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases) 818 { 819 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(control, defaultBranch, numCases)); 820 } 821 822 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch) 823 { 824 switchCases->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), branch); 825 } 826 827 void Nucleus::createUnreachable() 828 { 829 ::builder->CreateUnreachable(); 830 } 831 832 static Value *createSwizzle4(Value *val, unsigned char select) 833 { 834 int swizzle[4] = 835 { 836 (select >> 0) & 0x03, 837 (select >> 2) & 0x03, 838 (select >> 4) & 0x03, 839 (select >> 6) & 0x03, 840 }; 841 842 return Nucleus::createShuffleVector(val, val, swizzle); 843 } 844 845 static Value *createMask4(Value *lhs, Value *rhs, unsigned char select) 846 { 847 bool mask[4] = {false, false, false, false}; 848 849 mask[(select >> 0) & 0x03] = true; 850 mask[(select >> 2) & 0x03] = true; 851 mask[(select >> 4) & 0x03] = true; 852 mask[(select >> 6) & 0x03] = true; 853 854 int swizzle[4] = 855 { 856 mask[0] ? 4 : 0, 857 mask[1] ? 5 : 1, 858 mask[2] ? 6 : 2, 859 mask[3] ? 7 : 3, 860 }; 861 862 return Nucleus::createShuffleVector(lhs, rhs, swizzle); 863 } 864 865 Type *Nucleus::getPointerType(Type *ElementType) 866 { 867 return T(llvm::PointerType::get(T(ElementType), 0)); 868 } 869 870 Value *Nucleus::createNullValue(Type *Ty) 871 { 872 return V(llvm::Constant::getNullValue(T(Ty))); 873 } 874 875 Value *Nucleus::createConstantLong(int64_t i) 876 { 877 return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true)); 878 } 879 880 Value *Nucleus::createConstantInt(int i) 881 { 882 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true)); 883 } 884 885 Value *Nucleus::createConstantInt(unsigned int i) 886 { 887 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false)); 888 } 889 890 Value *Nucleus::createConstantBool(bool b) 891 { 892 return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b)); 893 } 894 895 Value *Nucleus::createConstantByte(signed char i) 896 { 897 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true)); 898 } 899 900 Value *Nucleus::createConstantByte(unsigned char i) 901 { 902 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false)); 903 } 904 905 Value *Nucleus::createConstantShort(short i) 906 { 907 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true)); 908 } 909 910 Value *Nucleus::createConstantShort(unsigned short i) 911 { 912 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false)); 913 } 914 915 Value *Nucleus::createConstantFloat(float x) 916 { 917 return V(llvm::ConstantFP::get(T(Float::getType()), x)); 918 } 919 920 Value *Nucleus::createNullPointer(Type *Ty) 921 { 922 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0))); 923 } 924 925 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type) 926 { 927 assert(llvm::isa<llvm::VectorType>(T(type))); 928 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type. 929 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type. 930 assert(numElements <= 16 && numConstants <= numElements); 931 llvm::Constant *constantVector[16]; 932 933 for(int i = 0; i < numElements; i++) 934 { 935 constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]); 936 } 937 938 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements))); 939 } 940 941 Value *Nucleus::createConstantVector(const double *constants, Type *type) 942 { 943 assert(llvm::isa<llvm::VectorType>(T(type))); 944 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type. 945 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type. 946 assert(numElements <= 8 && numConstants <= numElements); 947 llvm::Constant *constantVector[8]; 948 949 for(int i = 0; i < numElements; i++) 950 { 951 constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]); 952 } 953 954 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements))); 955 } 956 957 Type *Void::getType() 958 { 959 return T(llvm::Type::getVoidTy(*::context)); 960 } 961 962 Bool::Bool(Argument<Bool> argument) 963 { 964 storeValue(argument.value); 965 } 966 967 Bool::Bool(bool x) 968 { 969 storeValue(Nucleus::createConstantBool(x)); 970 } 971 972 Bool::Bool(RValue<Bool> rhs) 973 { 974 storeValue(rhs.value); 975 } 976 977 Bool::Bool(const Bool &rhs) 978 { 979 Value *value = rhs.loadValue(); 980 storeValue(value); 981 } 982 983 Bool::Bool(const Reference<Bool> &rhs) 984 { 985 Value *value = rhs.loadValue(); 986 storeValue(value); 987 } 988 989 RValue<Bool> Bool::operator=(RValue<Bool> rhs) 990 { 991 storeValue(rhs.value); 992 993 return rhs; 994 } 995 996 RValue<Bool> Bool::operator=(const Bool &rhs) 997 { 998 Value *value = rhs.loadValue(); 999 storeValue(value); 1000 1001 return RValue<Bool>(value); 1002 } 1003 1004 RValue<Bool> Bool::operator=(const Reference<Bool> &rhs) 1005 { 1006 Value *value = rhs.loadValue(); 1007 storeValue(value); 1008 1009 return RValue<Bool>(value); 1010 } 1011 1012 RValue<Bool> operator!(RValue<Bool> val) 1013 { 1014 return RValue<Bool>(Nucleus::createNot(val.value)); 1015 } 1016 1017 RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs) 1018 { 1019 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value)); 1020 } 1021 1022 RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs) 1023 { 1024 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value)); 1025 } 1026 1027 Type *Bool::getType() 1028 { 1029 return T(llvm::Type::getInt1Ty(*::context)); 1030 } 1031 1032 Byte::Byte(Argument<Byte> argument) 1033 { 1034 storeValue(argument.value); 1035 } 1036 1037 Byte::Byte(RValue<Int> cast) 1038 { 1039 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType()); 1040 1041 storeValue(integer); 1042 } 1043 1044 Byte::Byte(RValue<UInt> cast) 1045 { 1046 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType()); 1047 1048 storeValue(integer); 1049 } 1050 1051 Byte::Byte(RValue<UShort> cast) 1052 { 1053 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType()); 1054 1055 storeValue(integer); 1056 } 1057 1058 Byte::Byte(int x) 1059 { 1060 storeValue(Nucleus::createConstantByte((unsigned char)x)); 1061 } 1062 1063 Byte::Byte(unsigned char x) 1064 { 1065 storeValue(Nucleus::createConstantByte(x)); 1066 } 1067 1068 Byte::Byte(RValue<Byte> rhs) 1069 { 1070 storeValue(rhs.value); 1071 } 1072 1073 Byte::Byte(const Byte &rhs) 1074 { 1075 Value *value = rhs.loadValue(); 1076 storeValue(value); 1077 } 1078 1079 Byte::Byte(const Reference<Byte> &rhs) 1080 { 1081 Value *value = rhs.loadValue(); 1082 storeValue(value); 1083 } 1084 1085 RValue<Byte> Byte::operator=(RValue<Byte> rhs) 1086 { 1087 storeValue(rhs.value); 1088 1089 return rhs; 1090 } 1091 1092 RValue<Byte> Byte::operator=(const Byte &rhs) 1093 { 1094 Value *value = rhs.loadValue(); 1095 storeValue(value); 1096 1097 return RValue<Byte>(value); 1098 } 1099 1100 RValue<Byte> Byte::operator=(const Reference<Byte> &rhs) 1101 { 1102 Value *value = rhs.loadValue(); 1103 storeValue(value); 1104 1105 return RValue<Byte>(value); 1106 } 1107 1108 RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs) 1109 { 1110 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value)); 1111 } 1112 1113 RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs) 1114 { 1115 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value)); 1116 } 1117 1118 RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs) 1119 { 1120 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value)); 1121 } 1122 1123 RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs) 1124 { 1125 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value)); 1126 } 1127 1128 RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs) 1129 { 1130 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value)); 1131 } 1132 1133 RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs) 1134 { 1135 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value)); 1136 } 1137 1138 RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs) 1139 { 1140 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value)); 1141 } 1142 1143 RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs) 1144 { 1145 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value)); 1146 } 1147 1148 RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs) 1149 { 1150 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value)); 1151 } 1152 1153 RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs) 1154 { 1155 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value)); 1156 } 1157 1158 RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs) 1159 { 1160 return lhs = lhs + rhs; 1161 } 1162 1163 RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs) 1164 { 1165 return lhs = lhs - rhs; 1166 } 1167 1168 RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs) 1169 { 1170 return lhs = lhs * rhs; 1171 } 1172 1173 RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs) 1174 { 1175 return lhs = lhs / rhs; 1176 } 1177 1178 RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs) 1179 { 1180 return lhs = lhs % rhs; 1181 } 1182 1183 RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs) 1184 { 1185 return lhs = lhs & rhs; 1186 } 1187 1188 RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs) 1189 { 1190 return lhs = lhs | rhs; 1191 } 1192 1193 RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs) 1194 { 1195 return lhs = lhs ^ rhs; 1196 } 1197 1198 RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs) 1199 { 1200 return lhs = lhs << rhs; 1201 } 1202 1203 RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs) 1204 { 1205 return lhs = lhs >> rhs; 1206 } 1207 1208 RValue<Byte> operator+(RValue<Byte> val) 1209 { 1210 return val; 1211 } 1212 1213 RValue<Byte> operator-(RValue<Byte> val) 1214 { 1215 return RValue<Byte>(Nucleus::createNeg(val.value)); 1216 } 1217 1218 RValue<Byte> operator~(RValue<Byte> val) 1219 { 1220 return RValue<Byte>(Nucleus::createNot(val.value)); 1221 } 1222 1223 RValue<Byte> operator++(Byte &val, int) // Post-increment 1224 { 1225 RValue<Byte> res = val; 1226 1227 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((unsigned char)1))); 1228 val.storeValue(inc); 1229 1230 return res; 1231 } 1232 1233 const Byte &operator++(Byte &val) // Pre-increment 1234 { 1235 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1))); 1236 val.storeValue(inc); 1237 1238 return val; 1239 } 1240 1241 RValue<Byte> operator--(Byte &val, int) // Post-decrement 1242 { 1243 RValue<Byte> res = val; 1244 1245 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((unsigned char)1))); 1246 val.storeValue(inc); 1247 1248 return res; 1249 } 1250 1251 const Byte &operator--(Byte &val) // Pre-decrement 1252 { 1253 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1))); 1254 val.storeValue(inc); 1255 1256 return val; 1257 } 1258 1259 RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs) 1260 { 1261 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value)); 1262 } 1263 1264 RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs) 1265 { 1266 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value)); 1267 } 1268 1269 RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs) 1270 { 1271 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value)); 1272 } 1273 1274 RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs) 1275 { 1276 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value)); 1277 } 1278 1279 RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs) 1280 { 1281 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 1282 } 1283 1284 RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs) 1285 { 1286 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 1287 } 1288 1289 Type *Byte::getType() 1290 { 1291 return T(llvm::Type::getInt8Ty(*::context)); 1292 } 1293 1294 SByte::SByte(Argument<SByte> argument) 1295 { 1296 storeValue(argument.value); 1297 } 1298 1299 SByte::SByte(RValue<Int> cast) 1300 { 1301 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType()); 1302 1303 storeValue(integer); 1304 } 1305 1306 SByte::SByte(RValue<Short> cast) 1307 { 1308 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType()); 1309 1310 storeValue(integer); 1311 } 1312 1313 SByte::SByte(signed char x) 1314 { 1315 storeValue(Nucleus::createConstantByte(x)); 1316 } 1317 1318 SByte::SByte(RValue<SByte> rhs) 1319 { 1320 storeValue(rhs.value); 1321 } 1322 1323 SByte::SByte(const SByte &rhs) 1324 { 1325 Value *value = rhs.loadValue(); 1326 storeValue(value); 1327 } 1328 1329 SByte::SByte(const Reference<SByte> &rhs) 1330 { 1331 Value *value = rhs.loadValue(); 1332 storeValue(value); 1333 } 1334 1335 RValue<SByte> SByte::operator=(RValue<SByte> rhs) 1336 { 1337 storeValue(rhs.value); 1338 1339 return rhs; 1340 } 1341 1342 RValue<SByte> SByte::operator=(const SByte &rhs) 1343 { 1344 Value *value = rhs.loadValue(); 1345 storeValue(value); 1346 1347 return RValue<SByte>(value); 1348 } 1349 1350 RValue<SByte> SByte::operator=(const Reference<SByte> &rhs) 1351 { 1352 Value *value = rhs.loadValue(); 1353 storeValue(value); 1354 1355 return RValue<SByte>(value); 1356 } 1357 1358 RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs) 1359 { 1360 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value)); 1361 } 1362 1363 RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs) 1364 { 1365 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value)); 1366 } 1367 1368 RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs) 1369 { 1370 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value)); 1371 } 1372 1373 RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs) 1374 { 1375 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value)); 1376 } 1377 1378 RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs) 1379 { 1380 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value)); 1381 } 1382 1383 RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs) 1384 { 1385 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value)); 1386 } 1387 1388 RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs) 1389 { 1390 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value)); 1391 } 1392 1393 RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs) 1394 { 1395 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value)); 1396 } 1397 1398 RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs) 1399 { 1400 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value)); 1401 } 1402 1403 RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs) 1404 { 1405 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value)); 1406 } 1407 1408 RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs) 1409 { 1410 return lhs = lhs + rhs; 1411 } 1412 1413 RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs) 1414 { 1415 return lhs = lhs - rhs; 1416 } 1417 1418 RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs) 1419 { 1420 return lhs = lhs * rhs; 1421 } 1422 1423 RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs) 1424 { 1425 return lhs = lhs / rhs; 1426 } 1427 1428 RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs) 1429 { 1430 return lhs = lhs % rhs; 1431 } 1432 1433 RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs) 1434 { 1435 return lhs = lhs & rhs; 1436 } 1437 1438 RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs) 1439 { 1440 return lhs = lhs | rhs; 1441 } 1442 1443 RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs) 1444 { 1445 return lhs = lhs ^ rhs; 1446 } 1447 1448 RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs) 1449 { 1450 return lhs = lhs << rhs; 1451 } 1452 1453 RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs) 1454 { 1455 return lhs = lhs >> rhs; 1456 } 1457 1458 RValue<SByte> operator+(RValue<SByte> val) 1459 { 1460 return val; 1461 } 1462 1463 RValue<SByte> operator-(RValue<SByte> val) 1464 { 1465 return RValue<SByte>(Nucleus::createNeg(val.value)); 1466 } 1467 1468 RValue<SByte> operator~(RValue<SByte> val) 1469 { 1470 return RValue<SByte>(Nucleus::createNot(val.value)); 1471 } 1472 1473 RValue<SByte> operator++(SByte &val, int) // Post-increment 1474 { 1475 RValue<SByte> res = val; 1476 1477 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((signed char)1))); 1478 val.storeValue(inc); 1479 1480 return res; 1481 } 1482 1483 const SByte &operator++(SByte &val) // Pre-increment 1484 { 1485 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((signed char)1))); 1486 val.storeValue(inc); 1487 1488 return val; 1489 } 1490 1491 RValue<SByte> operator--(SByte &val, int) // Post-decrement 1492 { 1493 RValue<SByte> res = val; 1494 1495 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((signed char)1))); 1496 val.storeValue(inc); 1497 1498 return res; 1499 } 1500 1501 const SByte &operator--(SByte &val) // Pre-decrement 1502 { 1503 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((signed char)1))); 1504 val.storeValue(inc); 1505 1506 return val; 1507 } 1508 1509 RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs) 1510 { 1511 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value)); 1512 } 1513 1514 RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs) 1515 { 1516 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value)); 1517 } 1518 1519 RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs) 1520 { 1521 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value)); 1522 } 1523 1524 RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs) 1525 { 1526 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value)); 1527 } 1528 1529 RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs) 1530 { 1531 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 1532 } 1533 1534 RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs) 1535 { 1536 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 1537 } 1538 1539 Type *SByte::getType() 1540 { 1541 return T(llvm::Type::getInt8Ty(*::context)); 1542 } 1543 1544 Short::Short(Argument<Short> argument) 1545 { 1546 storeValue(argument.value); 1547 } 1548 1549 Short::Short(RValue<Int> cast) 1550 { 1551 Value *integer = Nucleus::createTrunc(cast.value, Short::getType()); 1552 1553 storeValue(integer); 1554 } 1555 1556 Short::Short(short x) 1557 { 1558 storeValue(Nucleus::createConstantShort(x)); 1559 } 1560 1561 Short::Short(RValue<Short> rhs) 1562 { 1563 storeValue(rhs.value); 1564 } 1565 1566 Short::Short(const Short &rhs) 1567 { 1568 Value *value = rhs.loadValue(); 1569 storeValue(value); 1570 } 1571 1572 Short::Short(const Reference<Short> &rhs) 1573 { 1574 Value *value = rhs.loadValue(); 1575 storeValue(value); 1576 } 1577 1578 RValue<Short> Short::operator=(RValue<Short> rhs) 1579 { 1580 storeValue(rhs.value); 1581 1582 return rhs; 1583 } 1584 1585 RValue<Short> Short::operator=(const Short &rhs) 1586 { 1587 Value *value = rhs.loadValue(); 1588 storeValue(value); 1589 1590 return RValue<Short>(value); 1591 } 1592 1593 RValue<Short> Short::operator=(const Reference<Short> &rhs) 1594 { 1595 Value *value = rhs.loadValue(); 1596 storeValue(value); 1597 1598 return RValue<Short>(value); 1599 } 1600 1601 RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs) 1602 { 1603 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value)); 1604 } 1605 1606 RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs) 1607 { 1608 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value)); 1609 } 1610 1611 RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs) 1612 { 1613 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value)); 1614 } 1615 1616 RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs) 1617 { 1618 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value)); 1619 } 1620 1621 RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs) 1622 { 1623 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value)); 1624 } 1625 1626 RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs) 1627 { 1628 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value)); 1629 } 1630 1631 RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs) 1632 { 1633 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value)); 1634 } 1635 1636 RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs) 1637 { 1638 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value)); 1639 } 1640 1641 RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs) 1642 { 1643 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value)); 1644 } 1645 1646 RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs) 1647 { 1648 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value)); 1649 } 1650 1651 RValue<Short> operator+=(Short &lhs, RValue<Short> rhs) 1652 { 1653 return lhs = lhs + rhs; 1654 } 1655 1656 RValue<Short> operator-=(Short &lhs, RValue<Short> rhs) 1657 { 1658 return lhs = lhs - rhs; 1659 } 1660 1661 RValue<Short> operator*=(Short &lhs, RValue<Short> rhs) 1662 { 1663 return lhs = lhs * rhs; 1664 } 1665 1666 RValue<Short> operator/=(Short &lhs, RValue<Short> rhs) 1667 { 1668 return lhs = lhs / rhs; 1669 } 1670 1671 RValue<Short> operator%=(Short &lhs, RValue<Short> rhs) 1672 { 1673 return lhs = lhs % rhs; 1674 } 1675 1676 RValue<Short> operator&=(Short &lhs, RValue<Short> rhs) 1677 { 1678 return lhs = lhs & rhs; 1679 } 1680 1681 RValue<Short> operator|=(Short &lhs, RValue<Short> rhs) 1682 { 1683 return lhs = lhs | rhs; 1684 } 1685 1686 RValue<Short> operator^=(Short &lhs, RValue<Short> rhs) 1687 { 1688 return lhs = lhs ^ rhs; 1689 } 1690 1691 RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs) 1692 { 1693 return lhs = lhs << rhs; 1694 } 1695 1696 RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs) 1697 { 1698 return lhs = lhs >> rhs; 1699 } 1700 1701 RValue<Short> operator+(RValue<Short> val) 1702 { 1703 return val; 1704 } 1705 1706 RValue<Short> operator-(RValue<Short> val) 1707 { 1708 return RValue<Short>(Nucleus::createNeg(val.value)); 1709 } 1710 1711 RValue<Short> operator~(RValue<Short> val) 1712 { 1713 return RValue<Short>(Nucleus::createNot(val.value)); 1714 } 1715 1716 RValue<Short> operator++(Short &val, int) // Post-increment 1717 { 1718 RValue<Short> res = val; 1719 1720 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((short)1))); 1721 val.storeValue(inc); 1722 1723 return res; 1724 } 1725 1726 const Short &operator++(Short &val) // Pre-increment 1727 { 1728 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((short)1))); 1729 val.storeValue(inc); 1730 1731 return val; 1732 } 1733 1734 RValue<Short> operator--(Short &val, int) // Post-decrement 1735 { 1736 RValue<Short> res = val; 1737 1738 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((short)1))); 1739 val.storeValue(inc); 1740 1741 return res; 1742 } 1743 1744 const Short &operator--(Short &val) // Pre-decrement 1745 { 1746 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((short)1))); 1747 val.storeValue(inc); 1748 1749 return val; 1750 } 1751 1752 RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs) 1753 { 1754 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value)); 1755 } 1756 1757 RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs) 1758 { 1759 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value)); 1760 } 1761 1762 RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs) 1763 { 1764 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value)); 1765 } 1766 1767 RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs) 1768 { 1769 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value)); 1770 } 1771 1772 RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs) 1773 { 1774 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 1775 } 1776 1777 RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs) 1778 { 1779 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 1780 } 1781 1782 Type *Short::getType() 1783 { 1784 return T(llvm::Type::getInt16Ty(*::context)); 1785 } 1786 1787 UShort::UShort(Argument<UShort> argument) 1788 { 1789 storeValue(argument.value); 1790 } 1791 1792 UShort::UShort(RValue<UInt> cast) 1793 { 1794 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType()); 1795 1796 storeValue(integer); 1797 } 1798 1799 UShort::UShort(RValue<Int> cast) 1800 { 1801 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType()); 1802 1803 storeValue(integer); 1804 } 1805 1806 UShort::UShort(unsigned short x) 1807 { 1808 storeValue(Nucleus::createConstantShort(x)); 1809 } 1810 1811 UShort::UShort(RValue<UShort> rhs) 1812 { 1813 storeValue(rhs.value); 1814 } 1815 1816 UShort::UShort(const UShort &rhs) 1817 { 1818 Value *value = rhs.loadValue(); 1819 storeValue(value); 1820 } 1821 1822 UShort::UShort(const Reference<UShort> &rhs) 1823 { 1824 Value *value = rhs.loadValue(); 1825 storeValue(value); 1826 } 1827 1828 RValue<UShort> UShort::operator=(RValue<UShort> rhs) 1829 { 1830 storeValue(rhs.value); 1831 1832 return rhs; 1833 } 1834 1835 RValue<UShort> UShort::operator=(const UShort &rhs) 1836 { 1837 Value *value = rhs.loadValue(); 1838 storeValue(value); 1839 1840 return RValue<UShort>(value); 1841 } 1842 1843 RValue<UShort> UShort::operator=(const Reference<UShort> &rhs) 1844 { 1845 Value *value = rhs.loadValue(); 1846 storeValue(value); 1847 1848 return RValue<UShort>(value); 1849 } 1850 1851 RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs) 1852 { 1853 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value)); 1854 } 1855 1856 RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs) 1857 { 1858 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value)); 1859 } 1860 1861 RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs) 1862 { 1863 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value)); 1864 } 1865 1866 RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs) 1867 { 1868 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value)); 1869 } 1870 1871 RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs) 1872 { 1873 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value)); 1874 } 1875 1876 RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs) 1877 { 1878 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value)); 1879 } 1880 1881 RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs) 1882 { 1883 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value)); 1884 } 1885 1886 RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs) 1887 { 1888 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value)); 1889 } 1890 1891 RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs) 1892 { 1893 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value)); 1894 } 1895 1896 RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs) 1897 { 1898 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value)); 1899 } 1900 1901 RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs) 1902 { 1903 return lhs = lhs + rhs; 1904 } 1905 1906 RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs) 1907 { 1908 return lhs = lhs - rhs; 1909 } 1910 1911 RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs) 1912 { 1913 return lhs = lhs * rhs; 1914 } 1915 1916 RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs) 1917 { 1918 return lhs = lhs / rhs; 1919 } 1920 1921 RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs) 1922 { 1923 return lhs = lhs % rhs; 1924 } 1925 1926 RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs) 1927 { 1928 return lhs = lhs & rhs; 1929 } 1930 1931 RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs) 1932 { 1933 return lhs = lhs | rhs; 1934 } 1935 1936 RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs) 1937 { 1938 return lhs = lhs ^ rhs; 1939 } 1940 1941 RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs) 1942 { 1943 return lhs = lhs << rhs; 1944 } 1945 1946 RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs) 1947 { 1948 return lhs = lhs >> rhs; 1949 } 1950 1951 RValue<UShort> operator+(RValue<UShort> val) 1952 { 1953 return val; 1954 } 1955 1956 RValue<UShort> operator-(RValue<UShort> val) 1957 { 1958 return RValue<UShort>(Nucleus::createNeg(val.value)); 1959 } 1960 1961 RValue<UShort> operator~(RValue<UShort> val) 1962 { 1963 return RValue<UShort>(Nucleus::createNot(val.value)); 1964 } 1965 1966 RValue<UShort> operator++(UShort &val, int) // Post-increment 1967 { 1968 RValue<UShort> res = val; 1969 1970 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((unsigned short)1))); 1971 val.storeValue(inc); 1972 1973 return res; 1974 } 1975 1976 const UShort &operator++(UShort &val) // Pre-increment 1977 { 1978 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1))); 1979 val.storeValue(inc); 1980 1981 return val; 1982 } 1983 1984 RValue<UShort> operator--(UShort &val, int) // Post-decrement 1985 { 1986 RValue<UShort> res = val; 1987 1988 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((unsigned short)1))); 1989 val.storeValue(inc); 1990 1991 return res; 1992 } 1993 1994 const UShort &operator--(UShort &val) // Pre-decrement 1995 { 1996 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1))); 1997 val.storeValue(inc); 1998 1999 return val; 2000 } 2001 2002 RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs) 2003 { 2004 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value)); 2005 } 2006 2007 RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs) 2008 { 2009 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value)); 2010 } 2011 2012 RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs) 2013 { 2014 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value)); 2015 } 2016 2017 RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs) 2018 { 2019 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value)); 2020 } 2021 2022 RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs) 2023 { 2024 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 2025 } 2026 2027 RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs) 2028 { 2029 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 2030 } 2031 2032 Type *UShort::getType() 2033 { 2034 return T(llvm::Type::getInt16Ty(*::context)); 2035 } 2036 2037 Byte4::Byte4(RValue<Byte8> cast) 2038 { 2039 storeValue(Nucleus::createBitCast(cast.value, getType())); 2040 } 2041 2042 Byte4::Byte4(const Reference<Byte4> &rhs) 2043 { 2044 Value *value = rhs.loadValue(); 2045 storeValue(value); 2046 } 2047 2048 Type *Byte4::getType() 2049 { 2050 return T(Type_v4i8); 2051 } 2052 2053 Type *SByte4::getType() 2054 { 2055 return T(Type_v4i8); 2056 } 2057 2058 Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) 2059 { 2060 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7}; 2061 storeValue(Nucleus::createConstantVector(constantVector, getType())); 2062 } 2063 2064 Byte8::Byte8(RValue<Byte8> rhs) 2065 { 2066 storeValue(rhs.value); 2067 } 2068 2069 Byte8::Byte8(const Byte8 &rhs) 2070 { 2071 Value *value = rhs.loadValue(); 2072 storeValue(value); 2073 } 2074 2075 Byte8::Byte8(const Reference<Byte8> &rhs) 2076 { 2077 Value *value = rhs.loadValue(); 2078 storeValue(value); 2079 } 2080 2081 RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs) 2082 { 2083 storeValue(rhs.value); 2084 2085 return rhs; 2086 } 2087 2088 RValue<Byte8> Byte8::operator=(const Byte8 &rhs) 2089 { 2090 Value *value = rhs.loadValue(); 2091 storeValue(value); 2092 2093 return RValue<Byte8>(value); 2094 } 2095 2096 RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs) 2097 { 2098 Value *value = rhs.loadValue(); 2099 storeValue(value); 2100 2101 return RValue<Byte8>(value); 2102 } 2103 2104 RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs) 2105 { 2106 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value)); 2107 } 2108 2109 RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs) 2110 { 2111 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value)); 2112 } 2113 2114 // RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs) 2115 // { 2116 // return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value)); 2117 // } 2118 2119 // RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs) 2120 // { 2121 // return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value)); 2122 // } 2123 2124 // RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs) 2125 // { 2126 // return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value)); 2127 // } 2128 2129 RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs) 2130 { 2131 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value)); 2132 } 2133 2134 RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs) 2135 { 2136 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value)); 2137 } 2138 2139 RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs) 2140 { 2141 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value)); 2142 } 2143 2144 // RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs) 2145 // { 2146 // return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value)); 2147 // } 2148 2149 // RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs) 2150 // { 2151 // return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value)); 2152 // } 2153 2154 RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs) 2155 { 2156 return lhs = lhs + rhs; 2157 } 2158 2159 RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs) 2160 { 2161 return lhs = lhs - rhs; 2162 } 2163 2164 // RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs) 2165 // { 2166 // return lhs = lhs * rhs; 2167 // } 2168 2169 // RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs) 2170 // { 2171 // return lhs = lhs / rhs; 2172 // } 2173 2174 // RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs) 2175 // { 2176 // return lhs = lhs % rhs; 2177 // } 2178 2179 RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs) 2180 { 2181 return lhs = lhs & rhs; 2182 } 2183 2184 RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs) 2185 { 2186 return lhs = lhs | rhs; 2187 } 2188 2189 RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs) 2190 { 2191 return lhs = lhs ^ rhs; 2192 } 2193 2194 // RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs) 2195 // { 2196 // return lhs = lhs << rhs; 2197 // } 2198 2199 // RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs) 2200 // { 2201 // return lhs = lhs >> rhs; 2202 // } 2203 2204 // RValue<Byte8> operator+(RValue<Byte8> val) 2205 // { 2206 // return val; 2207 // } 2208 2209 // RValue<Byte8> operator-(RValue<Byte8> val) 2210 // { 2211 // return RValue<Byte8>(Nucleus::createNeg(val.value)); 2212 // } 2213 2214 RValue<Byte8> operator~(RValue<Byte8> val) 2215 { 2216 return RValue<Byte8>(Nucleus::createNot(val.value)); 2217 } 2218 2219 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y) 2220 { 2221 return x86::paddusb(x, y); 2222 } 2223 2224 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y) 2225 { 2226 return x86::psubusb(x, y); 2227 } 2228 2229 RValue<Short4> Unpack(RValue<Byte4> x) 2230 { 2231 int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}; // Real type is v16i8 2232 return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle)); 2233 } 2234 2235 RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y) 2236 { 2237 return UnpackLow(As<Byte8>(x), As<Byte8>(y)); 2238 } 2239 2240 RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y) 2241 { 2242 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8 2243 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 2244 } 2245 2246 RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y) 2247 { 2248 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8 2249 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 2250 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE)); 2251 } 2252 2253 RValue<Int> SignMask(RValue<Byte8> x) 2254 { 2255 return x86::pmovmskb(x); 2256 } 2257 2258 // RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y) 2259 // { 2260 // return x86::pcmpgtb(x, y); // FIXME: Signedness 2261 // } 2262 2263 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y) 2264 { 2265 return x86::pcmpeqb(x, y); 2266 } 2267 2268 Type *Byte8::getType() 2269 { 2270 return T(Type_v8i8); 2271 } 2272 2273 SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) 2274 { 2275 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7}; 2276 Value *vector = V(Nucleus::createConstantVector(constantVector, getType())); 2277 2278 storeValue(Nucleus::createBitCast(vector, getType())); 2279 } 2280 2281 SByte8::SByte8(RValue<SByte8> rhs) 2282 { 2283 storeValue(rhs.value); 2284 } 2285 2286 SByte8::SByte8(const SByte8 &rhs) 2287 { 2288 Value *value = rhs.loadValue(); 2289 storeValue(value); 2290 } 2291 2292 SByte8::SByte8(const Reference<SByte8> &rhs) 2293 { 2294 Value *value = rhs.loadValue(); 2295 storeValue(value); 2296 } 2297 2298 RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs) 2299 { 2300 storeValue(rhs.value); 2301 2302 return rhs; 2303 } 2304 2305 RValue<SByte8> SByte8::operator=(const SByte8 &rhs) 2306 { 2307 Value *value = rhs.loadValue(); 2308 storeValue(value); 2309 2310 return RValue<SByte8>(value); 2311 } 2312 2313 RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs) 2314 { 2315 Value *value = rhs.loadValue(); 2316 storeValue(value); 2317 2318 return RValue<SByte8>(value); 2319 } 2320 2321 RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs) 2322 { 2323 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value)); 2324 } 2325 2326 RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs) 2327 { 2328 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value)); 2329 } 2330 2331 // RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs) 2332 // { 2333 // return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value)); 2334 // } 2335 2336 // RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs) 2337 // { 2338 // return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value)); 2339 // } 2340 2341 // RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs) 2342 // { 2343 // return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value)); 2344 // } 2345 2346 RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs) 2347 { 2348 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value)); 2349 } 2350 2351 RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs) 2352 { 2353 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value)); 2354 } 2355 2356 RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs) 2357 { 2358 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value)); 2359 } 2360 2361 // RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs) 2362 // { 2363 // return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value)); 2364 // } 2365 2366 // RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs) 2367 // { 2368 // return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value)); 2369 // } 2370 2371 RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs) 2372 { 2373 return lhs = lhs + rhs; 2374 } 2375 2376 RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs) 2377 { 2378 return lhs = lhs - rhs; 2379 } 2380 2381 // RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs) 2382 // { 2383 // return lhs = lhs * rhs; 2384 // } 2385 2386 // RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs) 2387 // { 2388 // return lhs = lhs / rhs; 2389 // } 2390 2391 // RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs) 2392 // { 2393 // return lhs = lhs % rhs; 2394 // } 2395 2396 RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs) 2397 { 2398 return lhs = lhs & rhs; 2399 } 2400 2401 RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs) 2402 { 2403 return lhs = lhs | rhs; 2404 } 2405 2406 RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs) 2407 { 2408 return lhs = lhs ^ rhs; 2409 } 2410 2411 // RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs) 2412 // { 2413 // return lhs = lhs << rhs; 2414 // } 2415 2416 // RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs) 2417 // { 2418 // return lhs = lhs >> rhs; 2419 // } 2420 2421 // RValue<SByte8> operator+(RValue<SByte8> val) 2422 // { 2423 // return val; 2424 // } 2425 2426 // RValue<SByte8> operator-(RValue<SByte8> val) 2427 // { 2428 // return RValue<SByte8>(Nucleus::createNeg(val.value)); 2429 // } 2430 2431 RValue<SByte8> operator~(RValue<SByte8> val) 2432 { 2433 return RValue<SByte8>(Nucleus::createNot(val.value)); 2434 } 2435 2436 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y) 2437 { 2438 return x86::paddsb(x, y); 2439 } 2440 2441 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y) 2442 { 2443 return x86::psubsb(x, y); 2444 } 2445 2446 RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y) 2447 { 2448 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8 2449 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 2450 } 2451 2452 RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y) 2453 { 2454 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8 2455 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 2456 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE)); 2457 } 2458 2459 RValue<Int> SignMask(RValue<SByte8> x) 2460 { 2461 return x86::pmovmskb(As<Byte8>(x)); 2462 } 2463 2464 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y) 2465 { 2466 return x86::pcmpgtb(x, y); 2467 } 2468 2469 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y) 2470 { 2471 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y)); 2472 } 2473 2474 Type *SByte8::getType() 2475 { 2476 return T(Type_v8i8); 2477 } 2478 2479 Byte16::Byte16(RValue<Byte16> rhs) 2480 { 2481 storeValue(rhs.value); 2482 } 2483 2484 Byte16::Byte16(const Byte16 &rhs) 2485 { 2486 Value *value = rhs.loadValue(); 2487 storeValue(value); 2488 } 2489 2490 Byte16::Byte16(const Reference<Byte16> &rhs) 2491 { 2492 Value *value = rhs.loadValue(); 2493 storeValue(value); 2494 } 2495 2496 RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs) 2497 { 2498 storeValue(rhs.value); 2499 2500 return rhs; 2501 } 2502 2503 RValue<Byte16> Byte16::operator=(const Byte16 &rhs) 2504 { 2505 Value *value = rhs.loadValue(); 2506 storeValue(value); 2507 2508 return RValue<Byte16>(value); 2509 } 2510 2511 RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs) 2512 { 2513 Value *value = rhs.loadValue(); 2514 storeValue(value); 2515 2516 return RValue<Byte16>(value); 2517 } 2518 2519 Type *Byte16::getType() 2520 { 2521 return T(llvm::VectorType::get(T(Byte::getType()), 16)); 2522 } 2523 2524 Type *SByte16::getType() 2525 { 2526 return T(llvm::VectorType::get(T(SByte::getType()), 16)); 2527 } 2528 2529 Short2::Short2(RValue<Short4> cast) 2530 { 2531 storeValue(Nucleus::createBitCast(cast.value, getType())); 2532 } 2533 2534 Type *Short2::getType() 2535 { 2536 return T(Type_v2i16); 2537 } 2538 2539 UShort2::UShort2(RValue<UShort4> cast) 2540 { 2541 storeValue(Nucleus::createBitCast(cast.value, getType())); 2542 } 2543 2544 Type *UShort2::getType() 2545 { 2546 return T(Type_v2i16); 2547 } 2548 2549 Short4::Short4(RValue<Int> cast) 2550 { 2551 Value *vector = loadValue(); 2552 Value *element = Nucleus::createTrunc(cast.value, Short::getType()); 2553 Value *insert = Nucleus::createInsertElement(vector, element, 0); 2554 Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value; 2555 2556 storeValue(swizzle); 2557 } 2558 2559 Short4::Short4(RValue<Int4> cast) 2560 { 2561 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6}; 2562 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType()); 2563 2564 Value *packed = Nucleus::createShuffleVector(short8, short8, select); 2565 Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value; 2566 2567 storeValue(short4); 2568 } 2569 2570 // Short4::Short4(RValue<Float> cast) 2571 // { 2572 // } 2573 2574 Short4::Short4(RValue<Float4> cast) 2575 { 2576 Int4 v4i32 = Int4(cast); 2577 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32)); 2578 2579 storeValue(As<Short4>(Int2(v4i32)).value); 2580 } 2581 2582 Short4::Short4(short xyzw) 2583 { 2584 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw}; 2585 storeValue(Nucleus::createConstantVector(constantVector, getType())); 2586 } 2587 2588 Short4::Short4(short x, short y, short z, short w) 2589 { 2590 int64_t constantVector[4] = {x, y, z, w}; 2591 storeValue(Nucleus::createConstantVector(constantVector, getType())); 2592 } 2593 2594 Short4::Short4(RValue<Short4> rhs) 2595 { 2596 storeValue(rhs.value); 2597 } 2598 2599 Short4::Short4(const Short4 &rhs) 2600 { 2601 Value *value = rhs.loadValue(); 2602 storeValue(value); 2603 } 2604 2605 Short4::Short4(const Reference<Short4> &rhs) 2606 { 2607 Value *value = rhs.loadValue(); 2608 storeValue(value); 2609 } 2610 2611 Short4::Short4(RValue<UShort4> rhs) 2612 { 2613 storeValue(rhs.value); 2614 } 2615 2616 Short4::Short4(const UShort4 &rhs) 2617 { 2618 storeValue(rhs.loadValue()); 2619 } 2620 2621 Short4::Short4(const Reference<UShort4> &rhs) 2622 { 2623 storeValue(rhs.loadValue()); 2624 } 2625 2626 RValue<Short4> Short4::operator=(RValue<Short4> rhs) 2627 { 2628 storeValue(rhs.value); 2629 2630 return rhs; 2631 } 2632 2633 RValue<Short4> Short4::operator=(const Short4 &rhs) 2634 { 2635 Value *value = rhs.loadValue(); 2636 storeValue(value); 2637 2638 return RValue<Short4>(value); 2639 } 2640 2641 RValue<Short4> Short4::operator=(const Reference<Short4> &rhs) 2642 { 2643 Value *value = rhs.loadValue(); 2644 storeValue(value); 2645 2646 return RValue<Short4>(value); 2647 } 2648 2649 RValue<Short4> Short4::operator=(RValue<UShort4> rhs) 2650 { 2651 storeValue(rhs.value); 2652 2653 return RValue<Short4>(rhs); 2654 } 2655 2656 RValue<Short4> Short4::operator=(const UShort4 &rhs) 2657 { 2658 Value *value = rhs.loadValue(); 2659 storeValue(value); 2660 2661 return RValue<Short4>(value); 2662 } 2663 2664 RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs) 2665 { 2666 Value *value = rhs.loadValue(); 2667 storeValue(value); 2668 2669 return RValue<Short4>(value); 2670 } 2671 2672 RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs) 2673 { 2674 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value)); 2675 } 2676 2677 RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs) 2678 { 2679 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value)); 2680 } 2681 2682 RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs) 2683 { 2684 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value)); 2685 } 2686 2687 // RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs) 2688 // { 2689 // return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value)); 2690 // } 2691 2692 // RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs) 2693 // { 2694 // return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value)); 2695 // } 2696 2697 RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs) 2698 { 2699 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value)); 2700 } 2701 2702 RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs) 2703 { 2704 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value)); 2705 } 2706 2707 RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs) 2708 { 2709 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value)); 2710 } 2711 2712 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs) 2713 { 2714 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value)); 2715 2716 return x86::psllw(lhs, rhs); 2717 } 2718 2719 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs) 2720 { 2721 // return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value)); 2722 2723 return x86::psraw(lhs, rhs); 2724 } 2725 2726 RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs) 2727 { 2728 return lhs = lhs + rhs; 2729 } 2730 2731 RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs) 2732 { 2733 return lhs = lhs - rhs; 2734 } 2735 2736 RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs) 2737 { 2738 return lhs = lhs * rhs; 2739 } 2740 2741 // RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs) 2742 // { 2743 // return lhs = lhs / rhs; 2744 // } 2745 2746 // RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs) 2747 // { 2748 // return lhs = lhs % rhs; 2749 // } 2750 2751 RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs) 2752 { 2753 return lhs = lhs & rhs; 2754 } 2755 2756 RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs) 2757 { 2758 return lhs = lhs | rhs; 2759 } 2760 2761 RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs) 2762 { 2763 return lhs = lhs ^ rhs; 2764 } 2765 2766 RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs) 2767 { 2768 return lhs = lhs << rhs; 2769 } 2770 2771 RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs) 2772 { 2773 return lhs = lhs >> rhs; 2774 } 2775 2776 // RValue<Short4> operator+(RValue<Short4> val) 2777 // { 2778 // return val; 2779 // } 2780 2781 RValue<Short4> operator-(RValue<Short4> val) 2782 { 2783 return RValue<Short4>(Nucleus::createNeg(val.value)); 2784 } 2785 2786 RValue<Short4> operator~(RValue<Short4> val) 2787 { 2788 return RValue<Short4>(Nucleus::createNot(val.value)); 2789 } 2790 2791 RValue<Short4> RoundShort4(RValue<Float4> cast) 2792 { 2793 RValue<Int4> int4 = RoundInt(cast); 2794 return As<Short4>(Pack(int4, int4)); 2795 } 2796 2797 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y) 2798 { 2799 return x86::pmaxsw(x, y); 2800 } 2801 2802 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y) 2803 { 2804 return x86::pminsw(x, y); 2805 } 2806 2807 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y) 2808 { 2809 return x86::paddsw(x, y); 2810 } 2811 2812 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y) 2813 { 2814 return x86::psubsw(x, y); 2815 } 2816 2817 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y) 2818 { 2819 return x86::pmulhw(x, y); 2820 } 2821 2822 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y) 2823 { 2824 return x86::pmaddwd(x, y); 2825 } 2826 2827 RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y) 2828 { 2829 auto result = x86::packsswb(x, y); 2830 2831 return As<SByte8>(Swizzle(As<Int4>(result), 0x88)); 2832 } 2833 2834 RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y) 2835 { 2836 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // Real type is v8i16 2837 return As<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 2838 } 2839 2840 RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y) 2841 { 2842 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // Real type is v8i16 2843 auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 2844 return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE)); 2845 } 2846 2847 RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select) 2848 { 2849 // Real type is v8i16 2850 int shuffle[8] = 2851 { 2852 (select >> 0) & 0x03, 2853 (select >> 2) & 0x03, 2854 (select >> 4) & 0x03, 2855 (select >> 6) & 0x03, 2856 (select >> 0) & 0x03, 2857 (select >> 2) & 0x03, 2858 (select >> 4) & 0x03, 2859 (select >> 6) & 0x03, 2860 }; 2861 2862 return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle)); 2863 } 2864 2865 RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i) 2866 { 2867 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i)); 2868 } 2869 2870 RValue<Short> Extract(RValue<Short4> val, int i) 2871 { 2872 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i)); 2873 } 2874 2875 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y) 2876 { 2877 return x86::pcmpgtw(x, y); 2878 } 2879 2880 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y) 2881 { 2882 return x86::pcmpeqw(x, y); 2883 } 2884 2885 Type *Short4::getType() 2886 { 2887 return T(Type_v4i16); 2888 } 2889 2890 UShort4::UShort4(RValue<Int4> cast) 2891 { 2892 *this = Short4(cast); 2893 } 2894 2895 UShort4::UShort4(RValue<Float4> cast, bool saturate) 2896 { 2897 if(saturate) 2898 { 2899 if(CPUID::supportsSSE4_1()) 2900 { 2901 Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation 2902 *this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4))); 2903 } 2904 else 2905 { 2906 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000)))); 2907 } 2908 } 2909 else 2910 { 2911 *this = Short4(Int4(cast)); 2912 } 2913 } 2914 2915 UShort4::UShort4(unsigned short xyzw) 2916 { 2917 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw}; 2918 storeValue(Nucleus::createConstantVector(constantVector, getType())); 2919 } 2920 2921 UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w) 2922 { 2923 int64_t constantVector[4] = {x, y, z, w}; 2924 storeValue(Nucleus::createConstantVector(constantVector, getType())); 2925 } 2926 2927 UShort4::UShort4(RValue<UShort4> rhs) 2928 { 2929 storeValue(rhs.value); 2930 } 2931 2932 UShort4::UShort4(const UShort4 &rhs) 2933 { 2934 Value *value = rhs.loadValue(); 2935 storeValue(value); 2936 } 2937 2938 UShort4::UShort4(const Reference<UShort4> &rhs) 2939 { 2940 Value *value = rhs.loadValue(); 2941 storeValue(value); 2942 } 2943 2944 UShort4::UShort4(RValue<Short4> rhs) 2945 { 2946 storeValue(rhs.value); 2947 } 2948 2949 UShort4::UShort4(const Short4 &rhs) 2950 { 2951 Value *value = rhs.loadValue(); 2952 storeValue(value); 2953 } 2954 2955 UShort4::UShort4(const Reference<Short4> &rhs) 2956 { 2957 Value *value = rhs.loadValue(); 2958 storeValue(value); 2959 } 2960 2961 RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs) 2962 { 2963 storeValue(rhs.value); 2964 2965 return rhs; 2966 } 2967 2968 RValue<UShort4> UShort4::operator=(const UShort4 &rhs) 2969 { 2970 Value *value = rhs.loadValue(); 2971 storeValue(value); 2972 2973 return RValue<UShort4>(value); 2974 } 2975 2976 RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs) 2977 { 2978 Value *value = rhs.loadValue(); 2979 storeValue(value); 2980 2981 return RValue<UShort4>(value); 2982 } 2983 2984 RValue<UShort4> UShort4::operator=(RValue<Short4> rhs) 2985 { 2986 storeValue(rhs.value); 2987 2988 return RValue<UShort4>(rhs); 2989 } 2990 2991 RValue<UShort4> UShort4::operator=(const Short4 &rhs) 2992 { 2993 Value *value = rhs.loadValue(); 2994 storeValue(value); 2995 2996 return RValue<UShort4>(value); 2997 } 2998 2999 RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs) 3000 { 3001 Value *value = rhs.loadValue(); 3002 storeValue(value); 3003 3004 return RValue<UShort4>(value); 3005 } 3006 3007 RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs) 3008 { 3009 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value)); 3010 } 3011 3012 RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs) 3013 { 3014 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value)); 3015 } 3016 3017 RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs) 3018 { 3019 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value)); 3020 } 3021 3022 RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs) 3023 { 3024 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value)); 3025 } 3026 3027 RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs) 3028 { 3029 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value)); 3030 } 3031 3032 RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs) 3033 { 3034 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value)); 3035 } 3036 3037 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs) 3038 { 3039 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value)); 3040 3041 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs)); 3042 } 3043 3044 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs) 3045 { 3046 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value)); 3047 3048 return x86::psrlw(lhs, rhs); 3049 } 3050 3051 RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs) 3052 { 3053 return lhs = lhs << rhs; 3054 } 3055 3056 RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs) 3057 { 3058 return lhs = lhs >> rhs; 3059 } 3060 3061 RValue<UShort4> operator~(RValue<UShort4> val) 3062 { 3063 return RValue<UShort4>(Nucleus::createNot(val.value)); 3064 } 3065 3066 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y) 3067 { 3068 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)); 3069 } 3070 3071 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y) 3072 { 3073 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)); 3074 } 3075 3076 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y) 3077 { 3078 return x86::paddusw(x, y); 3079 } 3080 3081 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y) 3082 { 3083 return x86::psubusw(x, y); 3084 } 3085 3086 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y) 3087 { 3088 return x86::pmulhuw(x, y); 3089 } 3090 3091 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y) 3092 { 3093 return x86::pavgw(x, y); 3094 } 3095 3096 RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y) 3097 { 3098 auto result = x86::packuswb(x, y); 3099 3100 return As<Byte8>(Swizzle(As<Int4>(result), 0x88)); 3101 } 3102 3103 Type *UShort4::getType() 3104 { 3105 return T(Type_v4i16); 3106 } 3107 3108 Short8::Short8(short c) 3109 { 3110 int64_t constantVector[8] = {c, c, c, c, c, c, c, c}; 3111 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3112 } 3113 3114 Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7) 3115 { 3116 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7}; 3117 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3118 } 3119 3120 Short8::Short8(RValue<Short8> rhs) 3121 { 3122 storeValue(rhs.value); 3123 } 3124 3125 Short8::Short8(const Reference<Short8> &rhs) 3126 { 3127 Value *value = rhs.loadValue(); 3128 storeValue(value); 3129 } 3130 3131 Short8::Short8(RValue<Short4> lo, RValue<Short4> hi) 3132 { 3133 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11}; // Real type is v8i16 3134 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle); 3135 3136 storeValue(packed); 3137 } 3138 3139 RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs) 3140 { 3141 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value)); 3142 } 3143 3144 RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs) 3145 { 3146 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value)); 3147 } 3148 3149 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs) 3150 { 3151 return x86::psllw(lhs, rhs); // FIXME: Fallback required 3152 } 3153 3154 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs) 3155 { 3156 return x86::psraw(lhs, rhs); // FIXME: Fallback required 3157 } 3158 3159 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y) 3160 { 3161 return x86::pmaddwd(x, y); // FIXME: Fallback required 3162 } 3163 3164 RValue<Int4> Abs(RValue<Int4> x) 3165 { 3166 auto negative = x >> 31; 3167 return (x ^ negative) - negative; 3168 } 3169 3170 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y) 3171 { 3172 return x86::pmulhw(x, y); // FIXME: Fallback required 3173 } 3174 3175 Type *Short8::getType() 3176 { 3177 return T(llvm::VectorType::get(T(Short::getType()), 8)); 3178 } 3179 3180 UShort8::UShort8(unsigned short c) 3181 { 3182 int64_t constantVector[8] = {c, c, c, c, c, c, c, c}; 3183 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3184 } 3185 3186 UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7) 3187 { 3188 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7}; 3189 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3190 } 3191 3192 UShort8::UShort8(RValue<UShort8> rhs) 3193 { 3194 storeValue(rhs.value); 3195 } 3196 3197 UShort8::UShort8(const Reference<UShort8> &rhs) 3198 { 3199 Value *value = rhs.loadValue(); 3200 storeValue(value); 3201 } 3202 3203 UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi) 3204 { 3205 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11}; // Real type is v8i16 3206 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle); 3207 3208 storeValue(packed); 3209 } 3210 3211 RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs) 3212 { 3213 storeValue(rhs.value); 3214 3215 return rhs; 3216 } 3217 3218 RValue<UShort8> UShort8::operator=(const UShort8 &rhs) 3219 { 3220 Value *value = rhs.loadValue(); 3221 storeValue(value); 3222 3223 return RValue<UShort8>(value); 3224 } 3225 3226 RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs) 3227 { 3228 Value *value = rhs.loadValue(); 3229 storeValue(value); 3230 3231 return RValue<UShort8>(value); 3232 } 3233 3234 RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs) 3235 { 3236 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value)); 3237 } 3238 3239 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs) 3240 { 3241 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs)); // FIXME: Fallback required 3242 } 3243 3244 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs) 3245 { 3246 return x86::psrlw(lhs, rhs); // FIXME: Fallback required 3247 } 3248 3249 RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs) 3250 { 3251 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value)); 3252 } 3253 3254 RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs) 3255 { 3256 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value)); 3257 } 3258 3259 RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs) 3260 { 3261 return lhs = lhs + rhs; 3262 } 3263 3264 RValue<UShort8> operator~(RValue<UShort8> val) 3265 { 3266 return RValue<UShort8>(Nucleus::createNot(val.value)); 3267 } 3268 3269 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7) 3270 { 3271 int pshufb[16] = 3272 { 3273 select0 + 0, 3274 select0 + 1, 3275 select1 + 0, 3276 select1 + 1, 3277 select2 + 0, 3278 select2 + 1, 3279 select3 + 0, 3280 select3 + 1, 3281 select4 + 0, 3282 select4 + 1, 3283 select5 + 0, 3284 select5 + 1, 3285 select6 + 0, 3286 select6 + 1, 3287 select7 + 0, 3288 select7 + 1, 3289 }; 3290 3291 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType()); 3292 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb); 3293 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType()); 3294 3295 return RValue<UShort8>(short8); 3296 } 3297 3298 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y) 3299 { 3300 return x86::pmulhuw(x, y); // FIXME: Fallback required 3301 } 3302 3303 Type *UShort8::getType() 3304 { 3305 return T(llvm::VectorType::get(T(UShort::getType()), 8)); 3306 } 3307 3308 Int::Int(Argument<Int> argument) 3309 { 3310 storeValue(argument.value); 3311 } 3312 3313 Int::Int(RValue<Byte> cast) 3314 { 3315 Value *integer = Nucleus::createZExt(cast.value, Int::getType()); 3316 3317 storeValue(integer); 3318 } 3319 3320 Int::Int(RValue<SByte> cast) 3321 { 3322 Value *integer = Nucleus::createSExt(cast.value, Int::getType()); 3323 3324 storeValue(integer); 3325 } 3326 3327 Int::Int(RValue<Short> cast) 3328 { 3329 Value *integer = Nucleus::createSExt(cast.value, Int::getType()); 3330 3331 storeValue(integer); 3332 } 3333 3334 Int::Int(RValue<UShort> cast) 3335 { 3336 Value *integer = Nucleus::createZExt(cast.value, Int::getType()); 3337 3338 storeValue(integer); 3339 } 3340 3341 Int::Int(RValue<Int2> cast) 3342 { 3343 *this = Extract(cast, 0); 3344 } 3345 3346 Int::Int(RValue<Long> cast) 3347 { 3348 Value *integer = Nucleus::createTrunc(cast.value, Int::getType()); 3349 3350 storeValue(integer); 3351 } 3352 3353 Int::Int(RValue<Float> cast) 3354 { 3355 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType()); 3356 3357 storeValue(integer); 3358 } 3359 3360 Int::Int(int x) 3361 { 3362 storeValue(Nucleus::createConstantInt(x)); 3363 } 3364 3365 Int::Int(RValue<Int> rhs) 3366 { 3367 storeValue(rhs.value); 3368 } 3369 3370 Int::Int(RValue<UInt> rhs) 3371 { 3372 storeValue(rhs.value); 3373 } 3374 3375 Int::Int(const Int &rhs) 3376 { 3377 Value *value = rhs.loadValue(); 3378 storeValue(value); 3379 } 3380 3381 Int::Int(const Reference<Int> &rhs) 3382 { 3383 Value *value = rhs.loadValue(); 3384 storeValue(value); 3385 } 3386 3387 Int::Int(const UInt &rhs) 3388 { 3389 Value *value = rhs.loadValue(); 3390 storeValue(value); 3391 } 3392 3393 Int::Int(const Reference<UInt> &rhs) 3394 { 3395 Value *value = rhs.loadValue(); 3396 storeValue(value); 3397 } 3398 3399 RValue<Int> Int::operator=(int rhs) 3400 { 3401 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs))); 3402 } 3403 3404 RValue<Int> Int::operator=(RValue<Int> rhs) 3405 { 3406 storeValue(rhs.value); 3407 3408 return rhs; 3409 } 3410 3411 RValue<Int> Int::operator=(RValue<UInt> rhs) 3412 { 3413 storeValue(rhs.value); 3414 3415 return RValue<Int>(rhs); 3416 } 3417 3418 RValue<Int> Int::operator=(const Int &rhs) 3419 { 3420 Value *value = rhs.loadValue(); 3421 storeValue(value); 3422 3423 return RValue<Int>(value); 3424 } 3425 3426 RValue<Int> Int::operator=(const Reference<Int> &rhs) 3427 { 3428 Value *value = rhs.loadValue(); 3429 storeValue(value); 3430 3431 return RValue<Int>(value); 3432 } 3433 3434 RValue<Int> Int::operator=(const UInt &rhs) 3435 { 3436 Value *value = rhs.loadValue(); 3437 storeValue(value); 3438 3439 return RValue<Int>(value); 3440 } 3441 3442 RValue<Int> Int::operator=(const Reference<UInt> &rhs) 3443 { 3444 Value *value = rhs.loadValue(); 3445 storeValue(value); 3446 3447 return RValue<Int>(value); 3448 } 3449 3450 RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs) 3451 { 3452 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value)); 3453 } 3454 3455 RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs) 3456 { 3457 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value)); 3458 } 3459 3460 RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs) 3461 { 3462 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value)); 3463 } 3464 3465 RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs) 3466 { 3467 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value)); 3468 } 3469 3470 RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs) 3471 { 3472 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value)); 3473 } 3474 3475 RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs) 3476 { 3477 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value)); 3478 } 3479 3480 RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs) 3481 { 3482 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value)); 3483 } 3484 3485 RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs) 3486 { 3487 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value)); 3488 } 3489 3490 RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs) 3491 { 3492 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value)); 3493 } 3494 3495 RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs) 3496 { 3497 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value)); 3498 } 3499 3500 RValue<Int> operator+=(Int &lhs, RValue<Int> rhs) 3501 { 3502 return lhs = lhs + rhs; 3503 } 3504 3505 RValue<Int> operator-=(Int &lhs, RValue<Int> rhs) 3506 { 3507 return lhs = lhs - rhs; 3508 } 3509 3510 RValue<Int> operator*=(Int &lhs, RValue<Int> rhs) 3511 { 3512 return lhs = lhs * rhs; 3513 } 3514 3515 RValue<Int> operator/=(Int &lhs, RValue<Int> rhs) 3516 { 3517 return lhs = lhs / rhs; 3518 } 3519 3520 RValue<Int> operator%=(Int &lhs, RValue<Int> rhs) 3521 { 3522 return lhs = lhs % rhs; 3523 } 3524 3525 RValue<Int> operator&=(Int &lhs, RValue<Int> rhs) 3526 { 3527 return lhs = lhs & rhs; 3528 } 3529 3530 RValue<Int> operator|=(Int &lhs, RValue<Int> rhs) 3531 { 3532 return lhs = lhs | rhs; 3533 } 3534 3535 RValue<Int> operator^=(Int &lhs, RValue<Int> rhs) 3536 { 3537 return lhs = lhs ^ rhs; 3538 } 3539 3540 RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs) 3541 { 3542 return lhs = lhs << rhs; 3543 } 3544 3545 RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs) 3546 { 3547 return lhs = lhs >> rhs; 3548 } 3549 3550 RValue<Int> operator+(RValue<Int> val) 3551 { 3552 return val; 3553 } 3554 3555 RValue<Int> operator-(RValue<Int> val) 3556 { 3557 return RValue<Int>(Nucleus::createNeg(val.value)); 3558 } 3559 3560 RValue<Int> operator~(RValue<Int> val) 3561 { 3562 return RValue<Int>(Nucleus::createNot(val.value)); 3563 } 3564 3565 RValue<Int> operator++(Int &val, int) // Post-increment 3566 { 3567 RValue<Int> res = val; 3568 3569 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1))); 3570 val.storeValue(inc); 3571 3572 return res; 3573 } 3574 3575 const Int &operator++(Int &val) // Pre-increment 3576 { 3577 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1))); 3578 val.storeValue(inc); 3579 3580 return val; 3581 } 3582 3583 RValue<Int> operator--(Int &val, int) // Post-decrement 3584 { 3585 RValue<Int> res = val; 3586 3587 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1))); 3588 val.storeValue(inc); 3589 3590 return res; 3591 } 3592 3593 const Int &operator--(Int &val) // Pre-decrement 3594 { 3595 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1))); 3596 val.storeValue(inc); 3597 3598 return val; 3599 } 3600 3601 RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs) 3602 { 3603 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value)); 3604 } 3605 3606 RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs) 3607 { 3608 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value)); 3609 } 3610 3611 RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs) 3612 { 3613 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value)); 3614 } 3615 3616 RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs) 3617 { 3618 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value)); 3619 } 3620 3621 RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs) 3622 { 3623 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 3624 } 3625 3626 RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs) 3627 { 3628 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 3629 } 3630 3631 RValue<Int> Max(RValue<Int> x, RValue<Int> y) 3632 { 3633 return IfThenElse(x > y, x, y); 3634 } 3635 3636 RValue<Int> Min(RValue<Int> x, RValue<Int> y) 3637 { 3638 return IfThenElse(x < y, x, y); 3639 } 3640 3641 RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max) 3642 { 3643 return Min(Max(x, min), max); 3644 } 3645 3646 RValue<Int> RoundInt(RValue<Float> cast) 3647 { 3648 return x86::cvtss2si(cast); 3649 3650 // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f)); 3651 } 3652 3653 Type *Int::getType() 3654 { 3655 return T(llvm::Type::getInt32Ty(*::context)); 3656 } 3657 3658 Long::Long(RValue<Int> cast) 3659 { 3660 Value *integer = Nucleus::createSExt(cast.value, Long::getType()); 3661 3662 storeValue(integer); 3663 } 3664 3665 Long::Long(RValue<UInt> cast) 3666 { 3667 Value *integer = Nucleus::createZExt(cast.value, Long::getType()); 3668 3669 storeValue(integer); 3670 } 3671 3672 Long::Long(RValue<Long> rhs) 3673 { 3674 storeValue(rhs.value); 3675 } 3676 3677 RValue<Long> Long::operator=(int64_t rhs) 3678 { 3679 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs))); 3680 } 3681 3682 RValue<Long> Long::operator=(RValue<Long> rhs) 3683 { 3684 storeValue(rhs.value); 3685 3686 return rhs; 3687 } 3688 3689 RValue<Long> Long::operator=(const Long &rhs) 3690 { 3691 Value *value = rhs.loadValue(); 3692 storeValue(value); 3693 3694 return RValue<Long>(value); 3695 } 3696 3697 RValue<Long> Long::operator=(const Reference<Long> &rhs) 3698 { 3699 Value *value = rhs.loadValue(); 3700 storeValue(value); 3701 3702 return RValue<Long>(value); 3703 } 3704 3705 RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs) 3706 { 3707 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value)); 3708 } 3709 3710 RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs) 3711 { 3712 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value)); 3713 } 3714 3715 RValue<Long> operator+=(Long &lhs, RValue<Long> rhs) 3716 { 3717 return lhs = lhs + rhs; 3718 } 3719 3720 RValue<Long> operator-=(Long &lhs, RValue<Long> rhs) 3721 { 3722 return lhs = lhs - rhs; 3723 } 3724 3725 RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y) 3726 { 3727 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value)); 3728 } 3729 3730 Type *Long::getType() 3731 { 3732 return T(llvm::Type::getInt64Ty(*::context)); 3733 } 3734 3735 UInt::UInt(Argument<UInt> argument) 3736 { 3737 storeValue(argument.value); 3738 } 3739 3740 UInt::UInt(RValue<UShort> cast) 3741 { 3742 Value *integer = Nucleus::createZExt(cast.value, UInt::getType()); 3743 3744 storeValue(integer); 3745 } 3746 3747 UInt::UInt(RValue<Long> cast) 3748 { 3749 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType()); 3750 3751 storeValue(integer); 3752 } 3753 3754 UInt::UInt(RValue<Float> cast) 3755 { 3756 // Note: createFPToUI is broken, must perform conversion using createFPtoSI 3757 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType()); 3758 3759 // Smallest positive value representable in UInt, but not in Int 3760 const unsigned int ustart = 0x80000000u; 3761 const float ustartf = float(ustart); 3762 3763 // If the value is negative, store 0, otherwise store the result of the conversion 3764 storeValue((~(As<Int>(cast) >> 31) & 3765 // Check if the value can be represented as an Int 3766 IfThenElse(cast >= ustartf, 3767 // If the value is too large, subtract ustart and re-add it after conversion. 3768 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)), 3769 // Otherwise, just convert normally 3770 Int(cast))).value); 3771 } 3772 3773 UInt::UInt(int x) 3774 { 3775 storeValue(Nucleus::createConstantInt(x)); 3776 } 3777 3778 UInt::UInt(unsigned int x) 3779 { 3780 storeValue(Nucleus::createConstantInt(x)); 3781 } 3782 3783 UInt::UInt(RValue<UInt> rhs) 3784 { 3785 storeValue(rhs.value); 3786 } 3787 3788 UInt::UInt(RValue<Int> rhs) 3789 { 3790 storeValue(rhs.value); 3791 } 3792 3793 UInt::UInt(const UInt &rhs) 3794 { 3795 Value *value = rhs.loadValue(); 3796 storeValue(value); 3797 } 3798 3799 UInt::UInt(const Reference<UInt> &rhs) 3800 { 3801 Value *value = rhs.loadValue(); 3802 storeValue(value); 3803 } 3804 3805 UInt::UInt(const Int &rhs) 3806 { 3807 Value *value = rhs.loadValue(); 3808 storeValue(value); 3809 } 3810 3811 UInt::UInt(const Reference<Int> &rhs) 3812 { 3813 Value *value = rhs.loadValue(); 3814 storeValue(value); 3815 } 3816 3817 RValue<UInt> UInt::operator=(unsigned int rhs) 3818 { 3819 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs))); 3820 } 3821 3822 RValue<UInt> UInt::operator=(RValue<UInt> rhs) 3823 { 3824 storeValue(rhs.value); 3825 3826 return rhs; 3827 } 3828 3829 RValue<UInt> UInt::operator=(RValue<Int> rhs) 3830 { 3831 storeValue(rhs.value); 3832 3833 return RValue<UInt>(rhs); 3834 } 3835 3836 RValue<UInt> UInt::operator=(const UInt &rhs) 3837 { 3838 Value *value = rhs.loadValue(); 3839 storeValue(value); 3840 3841 return RValue<UInt>(value); 3842 } 3843 3844 RValue<UInt> UInt::operator=(const Reference<UInt> &rhs) 3845 { 3846 Value *value = rhs.loadValue(); 3847 storeValue(value); 3848 3849 return RValue<UInt>(value); 3850 } 3851 3852 RValue<UInt> UInt::operator=(const Int &rhs) 3853 { 3854 Value *value = rhs.loadValue(); 3855 storeValue(value); 3856 3857 return RValue<UInt>(value); 3858 } 3859 3860 RValue<UInt> UInt::operator=(const Reference<Int> &rhs) 3861 { 3862 Value *value = rhs.loadValue(); 3863 storeValue(value); 3864 3865 return RValue<UInt>(value); 3866 } 3867 3868 RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs) 3869 { 3870 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value)); 3871 } 3872 3873 RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs) 3874 { 3875 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value)); 3876 } 3877 3878 RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs) 3879 { 3880 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value)); 3881 } 3882 3883 RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs) 3884 { 3885 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value)); 3886 } 3887 3888 RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs) 3889 { 3890 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value)); 3891 } 3892 3893 RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs) 3894 { 3895 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value)); 3896 } 3897 3898 RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs) 3899 { 3900 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value)); 3901 } 3902 3903 RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs) 3904 { 3905 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value)); 3906 } 3907 3908 RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs) 3909 { 3910 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value)); 3911 } 3912 3913 RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs) 3914 { 3915 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value)); 3916 } 3917 3918 RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs) 3919 { 3920 return lhs = lhs + rhs; 3921 } 3922 3923 RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs) 3924 { 3925 return lhs = lhs - rhs; 3926 } 3927 3928 RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs) 3929 { 3930 return lhs = lhs * rhs; 3931 } 3932 3933 RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs) 3934 { 3935 return lhs = lhs / rhs; 3936 } 3937 3938 RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs) 3939 { 3940 return lhs = lhs % rhs; 3941 } 3942 3943 RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs) 3944 { 3945 return lhs = lhs & rhs; 3946 } 3947 3948 RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs) 3949 { 3950 return lhs = lhs | rhs; 3951 } 3952 3953 RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs) 3954 { 3955 return lhs = lhs ^ rhs; 3956 } 3957 3958 RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs) 3959 { 3960 return lhs = lhs << rhs; 3961 } 3962 3963 RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs) 3964 { 3965 return lhs = lhs >> rhs; 3966 } 3967 3968 RValue<UInt> operator+(RValue<UInt> val) 3969 { 3970 return val; 3971 } 3972 3973 RValue<UInt> operator-(RValue<UInt> val) 3974 { 3975 return RValue<UInt>(Nucleus::createNeg(val.value)); 3976 } 3977 3978 RValue<UInt> operator~(RValue<UInt> val) 3979 { 3980 return RValue<UInt>(Nucleus::createNot(val.value)); 3981 } 3982 3983 RValue<UInt> operator++(UInt &val, int) // Post-increment 3984 { 3985 RValue<UInt> res = val; 3986 3987 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1))); 3988 val.storeValue(inc); 3989 3990 return res; 3991 } 3992 3993 const UInt &operator++(UInt &val) // Pre-increment 3994 { 3995 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1))); 3996 val.storeValue(inc); 3997 3998 return val; 3999 } 4000 4001 RValue<UInt> operator--(UInt &val, int) // Post-decrement 4002 { 4003 RValue<UInt> res = val; 4004 4005 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1))); 4006 val.storeValue(inc); 4007 4008 return res; 4009 } 4010 4011 const UInt &operator--(UInt &val) // Pre-decrement 4012 { 4013 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1))); 4014 val.storeValue(inc); 4015 4016 return val; 4017 } 4018 4019 RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y) 4020 { 4021 return IfThenElse(x > y, x, y); 4022 } 4023 4024 RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y) 4025 { 4026 return IfThenElse(x < y, x, y); 4027 } 4028 4029 RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max) 4030 { 4031 return Min(Max(x, min), max); 4032 } 4033 4034 RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs) 4035 { 4036 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value)); 4037 } 4038 4039 RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs) 4040 { 4041 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value)); 4042 } 4043 4044 RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs) 4045 { 4046 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value)); 4047 } 4048 4049 RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs) 4050 { 4051 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value)); 4052 } 4053 4054 RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs) 4055 { 4056 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 4057 } 4058 4059 RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs) 4060 { 4061 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 4062 } 4063 4064 // RValue<UInt> RoundUInt(RValue<Float> cast) 4065 // { 4066 // return x86::cvtss2si(val); // FIXME: Unsigned 4067 // 4068 // // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f)); 4069 // } 4070 4071 Type *UInt::getType() 4072 { 4073 return T(llvm::Type::getInt32Ty(*::context)); 4074 } 4075 4076 // Int2::Int2(RValue<Int> cast) 4077 // { 4078 // Value *extend = Nucleus::createZExt(cast.value, Long::getType()); 4079 // Value *vector = Nucleus::createBitCast(extend, Int2::getType()); 4080 // 4081 // int shuffle[2] = {0, 0}; 4082 // Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle); 4083 // 4084 // storeValue(replicate); 4085 // } 4086 4087 Int2::Int2(RValue<Int4> cast) 4088 { 4089 storeValue(Nucleus::createBitCast(cast.value, getType())); 4090 } 4091 4092 Int2::Int2(int x, int y) 4093 { 4094 int64_t constantVector[2] = {x, y}; 4095 storeValue(Nucleus::createConstantVector(constantVector, getType())); 4096 } 4097 4098 Int2::Int2(RValue<Int2> rhs) 4099 { 4100 storeValue(rhs.value); 4101 } 4102 4103 Int2::Int2(const Int2 &rhs) 4104 { 4105 Value *value = rhs.loadValue(); 4106 storeValue(value); 4107 } 4108 4109 Int2::Int2(const Reference<Int2> &rhs) 4110 { 4111 Value *value = rhs.loadValue(); 4112 storeValue(value); 4113 } 4114 4115 Int2::Int2(RValue<Int> lo, RValue<Int> hi) 4116 { 4117 int shuffle[4] = {0, 4, 1, 5}; 4118 Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle); 4119 4120 storeValue(Nucleus::createBitCast(packed, Int2::getType())); 4121 } 4122 4123 RValue<Int2> Int2::operator=(RValue<Int2> rhs) 4124 { 4125 storeValue(rhs.value); 4126 4127 return rhs; 4128 } 4129 4130 RValue<Int2> Int2::operator=(const Int2 &rhs) 4131 { 4132 Value *value = rhs.loadValue(); 4133 storeValue(value); 4134 4135 return RValue<Int2>(value); 4136 } 4137 4138 RValue<Int2> Int2::operator=(const Reference<Int2> &rhs) 4139 { 4140 Value *value = rhs.loadValue(); 4141 storeValue(value); 4142 4143 return RValue<Int2>(value); 4144 } 4145 4146 RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs) 4147 { 4148 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value)); 4149 } 4150 4151 RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs) 4152 { 4153 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value)); 4154 } 4155 4156 // RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs) 4157 // { 4158 // return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value)); 4159 // } 4160 4161 // RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs) 4162 // { 4163 // return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value)); 4164 // } 4165 4166 // RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs) 4167 // { 4168 // return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value)); 4169 // } 4170 4171 RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs) 4172 { 4173 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value)); 4174 } 4175 4176 RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs) 4177 { 4178 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value)); 4179 } 4180 4181 RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs) 4182 { 4183 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value)); 4184 } 4185 4186 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs) 4187 { 4188 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value)); 4189 4190 return x86::pslld(lhs, rhs); 4191 } 4192 4193 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs) 4194 { 4195 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value)); 4196 4197 return x86::psrad(lhs, rhs); 4198 } 4199 4200 RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs) 4201 { 4202 return lhs = lhs + rhs; 4203 } 4204 4205 RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs) 4206 { 4207 return lhs = lhs - rhs; 4208 } 4209 4210 // RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs) 4211 // { 4212 // return lhs = lhs * rhs; 4213 // } 4214 4215 // RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs) 4216 // { 4217 // return lhs = lhs / rhs; 4218 // } 4219 4220 // RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs) 4221 // { 4222 // return lhs = lhs % rhs; 4223 // } 4224 4225 RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs) 4226 { 4227 return lhs = lhs & rhs; 4228 } 4229 4230 RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs) 4231 { 4232 return lhs = lhs | rhs; 4233 } 4234 4235 RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs) 4236 { 4237 return lhs = lhs ^ rhs; 4238 } 4239 4240 RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs) 4241 { 4242 return lhs = lhs << rhs; 4243 } 4244 4245 RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs) 4246 { 4247 return lhs = lhs >> rhs; 4248 } 4249 4250 // RValue<Int2> operator+(RValue<Int2> val) 4251 // { 4252 // return val; 4253 // } 4254 4255 // RValue<Int2> operator-(RValue<Int2> val) 4256 // { 4257 // return RValue<Int2>(Nucleus::createNeg(val.value)); 4258 // } 4259 4260 RValue<Int2> operator~(RValue<Int2> val) 4261 { 4262 return RValue<Int2>(Nucleus::createNot(val.value)); 4263 } 4264 4265 RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y) 4266 { 4267 int shuffle[4] = {0, 4, 1, 5}; // Real type is v4i32 4268 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 4269 } 4270 4271 RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y) 4272 { 4273 int shuffle[4] = {0, 4, 1, 5}; // Real type is v4i32 4274 auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 4275 return As<Short4>(Swizzle(lowHigh, 0xEE)); 4276 } 4277 4278 RValue<Int> Extract(RValue<Int2> val, int i) 4279 { 4280 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i)); 4281 } 4282 4283 RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i) 4284 { 4285 return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i)); 4286 } 4287 4288 Type *Int2::getType() 4289 { 4290 return T(Type_v2i32); 4291 } 4292 4293 UInt2::UInt2(unsigned int x, unsigned int y) 4294 { 4295 int64_t constantVector[2] = {x, y}; 4296 storeValue(Nucleus::createConstantVector(constantVector, getType())); 4297 } 4298 4299 UInt2::UInt2(RValue<UInt2> rhs) 4300 { 4301 storeValue(rhs.value); 4302 } 4303 4304 UInt2::UInt2(const UInt2 &rhs) 4305 { 4306 Value *value = rhs.loadValue(); 4307 storeValue(value); 4308 } 4309 4310 UInt2::UInt2(const Reference<UInt2> &rhs) 4311 { 4312 Value *value = rhs.loadValue(); 4313 storeValue(value); 4314 } 4315 4316 RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs) 4317 { 4318 storeValue(rhs.value); 4319 4320 return rhs; 4321 } 4322 4323 RValue<UInt2> UInt2::operator=(const UInt2 &rhs) 4324 { 4325 Value *value = rhs.loadValue(); 4326 storeValue(value); 4327 4328 return RValue<UInt2>(value); 4329 } 4330 4331 RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs) 4332 { 4333 Value *value = rhs.loadValue(); 4334 storeValue(value); 4335 4336 return RValue<UInt2>(value); 4337 } 4338 4339 RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs) 4340 { 4341 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value)); 4342 } 4343 4344 RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs) 4345 { 4346 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value)); 4347 } 4348 4349 // RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs) 4350 // { 4351 // return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value)); 4352 // } 4353 4354 // RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs) 4355 // { 4356 // return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value)); 4357 // } 4358 4359 // RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs) 4360 // { 4361 // return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value)); 4362 // } 4363 4364 RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs) 4365 { 4366 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value)); 4367 } 4368 4369 RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs) 4370 { 4371 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value)); 4372 } 4373 4374 RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs) 4375 { 4376 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value)); 4377 } 4378 4379 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs) 4380 { 4381 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value)); 4382 4383 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs)); 4384 } 4385 4386 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs) 4387 { 4388 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value)); 4389 4390 return x86::psrld(lhs, rhs); 4391 } 4392 4393 RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs) 4394 { 4395 return lhs = lhs + rhs; 4396 } 4397 4398 RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs) 4399 { 4400 return lhs = lhs - rhs; 4401 } 4402 4403 // RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs) 4404 // { 4405 // return lhs = lhs * rhs; 4406 // } 4407 4408 // RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs) 4409 // { 4410 // return lhs = lhs / rhs; 4411 // } 4412 4413 // RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs) 4414 // { 4415 // return lhs = lhs % rhs; 4416 // } 4417 4418 RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs) 4419 { 4420 return lhs = lhs & rhs; 4421 } 4422 4423 RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs) 4424 { 4425 return lhs = lhs | rhs; 4426 } 4427 4428 RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs) 4429 { 4430 return lhs = lhs ^ rhs; 4431 } 4432 4433 RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs) 4434 { 4435 return lhs = lhs << rhs; 4436 } 4437 4438 RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs) 4439 { 4440 return lhs = lhs >> rhs; 4441 } 4442 4443 // RValue<UInt2> operator+(RValue<UInt2> val) 4444 // { 4445 // return val; 4446 // } 4447 4448 // RValue<UInt2> operator-(RValue<UInt2> val) 4449 // { 4450 // return RValue<UInt2>(Nucleus::createNeg(val.value)); 4451 // } 4452 4453 RValue<UInt2> operator~(RValue<UInt2> val) 4454 { 4455 return RValue<UInt2>(Nucleus::createNot(val.value)); 4456 } 4457 4458 Type *UInt2::getType() 4459 { 4460 return T(Type_v2i32); 4461 } 4462 4463 Int4::Int4() : XYZW(this) 4464 { 4465 } 4466 4467 Int4::Int4(RValue<Byte4> cast) : XYZW(this) 4468 { 4469 if(CPUID::supportsSSE4_1()) 4470 { 4471 *this = x86::pmovzxbd(As<Byte16>(cast)); 4472 } 4473 else 4474 { 4475 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; 4476 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType()); 4477 Value *b = Nucleus::createShuffleVector(a, V(Nucleus::createNullValue(Byte16::getType())), swizzle); 4478 4479 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11}; 4480 Value *c = Nucleus::createBitCast(b, Short8::getType()); 4481 Value *d = Nucleus::createShuffleVector(c, V(Nucleus::createNullValue(Short8::getType())), swizzle2); 4482 4483 *this = As<Int4>(d); 4484 } 4485 } 4486 4487 Int4::Int4(RValue<SByte4> cast) : XYZW(this) 4488 { 4489 if(CPUID::supportsSSE4_1()) 4490 { 4491 *this = x86::pmovsxbd(As<SByte16>(cast)); 4492 } 4493 else 4494 { 4495 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}; 4496 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType()); 4497 Value *b = Nucleus::createShuffleVector(a, a, swizzle); 4498 4499 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3}; 4500 Value *c = Nucleus::createBitCast(b, Short8::getType()); 4501 Value *d = Nucleus::createShuffleVector(c, c, swizzle2); 4502 4503 *this = As<Int4>(d) >> 24; 4504 } 4505 } 4506 4507 Int4::Int4(RValue<Float4> cast) : XYZW(this) 4508 { 4509 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType()); 4510 4511 storeValue(xyzw); 4512 } 4513 4514 Int4::Int4(RValue<Short4> cast) : XYZW(this) 4515 { 4516 if(CPUID::supportsSSE4_1()) 4517 { 4518 *this = x86::pmovsxwd(As<Short8>(cast)); 4519 } 4520 else 4521 { 4522 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3}; 4523 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle); 4524 *this = As<Int4>(c) >> 16; 4525 } 4526 } 4527 4528 Int4::Int4(RValue<UShort4> cast) : XYZW(this) 4529 { 4530 if(CPUID::supportsSSE4_1()) 4531 { 4532 *this = x86::pmovzxwd(As<UShort8>(cast)); 4533 } 4534 else 4535 { 4536 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; 4537 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle); 4538 *this = As<Int4>(c); 4539 } 4540 } 4541 4542 Int4::Int4(int xyzw) : XYZW(this) 4543 { 4544 constant(xyzw, xyzw, xyzw, xyzw); 4545 } 4546 4547 Int4::Int4(int x, int yzw) : XYZW(this) 4548 { 4549 constant(x, yzw, yzw, yzw); 4550 } 4551 4552 Int4::Int4(int x, int y, int zw) : XYZW(this) 4553 { 4554 constant(x, y, zw, zw); 4555 } 4556 4557 Int4::Int4(int x, int y, int z, int w) : XYZW(this) 4558 { 4559 constant(x, y, z, w); 4560 } 4561 4562 void Int4::constant(int x, int y, int z, int w) 4563 { 4564 int64_t constantVector[4] = {x, y, z, w}; 4565 storeValue(Nucleus::createConstantVector(constantVector, getType())); 4566 } 4567 4568 Int4::Int4(RValue<Int4> rhs) : XYZW(this) 4569 { 4570 storeValue(rhs.value); 4571 } 4572 4573 Int4::Int4(const Int4 &rhs) : XYZW(this) 4574 { 4575 Value *value = rhs.loadValue(); 4576 storeValue(value); 4577 } 4578 4579 Int4::Int4(const Reference<Int4> &rhs) : XYZW(this) 4580 { 4581 Value *value = rhs.loadValue(); 4582 storeValue(value); 4583 } 4584 4585 Int4::Int4(RValue<UInt4> rhs) : XYZW(this) 4586 { 4587 storeValue(rhs.value); 4588 } 4589 4590 Int4::Int4(const UInt4 &rhs) : XYZW(this) 4591 { 4592 Value *value = rhs.loadValue(); 4593 storeValue(value); 4594 } 4595 4596 Int4::Int4(const Reference<UInt4> &rhs) : XYZW(this) 4597 { 4598 Value *value = rhs.loadValue(); 4599 storeValue(value); 4600 } 4601 4602 Int4::Int4(RValue<Int2> lo, RValue<Int2> hi) : XYZW(this) 4603 { 4604 int shuffle[4] = {0, 1, 4, 5}; // Real type is v4i32 4605 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle); 4606 4607 storeValue(packed); 4608 } 4609 4610 Int4::Int4(RValue<Int> rhs) : XYZW(this) 4611 { 4612 Value *vector = loadValue(); 4613 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0); 4614 4615 int swizzle[4] = {0, 0, 0, 0}; 4616 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle); 4617 4618 storeValue(replicate); 4619 } 4620 4621 Int4::Int4(const Int &rhs) : XYZW(this) 4622 { 4623 *this = RValue<Int>(rhs.loadValue()); 4624 } 4625 4626 Int4::Int4(const Reference<Int> &rhs) : XYZW(this) 4627 { 4628 *this = RValue<Int>(rhs.loadValue()); 4629 } 4630 4631 RValue<Int4> Int4::operator=(RValue<Int4> rhs) 4632 { 4633 storeValue(rhs.value); 4634 4635 return rhs; 4636 } 4637 4638 RValue<Int4> Int4::operator=(const Int4 &rhs) 4639 { 4640 Value *value = rhs.loadValue(); 4641 storeValue(value); 4642 4643 return RValue<Int4>(value); 4644 } 4645 4646 RValue<Int4> Int4::operator=(const Reference<Int4> &rhs) 4647 { 4648 Value *value = rhs.loadValue(); 4649 storeValue(value); 4650 4651 return RValue<Int4>(value); 4652 } 4653 4654 RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs) 4655 { 4656 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value)); 4657 } 4658 4659 RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs) 4660 { 4661 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value)); 4662 } 4663 4664 RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs) 4665 { 4666 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value)); 4667 } 4668 4669 RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs) 4670 { 4671 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value)); 4672 } 4673 4674 RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs) 4675 { 4676 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value)); 4677 } 4678 4679 RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs) 4680 { 4681 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value)); 4682 } 4683 4684 RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs) 4685 { 4686 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value)); 4687 } 4688 4689 RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs) 4690 { 4691 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value)); 4692 } 4693 4694 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs) 4695 { 4696 return x86::pslld(lhs, rhs); 4697 } 4698 4699 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs) 4700 { 4701 return x86::psrad(lhs, rhs); 4702 } 4703 4704 RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs) 4705 { 4706 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value)); 4707 } 4708 4709 RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs) 4710 { 4711 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value)); 4712 } 4713 4714 RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs) 4715 { 4716 return lhs = lhs + rhs; 4717 } 4718 4719 RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs) 4720 { 4721 return lhs = lhs - rhs; 4722 } 4723 4724 RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs) 4725 { 4726 return lhs = lhs * rhs; 4727 } 4728 4729 // RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs) 4730 // { 4731 // return lhs = lhs / rhs; 4732 // } 4733 4734 // RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs) 4735 // { 4736 // return lhs = lhs % rhs; 4737 // } 4738 4739 RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs) 4740 { 4741 return lhs = lhs & rhs; 4742 } 4743 4744 RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs) 4745 { 4746 return lhs = lhs | rhs; 4747 } 4748 4749 RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs) 4750 { 4751 return lhs = lhs ^ rhs; 4752 } 4753 4754 RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs) 4755 { 4756 return lhs = lhs << rhs; 4757 } 4758 4759 RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs) 4760 { 4761 return lhs = lhs >> rhs; 4762 } 4763 4764 RValue<Int4> operator+(RValue<Int4> val) 4765 { 4766 return val; 4767 } 4768 4769 RValue<Int4> operator-(RValue<Int4> val) 4770 { 4771 return RValue<Int4>(Nucleus::createNeg(val.value)); 4772 } 4773 4774 RValue<Int4> operator~(RValue<Int4> val) 4775 { 4776 return RValue<Int4>(Nucleus::createNot(val.value)); 4777 } 4778 4779 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y) 4780 { 4781 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 4782 // Restore the following line when LLVM is updated to a version where this issue is fixed. 4783 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())); 4784 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 4785 } 4786 4787 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y) 4788 { 4789 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())); 4790 } 4791 4792 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y) 4793 { 4794 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 4795 // Restore the following line when LLVM is updated to a version where this issue is fixed. 4796 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())); 4797 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 4798 } 4799 4800 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y) 4801 { 4802 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())); 4803 } 4804 4805 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y) 4806 { 4807 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 4808 // Restore the following line when LLVM is updated to a version where this issue is fixed. 4809 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())); 4810 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 4811 } 4812 4813 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y) 4814 { 4815 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())); 4816 } 4817 4818 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y) 4819 { 4820 if(CPUID::supportsSSE4_1()) 4821 { 4822 return x86::pmaxsd(x, y); 4823 } 4824 else 4825 { 4826 RValue<Int4> greater = CmpNLE(x, y); 4827 return (x & greater) | (y & ~greater); 4828 } 4829 } 4830 4831 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y) 4832 { 4833 if(CPUID::supportsSSE4_1()) 4834 { 4835 return x86::pminsd(x, y); 4836 } 4837 else 4838 { 4839 RValue<Int4> less = CmpLT(x, y); 4840 return (x & less) | (y & ~less); 4841 } 4842 } 4843 4844 RValue<Int4> RoundInt(RValue<Float4> cast) 4845 { 4846 return x86::cvtps2dq(cast); 4847 } 4848 4849 RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y) 4850 { 4851 return x86::packssdw(x, y); 4852 } 4853 4854 RValue<Int> Extract(RValue<Int4> x, int i) 4855 { 4856 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i)); 4857 } 4858 4859 RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i) 4860 { 4861 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i)); 4862 } 4863 4864 RValue<Int> SignMask(RValue<Int4> x) 4865 { 4866 return x86::movmskps(As<Float4>(x)); 4867 } 4868 4869 RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select) 4870 { 4871 return RValue<Int4>(createSwizzle4(x.value, select)); 4872 } 4873 4874 Type *Int4::getType() 4875 { 4876 return T(llvm::VectorType::get(T(Int::getType()), 4)); 4877 } 4878 4879 UInt4::UInt4() : XYZW(this) 4880 { 4881 } 4882 4883 UInt4::UInt4(RValue<Float4> cast) : XYZW(this) 4884 { 4885 // Note: createFPToUI is broken, must perform conversion using createFPtoSI 4886 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType()); 4887 4888 // Smallest positive value representable in UInt, but not in Int 4889 const unsigned int ustart = 0x80000000u; 4890 const float ustartf = float(ustart); 4891 4892 // Check if the value can be represented as an Int 4893 Int4 uiValue = CmpNLT(cast, Float4(ustartf)); 4894 // If the value is too large, subtract ustart and re-add it after conversion. 4895 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) | 4896 // Otherwise, just convert normally 4897 (~uiValue & Int4(cast)); 4898 // If the value is negative, store 0, otherwise store the result of the conversion 4899 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value); 4900 } 4901 4902 UInt4::UInt4(int xyzw) : XYZW(this) 4903 { 4904 constant(xyzw, xyzw, xyzw, xyzw); 4905 } 4906 4907 UInt4::UInt4(int x, int yzw) : XYZW(this) 4908 { 4909 constant(x, yzw, yzw, yzw); 4910 } 4911 4912 UInt4::UInt4(int x, int y, int zw) : XYZW(this) 4913 { 4914 constant(x, y, zw, zw); 4915 } 4916 4917 UInt4::UInt4(int x, int y, int z, int w) : XYZW(this) 4918 { 4919 constant(x, y, z, w); 4920 } 4921 4922 void UInt4::constant(int x, int y, int z, int w) 4923 { 4924 int64_t constantVector[4] = {x, y, z, w}; 4925 storeValue(Nucleus::createConstantVector(constantVector, getType())); 4926 } 4927 4928 UInt4::UInt4(RValue<UInt4> rhs) : XYZW(this) 4929 { 4930 storeValue(rhs.value); 4931 } 4932 4933 UInt4::UInt4(const UInt4 &rhs) : XYZW(this) 4934 { 4935 Value *value = rhs.loadValue(); 4936 storeValue(value); 4937 } 4938 4939 UInt4::UInt4(const Reference<UInt4> &rhs) : XYZW(this) 4940 { 4941 Value *value = rhs.loadValue(); 4942 storeValue(value); 4943 } 4944 4945 UInt4::UInt4(RValue<Int4> rhs) : XYZW(this) 4946 { 4947 storeValue(rhs.value); 4948 } 4949 4950 UInt4::UInt4(const Int4 &rhs) : XYZW(this) 4951 { 4952 Value *value = rhs.loadValue(); 4953 storeValue(value); 4954 } 4955 4956 UInt4::UInt4(const Reference<Int4> &rhs) : XYZW(this) 4957 { 4958 Value *value = rhs.loadValue(); 4959 storeValue(value); 4960 } 4961 4962 UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi) : XYZW(this) 4963 { 4964 int shuffle[4] = {0, 1, 4, 5}; // Real type is v4i32 4965 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle); 4966 4967 storeValue(packed); 4968 } 4969 4970 RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs) 4971 { 4972 storeValue(rhs.value); 4973 4974 return rhs; 4975 } 4976 4977 RValue<UInt4> UInt4::operator=(const UInt4 &rhs) 4978 { 4979 Value *value = rhs.loadValue(); 4980 storeValue(value); 4981 4982 return RValue<UInt4>(value); 4983 } 4984 4985 RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs) 4986 { 4987 Value *value = rhs.loadValue(); 4988 storeValue(value); 4989 4990 return RValue<UInt4>(value); 4991 } 4992 4993 RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs) 4994 { 4995 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value)); 4996 } 4997 4998 RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs) 4999 { 5000 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value)); 5001 } 5002 5003 RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs) 5004 { 5005 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value)); 5006 } 5007 5008 RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs) 5009 { 5010 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value)); 5011 } 5012 5013 RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs) 5014 { 5015 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value)); 5016 } 5017 5018 RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs) 5019 { 5020 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value)); 5021 } 5022 5023 RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs) 5024 { 5025 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value)); 5026 } 5027 5028 RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs) 5029 { 5030 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value)); 5031 } 5032 5033 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs) 5034 { 5035 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs)); 5036 } 5037 5038 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs) 5039 { 5040 return x86::psrld(lhs, rhs); 5041 } 5042 5043 RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs) 5044 { 5045 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value)); 5046 } 5047 5048 RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs) 5049 { 5050 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value)); 5051 } 5052 5053 RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs) 5054 { 5055 return lhs = lhs + rhs; 5056 } 5057 5058 RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs) 5059 { 5060 return lhs = lhs - rhs; 5061 } 5062 5063 RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs) 5064 { 5065 return lhs = lhs * rhs; 5066 } 5067 5068 // RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs) 5069 // { 5070 // return lhs = lhs / rhs; 5071 // } 5072 5073 // RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs) 5074 // { 5075 // return lhs = lhs % rhs; 5076 // } 5077 5078 RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs) 5079 { 5080 return lhs = lhs & rhs; 5081 } 5082 5083 RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs) 5084 { 5085 return lhs = lhs | rhs; 5086 } 5087 5088 RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs) 5089 { 5090 return lhs = lhs ^ rhs; 5091 } 5092 5093 RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs) 5094 { 5095 return lhs = lhs << rhs; 5096 } 5097 5098 RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs) 5099 { 5100 return lhs = lhs >> rhs; 5101 } 5102 5103 RValue<UInt4> operator+(RValue<UInt4> val) 5104 { 5105 return val; 5106 } 5107 5108 RValue<UInt4> operator-(RValue<UInt4> val) 5109 { 5110 return RValue<UInt4>(Nucleus::createNeg(val.value)); 5111 } 5112 5113 RValue<UInt4> operator~(RValue<UInt4> val) 5114 { 5115 return RValue<UInt4>(Nucleus::createNot(val.value)); 5116 } 5117 5118 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y) 5119 { 5120 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5121 // Restore the following line when LLVM is updated to a version where this issue is fixed. 5122 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())); 5123 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF); 5124 } 5125 5126 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y) 5127 { 5128 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())); 5129 } 5130 5131 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y) 5132 { 5133 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5134 // Restore the following line when LLVM is updated to a version where this issue is fixed. 5135 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType())); 5136 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF); 5137 } 5138 5139 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y) 5140 { 5141 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())); 5142 } 5143 5144 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y) 5145 { 5146 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5147 // Restore the following line when LLVM is updated to a version where this issue is fixed. 5148 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType())); 5149 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF); 5150 } 5151 5152 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y) 5153 { 5154 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())); 5155 } 5156 5157 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y) 5158 { 5159 if(CPUID::supportsSSE4_1()) 5160 { 5161 return x86::pmaxud(x, y); 5162 } 5163 else 5164 { 5165 RValue<UInt4> greater = CmpNLE(x, y); 5166 return (x & greater) | (y & ~greater); 5167 } 5168 } 5169 5170 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y) 5171 { 5172 if(CPUID::supportsSSE4_1()) 5173 { 5174 return x86::pminud(x, y); 5175 } 5176 else 5177 { 5178 RValue<UInt4> less = CmpLT(x, y); 5179 return (x & less) | (y & ~less); 5180 } 5181 } 5182 5183 RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y) 5184 { 5185 return x86::packusdw(As<Int4>(x), As<Int4>(y)); 5186 } 5187 5188 Type *UInt4::getType() 5189 { 5190 return T(llvm::VectorType::get(T(UInt::getType()), 4)); 5191 } 5192 5193 Float::Float(RValue<Int> cast) 5194 { 5195 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType()); 5196 5197 storeValue(integer); 5198 } 5199 5200 Float::Float(RValue<UInt> cast) 5201 { 5202 RValue<Float> result = Float(Int(cast & UInt(0x7FFFFFFF))) + 5203 As<Float>((As<Int>(cast) >> 31) & As<Int>(Float(0x80000000u))); 5204 5205 storeValue(result.value); 5206 } 5207 5208 Float::Float(float x) 5209 { 5210 storeValue(Nucleus::createConstantFloat(x)); 5211 } 5212 5213 Float::Float(RValue<Float> rhs) 5214 { 5215 storeValue(rhs.value); 5216 } 5217 5218 Float::Float(const Float &rhs) 5219 { 5220 Value *value = rhs.loadValue(); 5221 storeValue(value); 5222 } 5223 5224 Float::Float(const Reference<Float> &rhs) 5225 { 5226 Value *value = rhs.loadValue(); 5227 storeValue(value); 5228 } 5229 5230 RValue<Float> Float::operator=(RValue<Float> rhs) 5231 { 5232 storeValue(rhs.value); 5233 5234 return rhs; 5235 } 5236 5237 RValue<Float> Float::operator=(const Float &rhs) 5238 { 5239 Value *value = rhs.loadValue(); 5240 storeValue(value); 5241 5242 return RValue<Float>(value); 5243 } 5244 5245 RValue<Float> Float::operator=(const Reference<Float> &rhs) 5246 { 5247 Value *value = rhs.loadValue(); 5248 storeValue(value); 5249 5250 return RValue<Float>(value); 5251 } 5252 5253 RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs) 5254 { 5255 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value)); 5256 } 5257 5258 RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs) 5259 { 5260 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value)); 5261 } 5262 5263 RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs) 5264 { 5265 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value)); 5266 } 5267 5268 RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs) 5269 { 5270 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value)); 5271 } 5272 5273 RValue<Float> operator+=(Float &lhs, RValue<Float> rhs) 5274 { 5275 return lhs = lhs + rhs; 5276 } 5277 5278 RValue<Float> operator-=(Float &lhs, RValue<Float> rhs) 5279 { 5280 return lhs = lhs - rhs; 5281 } 5282 5283 RValue<Float> operator*=(Float &lhs, RValue<Float> rhs) 5284 { 5285 return lhs = lhs * rhs; 5286 } 5287 5288 RValue<Float> operator/=(Float &lhs, RValue<Float> rhs) 5289 { 5290 return lhs = lhs / rhs; 5291 } 5292 5293 RValue<Float> operator+(RValue<Float> val) 5294 { 5295 return val; 5296 } 5297 5298 RValue<Float> operator-(RValue<Float> val) 5299 { 5300 return RValue<Float>(Nucleus::createFNeg(val.value)); 5301 } 5302 5303 RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs) 5304 { 5305 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value)); 5306 } 5307 5308 RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs) 5309 { 5310 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value)); 5311 } 5312 5313 RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs) 5314 { 5315 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value)); 5316 } 5317 5318 RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs) 5319 { 5320 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value)); 5321 } 5322 5323 RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs) 5324 { 5325 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value)); 5326 } 5327 5328 RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs) 5329 { 5330 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value)); 5331 } 5332 5333 RValue<Float> Abs(RValue<Float> x) 5334 { 5335 return IfThenElse(x > 0.0f, x, -x); 5336 } 5337 5338 RValue<Float> Max(RValue<Float> x, RValue<Float> y) 5339 { 5340 return IfThenElse(x > y, x, y); 5341 } 5342 5343 RValue<Float> Min(RValue<Float> x, RValue<Float> y) 5344 { 5345 return IfThenElse(x < y, x, y); 5346 } 5347 5348 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2) 5349 { 5350 #if defined(__i386__) || defined(__x86_64__) 5351 if(exactAtPow2) 5352 { 5353 // rcpss uses a piecewise-linear approximation which minimizes the relative error 5354 // but is not exact at power-of-two values. Rectify by multiplying by the inverse. 5355 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f)))); 5356 } 5357 #endif 5358 5359 return x86::rcpss(x); 5360 } 5361 5362 RValue<Float> RcpSqrt_pp(RValue<Float> x) 5363 { 5364 return x86::rsqrtss(x); 5365 } 5366 5367 RValue<Float> Sqrt(RValue<Float> x) 5368 { 5369 return x86::sqrtss(x); 5370 } 5371 5372 RValue<Float> Round(RValue<Float> x) 5373 { 5374 if(CPUID::supportsSSE4_1()) 5375 { 5376 return x86::roundss(x, 0); 5377 } 5378 else 5379 { 5380 return Float4(Round(Float4(x))).x; 5381 } 5382 } 5383 5384 RValue<Float> Trunc(RValue<Float> x) 5385 { 5386 if(CPUID::supportsSSE4_1()) 5387 { 5388 return x86::roundss(x, 3); 5389 } 5390 else 5391 { 5392 return Float(Int(x)); // Rounded toward zero 5393 } 5394 } 5395 5396 RValue<Float> Frac(RValue<Float> x) 5397 { 5398 if(CPUID::supportsSSE4_1()) 5399 { 5400 return x - x86::floorss(x); 5401 } 5402 else 5403 { 5404 return Float4(Frac(Float4(x))).x; 5405 } 5406 } 5407 5408 RValue<Float> Floor(RValue<Float> x) 5409 { 5410 if(CPUID::supportsSSE4_1()) 5411 { 5412 return x86::floorss(x); 5413 } 5414 else 5415 { 5416 return Float4(Floor(Float4(x))).x; 5417 } 5418 } 5419 5420 RValue<Float> Ceil(RValue<Float> x) 5421 { 5422 if(CPUID::supportsSSE4_1()) 5423 { 5424 return x86::ceilss(x); 5425 } 5426 else 5427 { 5428 return Float4(Ceil(Float4(x))).x; 5429 } 5430 } 5431 5432 Type *Float::getType() 5433 { 5434 return T(llvm::Type::getFloatTy(*::context)); 5435 } 5436 5437 Float2::Float2(RValue<Float4> cast) 5438 { 5439 storeValue(Nucleus::createBitCast(cast.value, getType())); 5440 } 5441 5442 Type *Float2::getType() 5443 { 5444 return T(Type_v2f32); 5445 } 5446 5447 Float4::Float4(RValue<Byte4> cast) : XYZW(this) 5448 { 5449 Value *a = Int4(cast).loadValue(); 5450 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType()); 5451 5452 storeValue(xyzw); 5453 } 5454 5455 Float4::Float4(RValue<SByte4> cast) : XYZW(this) 5456 { 5457 Value *a = Int4(cast).loadValue(); 5458 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType()); 5459 5460 storeValue(xyzw); 5461 } 5462 5463 Float4::Float4(RValue<Short4> cast) : XYZW(this) 5464 { 5465 Int4 c(cast); 5466 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType())); 5467 } 5468 5469 Float4::Float4(RValue<UShort4> cast) : XYZW(this) 5470 { 5471 Int4 c(cast); 5472 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType())); 5473 } 5474 5475 Float4::Float4(RValue<Int4> cast) : XYZW(this) 5476 { 5477 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType()); 5478 5479 storeValue(xyzw); 5480 } 5481 5482 Float4::Float4(RValue<UInt4> cast) : XYZW(this) 5483 { 5484 RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) + 5485 As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u))); 5486 5487 storeValue(result.value); 5488 } 5489 5490 Float4::Float4() : XYZW(this) 5491 { 5492 } 5493 5494 Float4::Float4(float xyzw) : XYZW(this) 5495 { 5496 constant(xyzw, xyzw, xyzw, xyzw); 5497 } 5498 5499 Float4::Float4(float x, float yzw) : XYZW(this) 5500 { 5501 constant(x, yzw, yzw, yzw); 5502 } 5503 5504 Float4::Float4(float x, float y, float zw) : XYZW(this) 5505 { 5506 constant(x, y, zw, zw); 5507 } 5508 5509 Float4::Float4(float x, float y, float z, float w) : XYZW(this) 5510 { 5511 constant(x, y, z, w); 5512 } 5513 5514 void Float4::constant(float x, float y, float z, float w) 5515 { 5516 double constantVector[4] = {x, y, z, w}; 5517 storeValue(Nucleus::createConstantVector(constantVector, getType())); 5518 } 5519 5520 Float4::Float4(RValue<Float4> rhs) : XYZW(this) 5521 { 5522 storeValue(rhs.value); 5523 } 5524 5525 Float4::Float4(const Float4 &rhs) : XYZW(this) 5526 { 5527 Value *value = rhs.loadValue(); 5528 storeValue(value); 5529 } 5530 5531 Float4::Float4(const Reference<Float4> &rhs) : XYZW(this) 5532 { 5533 Value *value = rhs.loadValue(); 5534 storeValue(value); 5535 } 5536 5537 Float4::Float4(RValue<Float> rhs) : XYZW(this) 5538 { 5539 Value *vector = loadValue(); 5540 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0); 5541 5542 int swizzle[4] = {0, 0, 0, 0}; 5543 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle); 5544 5545 storeValue(replicate); 5546 } 5547 5548 Float4::Float4(const Float &rhs) : XYZW(this) 5549 { 5550 *this = RValue<Float>(rhs.loadValue()); 5551 } 5552 5553 Float4::Float4(const Reference<Float> &rhs) : XYZW(this) 5554 { 5555 *this = RValue<Float>(rhs.loadValue()); 5556 } 5557 5558 RValue<Float4> Float4::operator=(float x) 5559 { 5560 return *this = Float4(x, x, x, x); 5561 } 5562 5563 RValue<Float4> Float4::operator=(RValue<Float4> rhs) 5564 { 5565 storeValue(rhs.value); 5566 5567 return rhs; 5568 } 5569 5570 RValue<Float4> Float4::operator=(const Float4 &rhs) 5571 { 5572 Value *value = rhs.loadValue(); 5573 storeValue(value); 5574 5575 return RValue<Float4>(value); 5576 } 5577 5578 RValue<Float4> Float4::operator=(const Reference<Float4> &rhs) 5579 { 5580 Value *value = rhs.loadValue(); 5581 storeValue(value); 5582 5583 return RValue<Float4>(value); 5584 } 5585 5586 RValue<Float4> Float4::operator=(RValue<Float> rhs) 5587 { 5588 return *this = Float4(rhs); 5589 } 5590 5591 RValue<Float4> Float4::operator=(const Float &rhs) 5592 { 5593 return *this = Float4(rhs); 5594 } 5595 5596 RValue<Float4> Float4::operator=(const Reference<Float> &rhs) 5597 { 5598 return *this = Float4(rhs); 5599 } 5600 5601 RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs) 5602 { 5603 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value)); 5604 } 5605 5606 RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs) 5607 { 5608 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value)); 5609 } 5610 5611 RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs) 5612 { 5613 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value)); 5614 } 5615 5616 RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs) 5617 { 5618 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value)); 5619 } 5620 5621 RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs) 5622 { 5623 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value)); 5624 } 5625 5626 RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs) 5627 { 5628 return lhs = lhs + rhs; 5629 } 5630 5631 RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs) 5632 { 5633 return lhs = lhs - rhs; 5634 } 5635 5636 RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs) 5637 { 5638 return lhs = lhs * rhs; 5639 } 5640 5641 RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs) 5642 { 5643 return lhs = lhs / rhs; 5644 } 5645 5646 RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs) 5647 { 5648 return lhs = lhs % rhs; 5649 } 5650 5651 RValue<Float4> operator+(RValue<Float4> val) 5652 { 5653 return val; 5654 } 5655 5656 RValue<Float4> operator-(RValue<Float4> val) 5657 { 5658 return RValue<Float4>(Nucleus::createFNeg(val.value)); 5659 } 5660 5661 RValue<Float4> Abs(RValue<Float4> x) 5662 { 5663 Value *vector = Nucleus::createBitCast(x.value, Int4::getType()); 5664 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}; 5665 Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType()))); 5666 5667 return As<Float4>(result); 5668 } 5669 5670 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y) 5671 { 5672 return x86::maxps(x, y); 5673 } 5674 5675 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y) 5676 { 5677 return x86::minps(x, y); 5678 } 5679 5680 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2) 5681 { 5682 #if defined(__i386__) || defined(__x86_64__) 5683 if(exactAtPow2) 5684 { 5685 // rcpps uses a piecewise-linear approximation which minimizes the relative error 5686 // but is not exact at power-of-two values. Rectify by multiplying by the inverse. 5687 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f)))); 5688 } 5689 #endif 5690 5691 return x86::rcpps(x); 5692 } 5693 5694 RValue<Float4> RcpSqrt_pp(RValue<Float4> x) 5695 { 5696 return x86::rsqrtps(x); 5697 } 5698 5699 RValue<Float4> Sqrt(RValue<Float4> x) 5700 { 5701 return x86::sqrtps(x); 5702 } 5703 5704 RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i) 5705 { 5706 return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i)); 5707 } 5708 5709 RValue<Float> Extract(RValue<Float4> x, int i) 5710 { 5711 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i)); 5712 } 5713 5714 RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select) 5715 { 5716 return RValue<Float4>(createSwizzle4(x.value, select)); 5717 } 5718 5719 RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm) 5720 { 5721 int shuffle[4] = 5722 { 5723 ((imm >> 0) & 0x03) + 0, 5724 ((imm >> 2) & 0x03) + 0, 5725 ((imm >> 4) & 0x03) + 4, 5726 ((imm >> 6) & 0x03) + 4, 5727 }; 5728 5729 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 5730 } 5731 5732 RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y) 5733 { 5734 int shuffle[4] = {0, 4, 1, 5}; 5735 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 5736 } 5737 5738 RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y) 5739 { 5740 int shuffle[4] = {2, 6, 3, 7}; 5741 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 5742 } 5743 5744 RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select) 5745 { 5746 Value *vector = lhs.loadValue(); 5747 Value *result = createMask4(vector, rhs.value, select); 5748 lhs.storeValue(result); 5749 5750 return RValue<Float4>(result); 5751 } 5752 5753 RValue<Int> SignMask(RValue<Float4> x) 5754 { 5755 return x86::movmskps(x); 5756 } 5757 5758 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y) 5759 { 5760 // return As<Int4>(x86::cmpeqps(x, y)); 5761 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType())); 5762 } 5763 5764 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y) 5765 { 5766 // return As<Int4>(x86::cmpltps(x, y)); 5767 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType())); 5768 } 5769 5770 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y) 5771 { 5772 // return As<Int4>(x86::cmpleps(x, y)); 5773 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType())); 5774 } 5775 5776 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y) 5777 { 5778 // return As<Int4>(x86::cmpneqps(x, y)); 5779 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType())); 5780 } 5781 5782 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y) 5783 { 5784 // return As<Int4>(x86::cmpnltps(x, y)); 5785 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType())); 5786 } 5787 5788 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y) 5789 { 5790 // return As<Int4>(x86::cmpnleps(x, y)); 5791 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType())); 5792 } 5793 5794 RValue<Float4> Round(RValue<Float4> x) 5795 { 5796 if(CPUID::supportsSSE4_1()) 5797 { 5798 return x86::roundps(x, 0); 5799 } 5800 else 5801 { 5802 return Float4(RoundInt(x)); 5803 } 5804 } 5805 5806 RValue<Float4> Trunc(RValue<Float4> x) 5807 { 5808 if(CPUID::supportsSSE4_1()) 5809 { 5810 return x86::roundps(x, 3); 5811 } 5812 else 5813 { 5814 return Float4(Int4(x)); 5815 } 5816 } 5817 5818 RValue<Float4> Frac(RValue<Float4> x) 5819 { 5820 Float4 frc; 5821 5822 if(CPUID::supportsSSE4_1()) 5823 { 5824 frc = x - Floor(x); 5825 } 5826 else 5827 { 5828 frc = x - Float4(Int4(x)); // Signed fractional part. 5829 5830 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative. 5831 } 5832 5833 // x - floor(x) can be 1.0 for very small negative x. 5834 // Clamp against the value just below 1.0. 5835 return Min(frc, As<Float4>(Int4(0x3F7FFFFF))); 5836 } 5837 5838 RValue<Float4> Floor(RValue<Float4> x) 5839 { 5840 if(CPUID::supportsSSE4_1()) 5841 { 5842 return x86::floorps(x); 5843 } 5844 else 5845 { 5846 return x - Frac(x); 5847 } 5848 } 5849 5850 RValue<Float4> Ceil(RValue<Float4> x) 5851 { 5852 if(CPUID::supportsSSE4_1()) 5853 { 5854 return x86::ceilps(x); 5855 } 5856 else 5857 { 5858 return -Floor(-x); 5859 } 5860 } 5861 5862 Type *Float4::getType() 5863 { 5864 return T(llvm::VectorType::get(T(Float::getType()), 4)); 5865 } 5866 5867 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset) 5868 { 5869 return lhs + RValue<Int>(Nucleus::createConstantInt(offset)); 5870 } 5871 5872 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset) 5873 { 5874 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false)); 5875 } 5876 5877 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset) 5878 { 5879 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true)); 5880 } 5881 5882 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset) 5883 { 5884 return lhs = lhs + offset; 5885 } 5886 5887 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset) 5888 { 5889 return lhs = lhs + offset; 5890 } 5891 5892 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset) 5893 { 5894 return lhs = lhs + offset; 5895 } 5896 5897 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset) 5898 { 5899 return lhs + -offset; 5900 } 5901 5902 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset) 5903 { 5904 return lhs + -offset; 5905 } 5906 5907 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset) 5908 { 5909 return lhs + -offset; 5910 } 5911 5912 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset) 5913 { 5914 return lhs = lhs - offset; 5915 } 5916 5917 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset) 5918 { 5919 return lhs = lhs - offset; 5920 } 5921 5922 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset) 5923 { 5924 return lhs = lhs - offset; 5925 } 5926 5927 void Return() 5928 { 5929 Nucleus::createRetVoid(); 5930 Nucleus::setInsertBlock(Nucleus::createBasicBlock()); 5931 Nucleus::createUnreachable(); 5932 } 5933 5934 void Return(RValue<Int> ret) 5935 { 5936 Nucleus::createRet(ret.value); 5937 Nucleus::setInsertBlock(Nucleus::createBasicBlock()); 5938 Nucleus::createUnreachable(); 5939 } 5940 5941 void branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB) 5942 { 5943 Nucleus::createCondBr(cmp.value, bodyBB, endBB); 5944 Nucleus::setInsertBlock(bodyBB); 5945 } 5946 5947 RValue<Long> Ticks() 5948 { 5949 llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter); 5950 5951 return RValue<Long>(V(::builder->CreateCall(rdtsc))); 5952 } 5953 } 5954 5955 namespace sw 5956 { 5957 namespace x86 5958 { 5959 RValue<Int> cvtss2si(RValue<Float> val) 5960 { 5961 llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si); 5962 5963 Float4 vector; 5964 vector.x = val; 5965 5966 return RValue<Int>(V(::builder->CreateCall(cvtss2si, RValue<Float4>(vector).value))); 5967 } 5968 5969 RValue<Int4> cvtps2dq(RValue<Float4> val) 5970 { 5971 llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq); 5972 5973 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value))); 5974 } 5975 5976 RValue<Float> rcpss(RValue<Float> val) 5977 { 5978 llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss); 5979 5980 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0); 5981 5982 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, vector)), Float::getType(), 0)); 5983 } 5984 5985 RValue<Float> sqrtss(RValue<Float> val) 5986 { 5987 llvm::Function *sqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ss); 5988 5989 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0); 5990 5991 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, vector)), Float::getType(), 0)); 5992 } 5993 5994 RValue<Float> rsqrtss(RValue<Float> val) 5995 { 5996 llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss); 5997 5998 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0); 5999 6000 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, vector)), Float::getType(), 0)); 6001 } 6002 6003 RValue<Float4> rcpps(RValue<Float4> val) 6004 { 6005 llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps); 6006 6007 return RValue<Float4>(V(::builder->CreateCall(rcpps, val.value))); 6008 } 6009 6010 RValue<Float4> sqrtps(RValue<Float4> val) 6011 { 6012 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ps); 6013 6014 return RValue<Float4>(V(::builder->CreateCall(sqrtps, val.value))); 6015 } 6016 6017 RValue<Float4> rsqrtps(RValue<Float4> val) 6018 { 6019 llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps); 6020 6021 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, val.value))); 6022 } 6023 6024 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y) 6025 { 6026 llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps); 6027 6028 return RValue<Float4>(V(::builder->CreateCall2(maxps, x.value, y.value))); 6029 } 6030 6031 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y) 6032 { 6033 llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps); 6034 6035 return RValue<Float4>(V(::builder->CreateCall2(minps, x.value, y.value))); 6036 } 6037 6038 RValue<Float> roundss(RValue<Float> val, unsigned char imm) 6039 { 6040 llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss); 6041 6042 Value *undef = V(llvm::UndefValue::get(T(Float4::getType()))); 6043 Value *vector = Nucleus::createInsertElement(undef, val.value, 0); 6044 6045 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, undef, vector, V(Nucleus::createConstantInt(imm)))), Float::getType(), 0)); 6046 } 6047 6048 RValue<Float> floorss(RValue<Float> val) 6049 { 6050 return roundss(val, 1); 6051 } 6052 6053 RValue<Float> ceilss(RValue<Float> val) 6054 { 6055 return roundss(val, 2); 6056 } 6057 6058 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm) 6059 { 6060 llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps); 6061 6062 return RValue<Float4>(V(::builder->CreateCall2(roundps, val.value, V(Nucleus::createConstantInt(imm))))); 6063 } 6064 6065 RValue<Float4> floorps(RValue<Float4> val) 6066 { 6067 return roundps(val, 1); 6068 } 6069 6070 RValue<Float4> ceilps(RValue<Float4> val) 6071 { 6072 return roundps(val, 2); 6073 } 6074 6075 RValue<Int4> pabsd(RValue<Int4> x) 6076 { 6077 llvm::Function *pabsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_ssse3_pabs_d_128); 6078 6079 return RValue<Int4>(V(::builder->CreateCall(pabsd, x.value))); 6080 } 6081 6082 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y) 6083 { 6084 llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w); 6085 6086 return As<Short4>(V(::builder->CreateCall2(paddsw, x.value, y.value))); 6087 } 6088 6089 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y) 6090 { 6091 llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w); 6092 6093 return As<Short4>(V(::builder->CreateCall2(psubsw, x.value, y.value))); 6094 } 6095 6096 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y) 6097 { 6098 llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w); 6099 6100 return As<UShort4>(V(::builder->CreateCall2(paddusw, x.value, y.value))); 6101 } 6102 6103 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y) 6104 { 6105 llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w); 6106 6107 return As<UShort4>(V(::builder->CreateCall2(psubusw, x.value, y.value))); 6108 } 6109 6110 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y) 6111 { 6112 llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b); 6113 6114 return As<SByte8>(V(::builder->CreateCall2(paddsb, x.value, y.value))); 6115 } 6116 6117 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y) 6118 { 6119 llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b); 6120 6121 return As<SByte8>(V(::builder->CreateCall2(psubsb, x.value, y.value))); 6122 } 6123 6124 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y) 6125 { 6126 llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b); 6127 6128 return As<Byte8>(V(::builder->CreateCall2(paddusb, x.value, y.value))); 6129 } 6130 6131 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y) 6132 { 6133 llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b); 6134 6135 return As<Byte8>(V(::builder->CreateCall2(psubusb, x.value, y.value))); 6136 } 6137 6138 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y) 6139 { 6140 llvm::Function *pavgw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pavg_w); 6141 6142 return As<UShort4>(V(::builder->CreateCall2(pavgw, x.value, y.value))); 6143 } 6144 6145 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y) 6146 { 6147 llvm::Function *pmaxsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmaxs_w); 6148 6149 return As<Short4>(V(::builder->CreateCall2(pmaxsw, x.value, y.value))); 6150 } 6151 6152 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y) 6153 { 6154 llvm::Function *pminsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmins_w); 6155 6156 return As<Short4>(V(::builder->CreateCall2(pminsw, x.value, y.value))); 6157 } 6158 6159 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y) 6160 { 6161 llvm::Function *pcmpgtw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_w); 6162 6163 return As<Short4>(V(::builder->CreateCall2(pcmpgtw, x.value, y.value))); 6164 } 6165 6166 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y) 6167 { 6168 llvm::Function *pcmpeqw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_w); 6169 6170 return As<Short4>(V(::builder->CreateCall2(pcmpeqw, x.value, y.value))); 6171 } 6172 6173 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y) 6174 { 6175 llvm::Function *pcmpgtb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_b); 6176 6177 return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, x.value, y.value))); 6178 } 6179 6180 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y) 6181 { 6182 llvm::Function *pcmpeqb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_b); 6183 6184 return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, x.value, y.value))); 6185 } 6186 6187 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y) 6188 { 6189 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128); 6190 6191 return As<Short4>(V(::builder->CreateCall2(packssdw, x.value, y.value))); 6192 } 6193 6194 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y) 6195 { 6196 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128); 6197 6198 return RValue<Short8>(V(::builder->CreateCall2(packssdw, x.value, y.value))); 6199 } 6200 6201 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y) 6202 { 6203 llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128); 6204 6205 return As<SByte8>(V(::builder->CreateCall2(packsswb, x.value, y.value))); 6206 } 6207 6208 RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y) 6209 { 6210 llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128); 6211 6212 return As<Byte8>(V(::builder->CreateCall2(packuswb, x.value, y.value))); 6213 } 6214 6215 RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y) 6216 { 6217 if(CPUID::supportsSSE4_1()) 6218 { 6219 llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw); 6220 6221 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, x.value, y.value))); 6222 } 6223 else 6224 { 6225 RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000); 6226 RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000); 6227 6228 return As<UShort8>(packssdw(bx, by) + Short8(0x8000u)); 6229 } 6230 } 6231 6232 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y) 6233 { 6234 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w); 6235 6236 return As<UShort4>(V(::builder->CreateCall2(psrlw, x.value, V(Nucleus::createConstantInt(y))))); 6237 } 6238 6239 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y) 6240 { 6241 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w); 6242 6243 return RValue<UShort8>(V(::builder->CreateCall2(psrlw, x.value, V(Nucleus::createConstantInt(y))))); 6244 } 6245 6246 RValue<Short4> psraw(RValue<Short4> x, unsigned char y) 6247 { 6248 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w); 6249 6250 return As<Short4>(V(::builder->CreateCall2(psraw, x.value, V(Nucleus::createConstantInt(y))))); 6251 } 6252 6253 RValue<Short8> psraw(RValue<Short8> x, unsigned char y) 6254 { 6255 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w); 6256 6257 return RValue<Short8>(V(::builder->CreateCall2(psraw, x.value, V(Nucleus::createConstantInt(y))))); 6258 } 6259 6260 RValue<Short4> psllw(RValue<Short4> x, unsigned char y) 6261 { 6262 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w); 6263 6264 return As<Short4>(V(::builder->CreateCall2(psllw, x.value, V(Nucleus::createConstantInt(y))))); 6265 } 6266 6267 RValue<Short8> psllw(RValue<Short8> x, unsigned char y) 6268 { 6269 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w); 6270 6271 return RValue<Short8>(V(::builder->CreateCall2(psllw, x.value, V(Nucleus::createConstantInt(y))))); 6272 } 6273 6274 RValue<Int2> pslld(RValue<Int2> x, unsigned char y) 6275 { 6276 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d); 6277 6278 return As<Int2>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y))))); 6279 } 6280 6281 RValue<Int4> pslld(RValue<Int4> x, unsigned char y) 6282 { 6283 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d); 6284 6285 return RValue<Int4>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y))))); 6286 } 6287 6288 RValue<Int2> psrad(RValue<Int2> x, unsigned char y) 6289 { 6290 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d); 6291 6292 return As<Int2>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y))))); 6293 } 6294 6295 RValue<Int4> psrad(RValue<Int4> x, unsigned char y) 6296 { 6297 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d); 6298 6299 return RValue<Int4>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y))))); 6300 } 6301 6302 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y) 6303 { 6304 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d); 6305 6306 return As<UInt2>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y))))); 6307 } 6308 6309 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y) 6310 { 6311 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d); 6312 6313 return RValue<UInt4>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y))))); 6314 } 6315 6316 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y) 6317 { 6318 llvm::Function *pmaxsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxsd); 6319 6320 return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, x.value, y.value))); 6321 } 6322 6323 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y) 6324 { 6325 llvm::Function *pminsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminsd); 6326 6327 return RValue<Int4>(V(::builder->CreateCall2(pminsd, x.value, y.value))); 6328 } 6329 6330 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y) 6331 { 6332 llvm::Function *pmaxud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxud); 6333 6334 return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, x.value, y.value))); 6335 } 6336 6337 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y) 6338 { 6339 llvm::Function *pminud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminud); 6340 6341 return RValue<UInt4>(V(::builder->CreateCall2(pminud, x.value, y.value))); 6342 } 6343 6344 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y) 6345 { 6346 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w); 6347 6348 return As<Short4>(V(::builder->CreateCall2(pmulhw, x.value, y.value))); 6349 } 6350 6351 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y) 6352 { 6353 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w); 6354 6355 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, x.value, y.value))); 6356 } 6357 6358 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y) 6359 { 6360 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd); 6361 6362 return As<Int2>(V(::builder->CreateCall2(pmaddwd, x.value, y.value))); 6363 } 6364 6365 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y) 6366 { 6367 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w); 6368 6369 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, x.value, y.value))); 6370 } 6371 6372 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y) 6373 { 6374 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w); 6375 6376 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, x.value, y.value))); 6377 } 6378 6379 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y) 6380 { 6381 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd); 6382 6383 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, x.value, y.value))); 6384 } 6385 6386 RValue<Int> movmskps(RValue<Float4> x) 6387 { 6388 llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps); 6389 6390 return RValue<Int>(V(::builder->CreateCall(movmskps, x.value))); 6391 } 6392 6393 RValue<Int> pmovmskb(RValue<Byte8> x) 6394 { 6395 llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128); 6396 6397 return RValue<Int>(V(::builder->CreateCall(pmovmskb, x.value))) & 0xFF; 6398 } 6399 6400 RValue<Int4> pmovzxbd(RValue<Byte16> x) 6401 { 6402 llvm::Function *pmovzxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxbd); 6403 6404 return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, x.value))); 6405 } 6406 6407 RValue<Int4> pmovsxbd(RValue<SByte16> x) 6408 { 6409 llvm::Function *pmovsxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxbd); 6410 6411 return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, x.value))); 6412 } 6413 6414 RValue<Int4> pmovzxwd(RValue<UShort8> x) 6415 { 6416 llvm::Function *pmovzxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxwd); 6417 6418 return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, x.value))); 6419 } 6420 6421 RValue<Int4> pmovsxwd(RValue<Short8> x) 6422 { 6423 llvm::Function *pmovsxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxwd); 6424 6425 return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, x.value))); 6426 } 6427 } 6428 } 6429