1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Reactor.hpp" 16 17 #include "llvm/Support/IRBuilder.h" 18 #include "llvm/Function.h" 19 #include "llvm/GlobalVariable.h" 20 #include "llvm/Module.h" 21 #include "llvm/LLVMContext.h" 22 #include "llvm/Constants.h" 23 #include "llvm/Intrinsics.h" 24 #include "llvm/PassManager.h" 25 #include "llvm/Analysis/LoopPass.h" 26 #include "llvm/Transforms/Scalar.h" 27 #include "llvm/Target/TargetData.h" 28 #include "llvm/Target/TargetOptions.h" 29 #include "llvm/Support/TargetSelect.h" 30 #include "../lib/ExecutionEngine/JIT/JIT.h" 31 32 #include "LLVMRoutine.hpp" 33 #include "LLVMRoutineManager.hpp" 34 #include "x86.hpp" 35 #include "Common/CPUID.hpp" 36 #include "Common/Thread.hpp" 37 #include "Common/Memory.hpp" 38 #include "Common/MutexLock.hpp" 39 40 #include <fstream> 41 42 #if defined(__i386__) || defined(__x86_64__) 43 #include <xmmintrin.h> 44 #endif 45 46 #if defined(__x86_64__) && defined(_WIN32) 47 extern "C" void X86CompilationCallback() 48 { 49 assert(false); // UNIMPLEMENTED 50 } 51 #endif 52 53 extern "C" 54 { 55 bool (*CodeAnalystInitialize)() = 0; 56 void (*CodeAnalystCompleteJITLog)() = 0; 57 bool (*CodeAnalystLogJITCode)(const void *jitCodeStartAddr, unsigned int jitCodeSize, const wchar_t *functionName) = 0; 58 } 59 60 namespace llvm 61 { 62 extern bool JITEmitDebugInfo; 63 } 64 65 namespace 66 { 67 
sw::LLVMRoutineManager *routineManager = nullptr; 68 llvm::ExecutionEngine *executionEngine = nullptr; 69 llvm::IRBuilder<> *builder = nullptr; 70 llvm::LLVMContext *context = nullptr; 71 llvm::Module *module = nullptr; 72 llvm::Function *function = nullptr; 73 74 sw::MutexLock codegenMutex; 75 } 76 77 namespace sw 78 { 79 Optimization optimization[10] = {InstructionCombining, Disabled}; 80 81 enum EmulatedType 82 { 83 Type_v2i32, 84 Type_v4i16, 85 Type_v2i16, 86 Type_v8i8, 87 Type_v4i8, 88 Type_v2f32, 89 EmulatedTypeCount 90 }; 91 92 class Value : public llvm::Value {}; 93 class SwitchCases : public llvm::SwitchInst {}; 94 class BasicBlock : public llvm::BasicBlock {}; 95 96 llvm::Type *T(Type *t) 97 { 98 uintptr_t type = reinterpret_cast<uintptr_t>(t); 99 if(type < EmulatedTypeCount) 100 { 101 // Use 128-bit vectors to implement logically shorter ones. 102 switch(type) 103 { 104 case Type_v2i32: return T(Int4::getType()); 105 case Type_v4i16: return T(Short8::getType()); 106 case Type_v2i16: return T(Short8::getType()); 107 case Type_v8i8: return T(Byte16::getType()); 108 case Type_v4i8: return T(Byte16::getType()); 109 case Type_v2f32: return T(Float4::getType()); 110 default: assert(false); 111 } 112 } 113 114 return reinterpret_cast<llvm::Type*>(t); 115 } 116 117 inline Type *T(llvm::Type *t) 118 { 119 return reinterpret_cast<Type*>(t); 120 } 121 122 Type *T(EmulatedType t) 123 { 124 return reinterpret_cast<Type*>(t); 125 } 126 127 inline Value *V(llvm::Value *t) 128 { 129 return reinterpret_cast<Value*>(t); 130 } 131 132 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t) 133 { 134 return reinterpret_cast<std::vector<llvm::Type*>&>(t); 135 } 136 137 inline BasicBlock *B(llvm::BasicBlock *t) 138 { 139 return reinterpret_cast<BasicBlock*>(t); 140 } 141 142 static size_t typeSize(Type *type) 143 { 144 uintptr_t t = reinterpret_cast<uintptr_t>(type); 145 if(t < EmulatedTypeCount) 146 { 147 switch(t) 148 { 149 case Type_v2i32: return 8; 150 case Type_v4i16: 
return 8; 151 case Type_v2i16: return 4; 152 case Type_v8i8: return 8; 153 case Type_v4i8: return 4; 154 case Type_v2f32: return 8; 155 default: assert(false); 156 } 157 } 158 159 return T(type)->getPrimitiveSizeInBits() / 8; 160 } 161 162 static unsigned int elementCount(Type *type) 163 { 164 uintptr_t t = reinterpret_cast<uintptr_t>(type); 165 if(t < EmulatedTypeCount) 166 { 167 switch(t) 168 { 169 case Type_v2i32: return 2; 170 case Type_v4i16: return 4; 171 case Type_v2i16: return 2; 172 case Type_v8i8: return 8; 173 case Type_v4i8: return 4; 174 case Type_v2f32: return 2; 175 default: assert(false); 176 } 177 } 178 179 return llvm::cast<llvm::VectorType>(T(type))->getNumElements(); 180 } 181 182 Nucleus::Nucleus() 183 { 184 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe 185 186 llvm::InitializeNativeTarget(); 187 llvm::JITEmitDebugInfo = false; 188 189 if(!::context) 190 { 191 ::context = new llvm::LLVMContext(); 192 } 193 194 ::module = new llvm::Module("", *::context); 195 ::routineManager = new LLVMRoutineManager(); 196 197 #if defined(__x86_64__) 198 const char *architecture = "x86-64"; 199 #else 200 const char *architecture = "x86"; 201 #endif 202 203 llvm::SmallVector<std::string, 1> MAttrs; 204 MAttrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx"); 205 MAttrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov"); 206 MAttrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse"); 207 MAttrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2"); 208 MAttrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3"); 209 MAttrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3"); 210 MAttrs.push_back(CPUID::supportsSSE4_1() ? 
"+sse41" : "-sse41"); 211 212 std::string error; 213 llvm::TargetMachine *targetMachine = llvm::EngineBuilder::selectTarget(::module, architecture, "", MAttrs, llvm::Reloc::Default, llvm::CodeModel::JITDefault, &error); 214 ::executionEngine = llvm::JIT::createJIT(::module, 0, ::routineManager, llvm::CodeGenOpt::Aggressive, true, targetMachine); 215 216 if(!::builder) 217 { 218 ::builder = new llvm::IRBuilder<>(*::context); 219 220 #if defined(_WIN32) 221 HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll"); 222 if(CodeAnalyst) 223 { 224 CodeAnalystInitialize = (bool(*)())GetProcAddress(CodeAnalyst, "CAJIT_Initialize"); 225 CodeAnalystCompleteJITLog = (void(*)())GetProcAddress(CodeAnalyst, "CAJIT_CompleteJITLog"); 226 CodeAnalystLogJITCode = (bool(*)(const void*, unsigned int, const wchar_t*))GetProcAddress(CodeAnalyst, "CAJIT_LogJITCode"); 227 228 CodeAnalystInitialize(); 229 } 230 #endif 231 } 232 } 233 234 Nucleus::~Nucleus() 235 { 236 delete ::executionEngine; 237 ::executionEngine = nullptr; 238 239 ::routineManager = nullptr; 240 ::function = nullptr; 241 ::module = nullptr; 242 243 ::codegenMutex.unlock(); 244 } 245 246 Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations) 247 { 248 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator()) 249 { 250 llvm::Type *type = ::function->getReturnType(); 251 252 if(type->isVoidTy()) 253 { 254 createRetVoid(); 255 } 256 else 257 { 258 createRet(V(llvm::UndefValue::get(type))); 259 } 260 } 261 262 if(false) 263 { 264 std::string error; 265 llvm::raw_fd_ostream file("llvm-dump-unopt.txt", error); 266 ::module->print(file, 0); 267 } 268 269 if(runOptimizations) 270 { 271 optimize(); 272 } 273 274 if(false) 275 { 276 std::string error; 277 llvm::raw_fd_ostream file("llvm-dump-opt.txt", error); 278 ::module->print(file, 0); 279 } 280 281 void *entry = ::executionEngine->getPointerToFunction(::function); 282 LLVMRoutine *routine = 
::routineManager->acquireRoutine(entry); 283 284 if(CodeAnalystLogJITCode) 285 { 286 CodeAnalystLogJITCode(routine->getEntry(), routine->getCodeSize(), name); 287 } 288 289 return routine; 290 } 291 292 void Nucleus::optimize() 293 { 294 static llvm::PassManager *passManager = nullptr; 295 296 if(!passManager) 297 { 298 passManager = new llvm::PassManager(); 299 300 llvm::UnsafeFPMath = true; 301 // llvm::NoInfsFPMath = true; 302 // llvm::NoNaNsFPMath = true; 303 304 passManager->add(new llvm::TargetData(*::executionEngine->getTargetData())); 305 passManager->add(llvm::createScalarReplAggregatesPass()); 306 307 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++) 308 { 309 switch(optimization[pass]) 310 { 311 case Disabled: break; 312 case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break; 313 case LICM: passManager->add(llvm::createLICMPass()); break; 314 case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break; 315 case GVN: passManager->add(llvm::createGVNPass()); break; 316 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break; 317 case Reassociate: passManager->add(llvm::createReassociatePass()); break; 318 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break; 319 case SCCP: passManager->add(llvm::createSCCPPass()); break; 320 case ScalarReplAggregates: passManager->add(llvm::createScalarReplAggregatesPass()); break; 321 default: 322 assert(false); 323 } 324 } 325 } 326 327 passManager->run(*::module); 328 } 329 330 Value *Nucleus::allocateStackVariable(Type *type, int arraySize) 331 { 332 // Need to allocate it in the entry block for mem2reg to work 333 llvm::BasicBlock &entryBlock = ::function->getEntryBlock(); 334 335 llvm::Instruction *declaration; 336 337 if(arraySize) 338 { 339 declaration = new llvm::AllocaInst(T(type), Nucleus::createConstantInt(arraySize)); 340 } 341 else 342 { 343 declaration = new 
llvm::AllocaInst(T(type), (Value*)nullptr); 344 } 345 346 entryBlock.getInstList().push_front(declaration); 347 348 return V(declaration); 349 } 350 351 BasicBlock *Nucleus::createBasicBlock() 352 { 353 return B(BasicBlock::Create(*::context, "", ::function)); 354 } 355 356 BasicBlock *Nucleus::getInsertBlock() 357 { 358 return B(::builder->GetInsertBlock()); 359 } 360 361 void Nucleus::setInsertBlock(BasicBlock *basicBlock) 362 { 363 // assert(::builder->GetInsertBlock()->back().isTerminator()); 364 return ::builder->SetInsertPoint(basicBlock); 365 } 366 367 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params) 368 { 369 llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false); 370 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module); 371 ::function->setCallingConv(llvm::CallingConv::C); 372 373 ::builder->SetInsertPoint(BasicBlock::Create(*::context, "", ::function)); 374 } 375 376 Value *Nucleus::getArgument(unsigned int index) 377 { 378 llvm::Function::arg_iterator args = ::function->arg_begin(); 379 380 while(index) 381 { 382 args++; 383 index--; 384 } 385 386 return V(&*args); 387 } 388 389 void Nucleus::createRetVoid() 390 { 391 ::builder->CreateRetVoid(); 392 } 393 394 void Nucleus::createRet(Value *v) 395 { 396 ::builder->CreateRet(v); 397 } 398 399 void Nucleus::createBr(BasicBlock *dest) 400 { 401 ::builder->CreateBr(dest); 402 } 403 404 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse) 405 { 406 ::builder->CreateCondBr(cond, ifTrue, ifFalse); 407 } 408 409 Value *Nucleus::createAdd(Value *lhs, Value *rhs) 410 { 411 return V(::builder->CreateAdd(lhs, rhs)); 412 } 413 414 Value *Nucleus::createSub(Value *lhs, Value *rhs) 415 { 416 return V(::builder->CreateSub(lhs, rhs)); 417 } 418 419 Value *Nucleus::createMul(Value *lhs, Value *rhs) 420 { 421 return V(::builder->CreateMul(lhs, rhs)); 422 } 423 424 Value 
*Nucleus::createUDiv(Value *lhs, Value *rhs) 425 { 426 return V(::builder->CreateUDiv(lhs, rhs)); 427 } 428 429 Value *Nucleus::createSDiv(Value *lhs, Value *rhs) 430 { 431 return V(::builder->CreateSDiv(lhs, rhs)); 432 } 433 434 Value *Nucleus::createFAdd(Value *lhs, Value *rhs) 435 { 436 return V(::builder->CreateFAdd(lhs, rhs)); 437 } 438 439 Value *Nucleus::createFSub(Value *lhs, Value *rhs) 440 { 441 return V(::builder->CreateFSub(lhs, rhs)); 442 } 443 444 Value *Nucleus::createFMul(Value *lhs, Value *rhs) 445 { 446 return V(::builder->CreateFMul(lhs, rhs)); 447 } 448 449 Value *Nucleus::createFDiv(Value *lhs, Value *rhs) 450 { 451 return V(::builder->CreateFDiv(lhs, rhs)); 452 } 453 454 Value *Nucleus::createURem(Value *lhs, Value *rhs) 455 { 456 return V(::builder->CreateURem(lhs, rhs)); 457 } 458 459 Value *Nucleus::createSRem(Value *lhs, Value *rhs) 460 { 461 return V(::builder->CreateSRem(lhs, rhs)); 462 } 463 464 Value *Nucleus::createFRem(Value *lhs, Value *rhs) 465 { 466 return V(::builder->CreateFRem(lhs, rhs)); 467 } 468 469 Value *Nucleus::createShl(Value *lhs, Value *rhs) 470 { 471 return V(::builder->CreateShl(lhs, rhs)); 472 } 473 474 Value *Nucleus::createLShr(Value *lhs, Value *rhs) 475 { 476 return V(::builder->CreateLShr(lhs, rhs)); 477 } 478 479 Value *Nucleus::createAShr(Value *lhs, Value *rhs) 480 { 481 return V(::builder->CreateAShr(lhs, rhs)); 482 } 483 484 Value *Nucleus::createAnd(Value *lhs, Value *rhs) 485 { 486 return V(::builder->CreateAnd(lhs, rhs)); 487 } 488 489 Value *Nucleus::createOr(Value *lhs, Value *rhs) 490 { 491 return V(::builder->CreateOr(lhs, rhs)); 492 } 493 494 Value *Nucleus::createXor(Value *lhs, Value *rhs) 495 { 496 return V(::builder->CreateXor(lhs, rhs)); 497 } 498 499 Value *Nucleus::createNeg(Value *v) 500 { 501 return V(::builder->CreateNeg(v)); 502 } 503 504 Value *Nucleus::createFNeg(Value *v) 505 { 506 return V(::builder->CreateFNeg(v)); 507 } 508 509 Value *Nucleus::createNot(Value *v) 510 { 511 return 
V(::builder->CreateNot(v)); 512 } 513 514 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment) 515 { 516 uintptr_t t = reinterpret_cast<uintptr_t>(type); 517 if(t < EmulatedTypeCount) 518 { 519 switch(t) 520 { 521 case Type_v2i32: 522 case Type_v4i16: 523 case Type_v8i8: 524 case Type_v2f32: 525 return createBitCast(createInsertElement(V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))), createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment), 0), T(T(type))); 526 case Type_v2i16: 527 case Type_v4i8: 528 if(alignment != 0) // Not a local variable (all vectors are 128-bit). 529 { 530 Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))); 531 Value *i = V(createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment)); 532 i = createZExt(i, Long::getType()); 533 Value *v = V(createInsertElement(u, i, 0)); 534 return createBitCast(v, T(T(type))); 535 } 536 break; 537 default: 538 assert(false); 539 } 540 } 541 542 assert(ptr->getType()->getContainedType(0) == T(type)); 543 return V(::builder->Insert(new llvm::LoadInst(ptr, "", isVolatile, alignment))); 544 } 545 546 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment) 547 { 548 uintptr_t t = reinterpret_cast<uintptr_t>(type); 549 if(t < EmulatedTypeCount) 550 { 551 switch(t) 552 { 553 case Type_v2i32: 554 case Type_v4i16: 555 case Type_v8i8: 556 case Type_v2f32: 557 createStore(createExtractElement(createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0), createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment); 558 return value; 559 case Type_v2i16: 560 case Type_v4i8: 561 if(alignment != 0) // Not a local variable (all vectors are 128-bit). 
562 { 563 createStore(createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0), createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment); 564 return value; 565 } 566 break; 567 default: 568 assert(false); 569 } 570 } 571 572 assert(ptr->getType()->getContainedType(0) == T(type)); 573 ::builder->Insert(new llvm::StoreInst(value, ptr, isVolatile, alignment)); 574 return value; 575 } 576 577 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex) 578 { 579 if(sizeof(void*) == 8) 580 { 581 if(unsignedIndex) 582 { 583 index = createZExt(index, Long::getType()); 584 } 585 else 586 { 587 index = createSExt(index, Long::getType()); 588 } 589 590 index = createMul(index, createConstantLong((int64_t)typeSize(type))); 591 } 592 else 593 { 594 index = createMul(index, createConstantInt((int)typeSize(type))); 595 } 596 597 assert(ptr->getType()->getContainedType(0) == T(type)); 598 return createBitCast(V(::builder->CreateGEP(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0))), index)), T(llvm::PointerType::get(T(type), 0))); 599 } 600 601 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value) 602 { 603 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, ptr, value, llvm::SequentiallyConsistent)); 604 } 605 606 Value *Nucleus::createTrunc(Value *v, Type *destType) 607 { 608 return V(::builder->CreateTrunc(v, T(destType))); 609 } 610 611 Value *Nucleus::createZExt(Value *v, Type *destType) 612 { 613 return V(::builder->CreateZExt(v, T(destType))); 614 } 615 616 Value *Nucleus::createSExt(Value *v, Type *destType) 617 { 618 return V(::builder->CreateSExt(v, T(destType))); 619 } 620 621 Value *Nucleus::createFPToSI(Value *v, Type *destType) 622 { 623 return V(::builder->CreateFPToSI(v, T(destType))); 624 } 625 626 Value *Nucleus::createSIToFP(Value *v, Type *destType) 627 { 628 return V(::builder->CreateSIToFP(v, T(destType))); 629 } 630 631 Value *Nucleus::createFPTrunc(Value 
*v, Type *destType) 632 { 633 return V(::builder->CreateFPTrunc(v, T(destType))); 634 } 635 636 Value *Nucleus::createFPExt(Value *v, Type *destType) 637 { 638 return V(::builder->CreateFPExt(v, T(destType))); 639 } 640 641 Value *Nucleus::createBitCast(Value *v, Type *destType) 642 { 643 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need 644 // support for casting between scalars and wide vectors. Emulate them by writing to the stack and 645 // reading back as the destination type. 646 if(!v->getType()->isVectorTy() && T(destType)->isVectorTy()) 647 { 648 Value *readAddress = allocateStackVariable(destType); 649 Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(v->getType(), 0))); 650 createStore(v, writeAddress, T(v->getType())); 651 return createLoad(readAddress, destType); 652 } 653 else if(v->getType()->isVectorTy() && !T(destType)->isVectorTy()) 654 { 655 Value *writeAddress = allocateStackVariable(T(v->getType())); 656 createStore(v, writeAddress, T(v->getType())); 657 Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0))); 658 return createLoad(readAddress, destType); 659 } 660 661 return V(::builder->CreateBitCast(v, T(destType))); 662 } 663 664 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs) 665 { 666 return V(::builder->CreateICmpEQ(lhs, rhs)); 667 } 668 669 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs) 670 { 671 return V(::builder->CreateICmpNE(lhs, rhs)); 672 } 673 674 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs) 675 { 676 return V(::builder->CreateICmpUGT(lhs, rhs)); 677 } 678 679 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs) 680 { 681 return V(::builder->CreateICmpUGE(lhs, rhs)); 682 } 683 684 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs) 685 { 686 return V(::builder->CreateICmpULT(lhs, rhs)); 687 } 688 689 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs) 690 { 691 return 
V(::builder->CreateICmpULE(lhs, rhs)); 692 } 693 694 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs) 695 { 696 return V(::builder->CreateICmpSGT(lhs, rhs)); 697 } 698 699 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs) 700 { 701 return V(::builder->CreateICmpSGE(lhs, rhs)); 702 } 703 704 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs) 705 { 706 return V(::builder->CreateICmpSLT(lhs, rhs)); 707 } 708 709 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs) 710 { 711 return V(::builder->CreateICmpSLE(lhs, rhs)); 712 } 713 714 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs) 715 { 716 return V(::builder->CreateFCmpOEQ(lhs, rhs)); 717 } 718 719 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs) 720 { 721 return V(::builder->CreateFCmpOGT(lhs, rhs)); 722 } 723 724 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs) 725 { 726 return V(::builder->CreateFCmpOGE(lhs, rhs)); 727 } 728 729 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs) 730 { 731 return V(::builder->CreateFCmpOLT(lhs, rhs)); 732 } 733 734 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs) 735 { 736 return V(::builder->CreateFCmpOLE(lhs, rhs)); 737 } 738 739 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs) 740 { 741 return V(::builder->CreateFCmpONE(lhs, rhs)); 742 } 743 744 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs) 745 { 746 return V(::builder->CreateFCmpORD(lhs, rhs)); 747 } 748 749 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs) 750 { 751 return V(::builder->CreateFCmpUNO(lhs, rhs)); 752 } 753 754 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs) 755 { 756 return V(::builder->CreateFCmpUEQ(lhs, rhs)); 757 } 758 759 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs) 760 { 761 return V(::builder->CreateFCmpUGT(lhs, rhs)); 762 } 763 764 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs) 765 { 766 return V(::builder->CreateFCmpUGE(lhs, rhs)); 767 } 768 769 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs) 770 { 771 return 
V(::builder->CreateFCmpULT(lhs, rhs)); 772 } 773 774 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs) 775 { 776 return V(::builder->CreateFCmpULE(lhs, rhs)); 777 } 778 779 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs) 780 { 781 return V(::builder->CreateFCmpULE(lhs, rhs)); 782 } 783 784 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index) 785 { 786 assert(vector->getType()->getContainedType(0) == T(type)); 787 return V(::builder->CreateExtractElement(vector, createConstantInt(index))); 788 } 789 790 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index) 791 { 792 return V(::builder->CreateInsertElement(vector, element, createConstantInt(index))); 793 } 794 795 Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select) 796 { 797 int size = llvm::cast<llvm::VectorType>(V1->getType())->getNumElements(); 798 const int maxSize = 16; 799 llvm::Constant *swizzle[maxSize]; 800 assert(size <= maxSize); 801 802 for(int i = 0; i < size; i++) 803 { 804 swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]); 805 } 806 807 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size)); 808 809 return V(::builder->CreateShuffleVector(V1, V2, shuffle)); 810 } 811 812 Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse) 813 { 814 return V(::builder->CreateSelect(C, ifTrue, ifFalse)); 815 } 816 817 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases) 818 { 819 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(control, defaultBranch, numCases)); 820 } 821 822 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch) 823 { 824 switchCases->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), branch); 825 } 826 827 void Nucleus::createUnreachable() 828 { 829 ::builder->CreateUnreachable(); 830 } 831 832 static Value 
*createSwizzle4(Value *val, unsigned char select) 833 { 834 int swizzle[4] = 835 { 836 (select >> 0) & 0x03, 837 (select >> 2) & 0x03, 838 (select >> 4) & 0x03, 839 (select >> 6) & 0x03, 840 }; 841 842 return Nucleus::createShuffleVector(val, val, swizzle); 843 } 844 845 static Value *createMask4(Value *lhs, Value *rhs, unsigned char select) 846 { 847 bool mask[4] = {false, false, false, false}; 848 849 mask[(select >> 0) & 0x03] = true; 850 mask[(select >> 2) & 0x03] = true; 851 mask[(select >> 4) & 0x03] = true; 852 mask[(select >> 6) & 0x03] = true; 853 854 int swizzle[4] = 855 { 856 mask[0] ? 4 : 0, 857 mask[1] ? 5 : 1, 858 mask[2] ? 6 : 2, 859 mask[3] ? 7 : 3, 860 }; 861 862 return Nucleus::createShuffleVector(lhs, rhs, swizzle); 863 } 864 865 Type *Nucleus::getPointerType(Type *ElementType) 866 { 867 return T(llvm::PointerType::get(T(ElementType), 0)); 868 } 869 870 Value *Nucleus::createNullValue(Type *Ty) 871 { 872 return V(llvm::Constant::getNullValue(T(Ty))); 873 } 874 875 Value *Nucleus::createConstantLong(int64_t i) 876 { 877 return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true)); 878 } 879 880 Value *Nucleus::createConstantInt(int i) 881 { 882 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true)); 883 } 884 885 Value *Nucleus::createConstantInt(unsigned int i) 886 { 887 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false)); 888 } 889 890 Value *Nucleus::createConstantBool(bool b) 891 { 892 return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b)); 893 } 894 895 Value *Nucleus::createConstantByte(signed char i) 896 { 897 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true)); 898 } 899 900 Value *Nucleus::createConstantByte(unsigned char i) 901 { 902 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false)); 903 } 904 905 Value *Nucleus::createConstantShort(short i) 906 { 907 return 
V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true)); 908 } 909 910 Value *Nucleus::createConstantShort(unsigned short i) 911 { 912 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false)); 913 } 914 915 Value *Nucleus::createConstantFloat(float x) 916 { 917 return V(llvm::ConstantFP::get(T(Float::getType()), x)); 918 } 919 920 Value *Nucleus::createNullPointer(Type *Ty) 921 { 922 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0))); 923 } 924 925 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type) 926 { 927 assert(llvm::isa<llvm::VectorType>(T(type))); 928 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type. 929 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type. 930 assert(numElements <= 16 && numConstants <= numElements); 931 llvm::Constant *constantVector[16]; 932 933 for(int i = 0; i < numElements; i++) 934 { 935 constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]); 936 } 937 938 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements))); 939 } 940 941 Value *Nucleus::createConstantVector(const double *constants, Type *type) 942 { 943 assert(llvm::isa<llvm::VectorType>(T(type))); 944 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type. 945 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type. 
946 assert(numElements <= 8 && numConstants <= numElements); 947 llvm::Constant *constantVector[8]; 948 949 for(int i = 0; i < numElements; i++) 950 { 951 constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]); 952 } 953 954 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements))); 955 } 956 957 Type *Void::getType() 958 { 959 return T(llvm::Type::getVoidTy(*::context)); 960 } 961 962 Bool::Bool(Argument<Bool> argument) 963 { 964 storeValue(argument.value); 965 } 966 967 Bool::Bool(bool x) 968 { 969 storeValue(Nucleus::createConstantBool(x)); 970 } 971 972 Bool::Bool(RValue<Bool> rhs) 973 { 974 storeValue(rhs.value); 975 } 976 977 Bool::Bool(const Bool &rhs) 978 { 979 Value *value = rhs.loadValue(); 980 storeValue(value); 981 } 982 983 Bool::Bool(const Reference<Bool> &rhs) 984 { 985 Value *value = rhs.loadValue(); 986 storeValue(value); 987 } 988 989 RValue<Bool> Bool::operator=(RValue<Bool> rhs) 990 { 991 storeValue(rhs.value); 992 993 return rhs; 994 } 995 996 RValue<Bool> Bool::operator=(const Bool &rhs) 997 { 998 Value *value = rhs.loadValue(); 999 storeValue(value); 1000 1001 return RValue<Bool>(value); 1002 } 1003 1004 RValue<Bool> Bool::operator=(const Reference<Bool> &rhs) 1005 { 1006 Value *value = rhs.loadValue(); 1007 storeValue(value); 1008 1009 return RValue<Bool>(value); 1010 } 1011 1012 RValue<Bool> operator!(RValue<Bool> val) 1013 { 1014 return RValue<Bool>(Nucleus::createNot(val.value)); 1015 } 1016 1017 RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs) 1018 { 1019 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value)); 1020 } 1021 1022 RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs) 1023 { 1024 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value)); 1025 } 1026 1027 Type *Bool::getType() 1028 { 1029 return T(llvm::Type::getInt1Ty(*::context)); 1030 } 1031 1032 Byte::Byte(Argument<Byte> argument) 1033 { 1034 
storeValue(argument.value); 1035 } 1036 1037 Byte::Byte(RValue<Int> cast) 1038 { 1039 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType()); 1040 1041 storeValue(integer); 1042 } 1043 1044 Byte::Byte(RValue<UInt> cast) 1045 { 1046 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType()); 1047 1048 storeValue(integer); 1049 } 1050 1051 Byte::Byte(RValue<UShort> cast) 1052 { 1053 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType()); 1054 1055 storeValue(integer); 1056 } 1057 1058 Byte::Byte(int x) 1059 { 1060 storeValue(Nucleus::createConstantByte((unsigned char)x)); 1061 } 1062 1063 Byte::Byte(unsigned char x) 1064 { 1065 storeValue(Nucleus::createConstantByte(x)); 1066 } 1067 1068 Byte::Byte(RValue<Byte> rhs) 1069 { 1070 storeValue(rhs.value); 1071 } 1072 1073 Byte::Byte(const Byte &rhs) 1074 { 1075 Value *value = rhs.loadValue(); 1076 storeValue(value); 1077 } 1078 1079 Byte::Byte(const Reference<Byte> &rhs) 1080 { 1081 Value *value = rhs.loadValue(); 1082 storeValue(value); 1083 } 1084 1085 RValue<Byte> Byte::operator=(RValue<Byte> rhs) 1086 { 1087 storeValue(rhs.value); 1088 1089 return rhs; 1090 } 1091 1092 RValue<Byte> Byte::operator=(const Byte &rhs) 1093 { 1094 Value *value = rhs.loadValue(); 1095 storeValue(value); 1096 1097 return RValue<Byte>(value); 1098 } 1099 1100 RValue<Byte> Byte::operator=(const Reference<Byte> &rhs) 1101 { 1102 Value *value = rhs.loadValue(); 1103 storeValue(value); 1104 1105 return RValue<Byte>(value); 1106 } 1107 1108 RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs) 1109 { 1110 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value)); 1111 } 1112 1113 RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs) 1114 { 1115 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value)); 1116 } 1117 1118 RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs) 1119 { 1120 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value)); 1121 } 1122 1123 RValue<Byte> 
operator/(RValue<Byte> lhs, RValue<Byte> rhs) 1124 { 1125 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value)); 1126 } 1127 1128 RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs) 1129 { 1130 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value)); 1131 } 1132 1133 RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs) 1134 { 1135 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value)); 1136 } 1137 1138 RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs) 1139 { 1140 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value)); 1141 } 1142 1143 RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs) 1144 { 1145 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value)); 1146 } 1147 1148 RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs) 1149 { 1150 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value)); 1151 } 1152 1153 RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs) 1154 { 1155 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value)); 1156 } 1157 1158 RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs) 1159 { 1160 return lhs = lhs + rhs; 1161 } 1162 1163 RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs) 1164 { 1165 return lhs = lhs - rhs; 1166 } 1167 1168 RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs) 1169 { 1170 return lhs = lhs * rhs; 1171 } 1172 1173 RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs) 1174 { 1175 return lhs = lhs / rhs; 1176 } 1177 1178 RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs) 1179 { 1180 return lhs = lhs % rhs; 1181 } 1182 1183 RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs) 1184 { 1185 return lhs = lhs & rhs; 1186 } 1187 1188 RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs) 1189 { 1190 return lhs = lhs | rhs; 1191 } 1192 1193 RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs) 1194 { 1195 return lhs = lhs ^ rhs; 1196 } 1197 1198 RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs) 1199 { 1200 return lhs = lhs << rhs; 1201 } 1202 
1203 RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs) 1204 { 1205 return lhs = lhs >> rhs; 1206 } 1207 1208 RValue<Byte> operator+(RValue<Byte> val) 1209 { 1210 return val; 1211 } 1212 1213 RValue<Byte> operator-(RValue<Byte> val) 1214 { 1215 return RValue<Byte>(Nucleus::createNeg(val.value)); 1216 } 1217 1218 RValue<Byte> operator~(RValue<Byte> val) 1219 { 1220 return RValue<Byte>(Nucleus::createNot(val.value)); 1221 } 1222 1223 RValue<Byte> operator++(Byte &val, int) // Post-increment 1224 { 1225 RValue<Byte> res = val; 1226 1227 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((unsigned char)1))); 1228 val.storeValue(inc); 1229 1230 return res; 1231 } 1232 1233 const Byte &operator++(Byte &val) // Pre-increment 1234 { 1235 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1))); 1236 val.storeValue(inc); 1237 1238 return val; 1239 } 1240 1241 RValue<Byte> operator--(Byte &val, int) // Post-decrement 1242 { 1243 RValue<Byte> res = val; 1244 1245 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((unsigned char)1))); 1246 val.storeValue(inc); 1247 1248 return res; 1249 } 1250 1251 const Byte &operator--(Byte &val) // Pre-decrement 1252 { 1253 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1))); 1254 val.storeValue(inc); 1255 1256 return val; 1257 } 1258 1259 RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs) 1260 { 1261 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value)); 1262 } 1263 1264 RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs) 1265 { 1266 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value)); 1267 } 1268 1269 RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs) 1270 { 1271 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value)); 1272 } 1273 1274 RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs) 1275 { 1276 return 
RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value)); 1277 } 1278 1279 RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs) 1280 { 1281 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 1282 } 1283 1284 RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs) 1285 { 1286 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 1287 } 1288 1289 Type *Byte::getType() 1290 { 1291 return T(llvm::Type::getInt8Ty(*::context)); 1292 } 1293 1294 SByte::SByte(Argument<SByte> argument) 1295 { 1296 storeValue(argument.value); 1297 } 1298 1299 SByte::SByte(RValue<Int> cast) 1300 { 1301 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType()); 1302 1303 storeValue(integer); 1304 } 1305 1306 SByte::SByte(RValue<Short> cast) 1307 { 1308 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType()); 1309 1310 storeValue(integer); 1311 } 1312 1313 SByte::SByte(signed char x) 1314 { 1315 storeValue(Nucleus::createConstantByte(x)); 1316 } 1317 1318 SByte::SByte(RValue<SByte> rhs) 1319 { 1320 storeValue(rhs.value); 1321 } 1322 1323 SByte::SByte(const SByte &rhs) 1324 { 1325 Value *value = rhs.loadValue(); 1326 storeValue(value); 1327 } 1328 1329 SByte::SByte(const Reference<SByte> &rhs) 1330 { 1331 Value *value = rhs.loadValue(); 1332 storeValue(value); 1333 } 1334 1335 RValue<SByte> SByte::operator=(RValue<SByte> rhs) 1336 { 1337 storeValue(rhs.value); 1338 1339 return rhs; 1340 } 1341 1342 RValue<SByte> SByte::operator=(const SByte &rhs) 1343 { 1344 Value *value = rhs.loadValue(); 1345 storeValue(value); 1346 1347 return RValue<SByte>(value); 1348 } 1349 1350 RValue<SByte> SByte::operator=(const Reference<SByte> &rhs) 1351 { 1352 Value *value = rhs.loadValue(); 1353 storeValue(value); 1354 1355 return RValue<SByte>(value); 1356 } 1357 1358 RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs) 1359 { 1360 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value)); 1361 } 1362 1363 RValue<SByte> 
operator-(RValue<SByte> lhs, RValue<SByte> rhs) 1364 { 1365 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value)); 1366 } 1367 1368 RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs) 1369 { 1370 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value)); 1371 } 1372 1373 RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs) 1374 { 1375 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value)); 1376 } 1377 1378 RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs) 1379 { 1380 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value)); 1381 } 1382 1383 RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs) 1384 { 1385 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value)); 1386 } 1387 1388 RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs) 1389 { 1390 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value)); 1391 } 1392 1393 RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs) 1394 { 1395 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value)); 1396 } 1397 1398 RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs) 1399 { 1400 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value)); 1401 } 1402 1403 RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs) 1404 { 1405 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value)); 1406 } 1407 1408 RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs) 1409 { 1410 return lhs = lhs + rhs; 1411 } 1412 1413 RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs) 1414 { 1415 return lhs = lhs - rhs; 1416 } 1417 1418 RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs) 1419 { 1420 return lhs = lhs * rhs; 1421 } 1422 1423 RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs) 1424 { 1425 return lhs = lhs / rhs; 1426 } 1427 1428 RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs) 1429 { 1430 return lhs = lhs % rhs; 1431 } 1432 1433 RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs) 1434 { 1435 
return lhs = lhs & rhs; 1436 } 1437 1438 RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs) 1439 { 1440 return lhs = lhs | rhs; 1441 } 1442 1443 RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs) 1444 { 1445 return lhs = lhs ^ rhs; 1446 } 1447 1448 RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs) 1449 { 1450 return lhs = lhs << rhs; 1451 } 1452 1453 RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs) 1454 { 1455 return lhs = lhs >> rhs; 1456 } 1457 1458 RValue<SByte> operator+(RValue<SByte> val) 1459 { 1460 return val; 1461 } 1462 1463 RValue<SByte> operator-(RValue<SByte> val) 1464 { 1465 return RValue<SByte>(Nucleus::createNeg(val.value)); 1466 } 1467 1468 RValue<SByte> operator~(RValue<SByte> val) 1469 { 1470 return RValue<SByte>(Nucleus::createNot(val.value)); 1471 } 1472 1473 RValue<SByte> operator++(SByte &val, int) // Post-increment 1474 { 1475 RValue<SByte> res = val; 1476 1477 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((signed char)1))); 1478 val.storeValue(inc); 1479 1480 return res; 1481 } 1482 1483 const SByte &operator++(SByte &val) // Pre-increment 1484 { 1485 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((signed char)1))); 1486 val.storeValue(inc); 1487 1488 return val; 1489 } 1490 1491 RValue<SByte> operator--(SByte &val, int) // Post-decrement 1492 { 1493 RValue<SByte> res = val; 1494 1495 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((signed char)1))); 1496 val.storeValue(inc); 1497 1498 return res; 1499 } 1500 1501 const SByte &operator--(SByte &val) // Pre-decrement 1502 { 1503 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((signed char)1))); 1504 val.storeValue(inc); 1505 1506 return val; 1507 } 1508 1509 RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs) 1510 { 1511 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value)); 1512 } 1513 1514 RValue<Bool> operator<=(RValue<SByte> lhs, 
RValue<SByte> rhs) 1515 { 1516 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value)); 1517 } 1518 1519 RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs) 1520 { 1521 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value)); 1522 } 1523 1524 RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs) 1525 { 1526 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value)); 1527 } 1528 1529 RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs) 1530 { 1531 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 1532 } 1533 1534 RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs) 1535 { 1536 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 1537 } 1538 1539 Type *SByte::getType() 1540 { 1541 return T(llvm::Type::getInt8Ty(*::context)); 1542 } 1543 1544 Short::Short(Argument<Short> argument) 1545 { 1546 storeValue(argument.value); 1547 } 1548 1549 Short::Short(RValue<Int> cast) 1550 { 1551 Value *integer = Nucleus::createTrunc(cast.value, Short::getType()); 1552 1553 storeValue(integer); 1554 } 1555 1556 Short::Short(short x) 1557 { 1558 storeValue(Nucleus::createConstantShort(x)); 1559 } 1560 1561 Short::Short(RValue<Short> rhs) 1562 { 1563 storeValue(rhs.value); 1564 } 1565 1566 Short::Short(const Short &rhs) 1567 { 1568 Value *value = rhs.loadValue(); 1569 storeValue(value); 1570 } 1571 1572 Short::Short(const Reference<Short> &rhs) 1573 { 1574 Value *value = rhs.loadValue(); 1575 storeValue(value); 1576 } 1577 1578 RValue<Short> Short::operator=(RValue<Short> rhs) 1579 { 1580 storeValue(rhs.value); 1581 1582 return rhs; 1583 } 1584 1585 RValue<Short> Short::operator=(const Short &rhs) 1586 { 1587 Value *value = rhs.loadValue(); 1588 storeValue(value); 1589 1590 return RValue<Short>(value); 1591 } 1592 1593 RValue<Short> Short::operator=(const Reference<Short> &rhs) 1594 { 1595 Value *value = rhs.loadValue(); 1596 storeValue(value); 1597 1598 return RValue<Short>(value); 1599 } 1600 
1601 RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs) 1602 { 1603 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value)); 1604 } 1605 1606 RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs) 1607 { 1608 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value)); 1609 } 1610 1611 RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs) 1612 { 1613 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value)); 1614 } 1615 1616 RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs) 1617 { 1618 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value)); 1619 } 1620 1621 RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs) 1622 { 1623 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value)); 1624 } 1625 1626 RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs) 1627 { 1628 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value)); 1629 } 1630 1631 RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs) 1632 { 1633 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value)); 1634 } 1635 1636 RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs) 1637 { 1638 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value)); 1639 } 1640 1641 RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs) 1642 { 1643 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value)); 1644 } 1645 1646 RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs) 1647 { 1648 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value)); 1649 } 1650 1651 RValue<Short> operator+=(Short &lhs, RValue<Short> rhs) 1652 { 1653 return lhs = lhs + rhs; 1654 } 1655 1656 RValue<Short> operator-=(Short &lhs, RValue<Short> rhs) 1657 { 1658 return lhs = lhs - rhs; 1659 } 1660 1661 RValue<Short> operator*=(Short &lhs, RValue<Short> rhs) 1662 { 1663 return lhs = lhs * rhs; 1664 } 1665 1666 RValue<Short> operator/=(Short &lhs, RValue<Short> rhs) 1667 { 1668 return lhs = lhs / rhs; 1669 } 1670 1671 
RValue<Short> operator%=(Short &lhs, RValue<Short> rhs) 1672 { 1673 return lhs = lhs % rhs; 1674 } 1675 1676 RValue<Short> operator&=(Short &lhs, RValue<Short> rhs) 1677 { 1678 return lhs = lhs & rhs; 1679 } 1680 1681 RValue<Short> operator|=(Short &lhs, RValue<Short> rhs) 1682 { 1683 return lhs = lhs | rhs; 1684 } 1685 1686 RValue<Short> operator^=(Short &lhs, RValue<Short> rhs) 1687 { 1688 return lhs = lhs ^ rhs; 1689 } 1690 1691 RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs) 1692 { 1693 return lhs = lhs << rhs; 1694 } 1695 1696 RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs) 1697 { 1698 return lhs = lhs >> rhs; 1699 } 1700 1701 RValue<Short> operator+(RValue<Short> val) 1702 { 1703 return val; 1704 } 1705 1706 RValue<Short> operator-(RValue<Short> val) 1707 { 1708 return RValue<Short>(Nucleus::createNeg(val.value)); 1709 } 1710 1711 RValue<Short> operator~(RValue<Short> val) 1712 { 1713 return RValue<Short>(Nucleus::createNot(val.value)); 1714 } 1715 1716 RValue<Short> operator++(Short &val, int) // Post-increment 1717 { 1718 RValue<Short> res = val; 1719 1720 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((short)1))); 1721 val.storeValue(inc); 1722 1723 return res; 1724 } 1725 1726 const Short &operator++(Short &val) // Pre-increment 1727 { 1728 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((short)1))); 1729 val.storeValue(inc); 1730 1731 return val; 1732 } 1733 1734 RValue<Short> operator--(Short &val, int) // Post-decrement 1735 { 1736 RValue<Short> res = val; 1737 1738 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((short)1))); 1739 val.storeValue(inc); 1740 1741 return res; 1742 } 1743 1744 const Short &operator--(Short &val) // Pre-decrement 1745 { 1746 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((short)1))); 1747 val.storeValue(inc); 1748 1749 return val; 1750 } 1751 1752 RValue<Bool> operator<(RValue<Short> lhs, 
RValue<Short> rhs) 1753 { 1754 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value)); 1755 } 1756 1757 RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs) 1758 { 1759 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value)); 1760 } 1761 1762 RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs) 1763 { 1764 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value)); 1765 } 1766 1767 RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs) 1768 { 1769 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value)); 1770 } 1771 1772 RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs) 1773 { 1774 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 1775 } 1776 1777 RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs) 1778 { 1779 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 1780 } 1781 1782 Type *Short::getType() 1783 { 1784 return T(llvm::Type::getInt16Ty(*::context)); 1785 } 1786 1787 UShort::UShort(Argument<UShort> argument) 1788 { 1789 storeValue(argument.value); 1790 } 1791 1792 UShort::UShort(RValue<UInt> cast) 1793 { 1794 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType()); 1795 1796 storeValue(integer); 1797 } 1798 1799 UShort::UShort(RValue<Int> cast) 1800 { 1801 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType()); 1802 1803 storeValue(integer); 1804 } 1805 1806 UShort::UShort(unsigned short x) 1807 { 1808 storeValue(Nucleus::createConstantShort(x)); 1809 } 1810 1811 UShort::UShort(RValue<UShort> rhs) 1812 { 1813 storeValue(rhs.value); 1814 } 1815 1816 UShort::UShort(const UShort &rhs) 1817 { 1818 Value *value = rhs.loadValue(); 1819 storeValue(value); 1820 } 1821 1822 UShort::UShort(const Reference<UShort> &rhs) 1823 { 1824 Value *value = rhs.loadValue(); 1825 storeValue(value); 1826 } 1827 1828 RValue<UShort> UShort::operator=(RValue<UShort> rhs) 1829 { 1830 storeValue(rhs.value); 1831 1832 return rhs; 1833 } 1834 1835 
RValue<UShort> UShort::operator=(const UShort &rhs) 1836 { 1837 Value *value = rhs.loadValue(); 1838 storeValue(value); 1839 1840 return RValue<UShort>(value); 1841 } 1842 1843 RValue<UShort> UShort::operator=(const Reference<UShort> &rhs) 1844 { 1845 Value *value = rhs.loadValue(); 1846 storeValue(value); 1847 1848 return RValue<UShort>(value); 1849 } 1850 1851 RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs) 1852 { 1853 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value)); 1854 } 1855 1856 RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs) 1857 { 1858 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value)); 1859 } 1860 1861 RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs) 1862 { 1863 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value)); 1864 } 1865 1866 RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs) 1867 { 1868 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value)); 1869 } 1870 1871 RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs) 1872 { 1873 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value)); 1874 } 1875 1876 RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs) 1877 { 1878 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value)); 1879 } 1880 1881 RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs) 1882 { 1883 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value)); 1884 } 1885 1886 RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs) 1887 { 1888 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value)); 1889 } 1890 1891 RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs) 1892 { 1893 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value)); 1894 } 1895 1896 RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs) 1897 { 1898 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value)); 1899 } 1900 1901 RValue<UShort> operator+=(UShort &lhs, 
RValue<UShort> rhs) 1902 { 1903 return lhs = lhs + rhs; 1904 } 1905 1906 RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs) 1907 { 1908 return lhs = lhs - rhs; 1909 } 1910 1911 RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs) 1912 { 1913 return lhs = lhs * rhs; 1914 } 1915 1916 RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs) 1917 { 1918 return lhs = lhs / rhs; 1919 } 1920 1921 RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs) 1922 { 1923 return lhs = lhs % rhs; 1924 } 1925 1926 RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs) 1927 { 1928 return lhs = lhs & rhs; 1929 } 1930 1931 RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs) 1932 { 1933 return lhs = lhs | rhs; 1934 } 1935 1936 RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs) 1937 { 1938 return lhs = lhs ^ rhs; 1939 } 1940 1941 RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs) 1942 { 1943 return lhs = lhs << rhs; 1944 } 1945 1946 RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs) 1947 { 1948 return lhs = lhs >> rhs; 1949 } 1950 1951 RValue<UShort> operator+(RValue<UShort> val) 1952 { 1953 return val; 1954 } 1955 1956 RValue<UShort> operator-(RValue<UShort> val) 1957 { 1958 return RValue<UShort>(Nucleus::createNeg(val.value)); 1959 } 1960 1961 RValue<UShort> operator~(RValue<UShort> val) 1962 { 1963 return RValue<UShort>(Nucleus::createNot(val.value)); 1964 } 1965 1966 RValue<UShort> operator++(UShort &val, int) // Post-increment 1967 { 1968 RValue<UShort> res = val; 1969 1970 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((unsigned short)1))); 1971 val.storeValue(inc); 1972 1973 return res; 1974 } 1975 1976 const UShort &operator++(UShort &val) // Pre-increment 1977 { 1978 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1))); 1979 val.storeValue(inc); 1980 1981 return val; 1982 } 1983 1984 RValue<UShort> operator--(UShort &val, int) // Post-decrement 1985 
{ 1986 RValue<UShort> res = val; 1987 1988 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((unsigned short)1))); 1989 val.storeValue(inc); 1990 1991 return res; 1992 } 1993 1994 const UShort &operator--(UShort &val) // Pre-decrement 1995 { 1996 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1))); 1997 val.storeValue(inc); 1998 1999 return val; 2000 } 2001 2002 RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs) 2003 { 2004 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value)); 2005 } 2006 2007 RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs) 2008 { 2009 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value)); 2010 } 2011 2012 RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs) 2013 { 2014 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value)); 2015 } 2016 2017 RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs) 2018 { 2019 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value)); 2020 } 2021 2022 RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs) 2023 { 2024 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 2025 } 2026 2027 RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs) 2028 { 2029 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 2030 } 2031 2032 Type *UShort::getType() 2033 { 2034 return T(llvm::Type::getInt16Ty(*::context)); 2035 } 2036 2037 Byte4::Byte4(RValue<Byte8> cast) 2038 { 2039 storeValue(Nucleus::createBitCast(cast.value, getType())); 2040 } 2041 2042 Byte4::Byte4(const Reference<Byte4> &rhs) 2043 { 2044 Value *value = rhs.loadValue(); 2045 storeValue(value); 2046 } 2047 2048 Type *Byte4::getType() 2049 { 2050 return T(Type_v4i8); 2051 } 2052 2053 Type *SByte4::getType() 2054 { 2055 return T(Type_v4i8); 2056 } 2057 2058 Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) 
2059 { 2060 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7}; 2061 storeValue(Nucleus::createConstantVector(constantVector, getType())); 2062 } 2063 2064 Byte8::Byte8(RValue<Byte8> rhs) 2065 { 2066 storeValue(rhs.value); 2067 } 2068 2069 Byte8::Byte8(const Byte8 &rhs) 2070 { 2071 Value *value = rhs.loadValue(); 2072 storeValue(value); 2073 } 2074 2075 Byte8::Byte8(const Reference<Byte8> &rhs) 2076 { 2077 Value *value = rhs.loadValue(); 2078 storeValue(value); 2079 } 2080 2081 RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs) 2082 { 2083 storeValue(rhs.value); 2084 2085 return rhs; 2086 } 2087 2088 RValue<Byte8> Byte8::operator=(const Byte8 &rhs) 2089 { 2090 Value *value = rhs.loadValue(); 2091 storeValue(value); 2092 2093 return RValue<Byte8>(value); 2094 } 2095 2096 RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs) 2097 { 2098 Value *value = rhs.loadValue(); 2099 storeValue(value); 2100 2101 return RValue<Byte8>(value); 2102 } 2103 2104 RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs) 2105 { 2106 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value)); 2107 } 2108 2109 RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs) 2110 { 2111 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value)); 2112 } 2113 2114 // RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs) 2115 // { 2116 // return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value)); 2117 // } 2118 2119 // RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs) 2120 // { 2121 // return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value)); 2122 // } 2123 2124 // RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs) 2125 // { 2126 // return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value)); 2127 // } 2128 2129 RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs) 2130 { 2131 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value)); 2132 } 2133 2134 RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> 
rhs) 2135 { 2136 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value)); 2137 } 2138 2139 RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs) 2140 { 2141 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value)); 2142 } 2143 2144 // RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs) 2145 // { 2146 // return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value)); 2147 // } 2148 2149 // RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs) 2150 // { 2151 // return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value)); 2152 // } 2153 2154 RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs) 2155 { 2156 return lhs = lhs + rhs; 2157 } 2158 2159 RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs) 2160 { 2161 return lhs = lhs - rhs; 2162 } 2163 2164 // RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs) 2165 // { 2166 // return lhs = lhs * rhs; 2167 // } 2168 2169 // RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs) 2170 // { 2171 // return lhs = lhs / rhs; 2172 // } 2173 2174 // RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs) 2175 // { 2176 // return lhs = lhs % rhs; 2177 // } 2178 2179 RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs) 2180 { 2181 return lhs = lhs & rhs; 2182 } 2183 2184 RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs) 2185 { 2186 return lhs = lhs | rhs; 2187 } 2188 2189 RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs) 2190 { 2191 return lhs = lhs ^ rhs; 2192 } 2193 2194 // RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs) 2195 // { 2196 // return lhs = lhs << rhs; 2197 // } 2198 2199 // RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs) 2200 // { 2201 // return lhs = lhs >> rhs; 2202 // } 2203 2204 // RValue<Byte8> operator+(RValue<Byte8> val) 2205 // { 2206 // return val; 2207 // } 2208 2209 // RValue<Byte8> operator-(RValue<Byte8> val) 2210 // { 2211 // return RValue<Byte8>(Nucleus::createNeg(val.value)); 2212 // } 2213 2214 RValue<Byte8> 
operator~(RValue<Byte8> val) 2215 { 2216 return RValue<Byte8>(Nucleus::createNot(val.value)); 2217 } 2218 2219 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y) 2220 { 2221 return x86::paddusb(x, y); 2222 } 2223 2224 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y) 2225 { 2226 return x86::psubusb(x, y); 2227 } 2228 2229 RValue<Short4> Unpack(RValue<Byte4> x) 2230 { 2231 int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}; // Real type is v16i8 2232 return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle)); 2233 } 2234 2235 RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y) 2236 { 2237 return UnpackLow(As<Byte8>(x), As<Byte8>(y)); 2238 } 2239 2240 RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y) 2241 { 2242 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8 2243 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 2244 } 2245 2246 RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y) 2247 { 2248 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8 2249 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 2250 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE)); 2251 } 2252 2253 RValue<Int> SignMask(RValue<Byte8> x) 2254 { 2255 return x86::pmovmskb(x); 2256 } 2257 2258 // RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y) 2259 // { 2260 // return x86::pcmpgtb(x, y); // FIXME: Signedness 2261 // } 2262 2263 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y) 2264 { 2265 return x86::pcmpeqb(x, y); 2266 } 2267 2268 Type *Byte8::getType() 2269 { 2270 return T(Type_v8i8); 2271 } 2272 2273 SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) 2274 { 2275 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7}; 2276 Value *vector = V(Nucleus::createConstantVector(constantVector, getType())); 2277 
2278 storeValue(Nucleus::createBitCast(vector, getType())); 2279 } 2280 2281 SByte8::SByte8(RValue<SByte8> rhs) 2282 { 2283 storeValue(rhs.value); 2284 } 2285 2286 SByte8::SByte8(const SByte8 &rhs) 2287 { 2288 Value *value = rhs.loadValue(); 2289 storeValue(value); 2290 } 2291 2292 SByte8::SByte8(const Reference<SByte8> &rhs) 2293 { 2294 Value *value = rhs.loadValue(); 2295 storeValue(value); 2296 } 2297 2298 RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs) 2299 { 2300 storeValue(rhs.value); 2301 2302 return rhs; 2303 } 2304 2305 RValue<SByte8> SByte8::operator=(const SByte8 &rhs) 2306 { 2307 Value *value = rhs.loadValue(); 2308 storeValue(value); 2309 2310 return RValue<SByte8>(value); 2311 } 2312 2313 RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs) 2314 { 2315 Value *value = rhs.loadValue(); 2316 storeValue(value); 2317 2318 return RValue<SByte8>(value); 2319 } 2320 2321 RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs) 2322 { 2323 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value)); 2324 } 2325 2326 RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs) 2327 { 2328 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value)); 2329 } 2330 2331 // RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs) 2332 // { 2333 // return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value)); 2334 // } 2335 2336 // RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs) 2337 // { 2338 // return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value)); 2339 // } 2340 2341 // RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs) 2342 // { 2343 // return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value)); 2344 // } 2345 2346 RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs) 2347 { 2348 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value)); 2349 } 2350 2351 RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs) 2352 { 2353 return 
RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value)); 2354 } 2355 2356 RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs) 2357 { 2358 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value)); 2359 } 2360 2361 // RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs) 2362 // { 2363 // return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value)); 2364 // } 2365 2366 // RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs) 2367 // { 2368 // return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value)); 2369 // } 2370 2371 RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs) 2372 { 2373 return lhs = lhs + rhs; 2374 } 2375 2376 RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs) 2377 { 2378 return lhs = lhs - rhs; 2379 } 2380 2381 // RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs) 2382 // { 2383 // return lhs = lhs * rhs; 2384 // } 2385 2386 // RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs) 2387 // { 2388 // return lhs = lhs / rhs; 2389 // } 2390 2391 // RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs) 2392 // { 2393 // return lhs = lhs % rhs; 2394 // } 2395 2396 RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs) 2397 { 2398 return lhs = lhs & rhs; 2399 } 2400 2401 RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs) 2402 { 2403 return lhs = lhs | rhs; 2404 } 2405 2406 RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs) 2407 { 2408 return lhs = lhs ^ rhs; 2409 } 2410 2411 // RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs) 2412 // { 2413 // return lhs = lhs << rhs; 2414 // } 2415 2416 // RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs) 2417 // { 2418 // return lhs = lhs >> rhs; 2419 // } 2420 2421 // RValue<SByte8> operator+(RValue<SByte8> val) 2422 // { 2423 // return val; 2424 // } 2425 2426 // RValue<SByte8> operator-(RValue<SByte8> val) 2427 // { 2428 // return RValue<SByte8>(Nucleus::createNeg(val.value)); 2429 // } 2430 2431 
RValue<SByte8> operator~(RValue<SByte8> val) 2432 { 2433 return RValue<SByte8>(Nucleus::createNot(val.value)); 2434 } 2435 2436 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y) 2437 { 2438 return x86::paddsb(x, y); 2439 } 2440 2441 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y) 2442 { 2443 return x86::psubsb(x, y); 2444 } 2445 2446 RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y) 2447 { 2448 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8 2449 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 2450 } 2451 2452 RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y) 2453 { 2454 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8 2455 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 2456 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE)); 2457 } 2458 2459 RValue<Int> SignMask(RValue<SByte8> x) 2460 { 2461 return x86::pmovmskb(As<Byte8>(x)); 2462 } 2463 2464 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y) 2465 { 2466 return x86::pcmpgtb(x, y); 2467 } 2468 2469 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y) 2470 { 2471 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y)); 2472 } 2473 2474 Type *SByte8::getType() 2475 { 2476 return T(Type_v8i8); 2477 } 2478 2479 Byte16::Byte16(RValue<Byte16> rhs) 2480 { 2481 storeValue(rhs.value); 2482 } 2483 2484 Byte16::Byte16(const Byte16 &rhs) 2485 { 2486 Value *value = rhs.loadValue(); 2487 storeValue(value); 2488 } 2489 2490 Byte16::Byte16(const Reference<Byte16> &rhs) 2491 { 2492 Value *value = rhs.loadValue(); 2493 storeValue(value); 2494 } 2495 2496 RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs) 2497 { 2498 storeValue(rhs.value); 2499 2500 return rhs; 2501 } 2502 2503 RValue<Byte16> Byte16::operator=(const Byte16 &rhs) 2504 { 2505 Value *value = rhs.loadValue(); 2506 storeValue(value); 2507 2508 return 
RValue<Byte16>(value); 2509 } 2510 2511 RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs) 2512 { 2513 Value *value = rhs.loadValue(); 2514 storeValue(value); 2515 2516 return RValue<Byte16>(value); 2517 } 2518 2519 Type *Byte16::getType() 2520 { 2521 return T(llvm::VectorType::get(T(Byte::getType()), 16)); 2522 } 2523 2524 Type *SByte16::getType() 2525 { 2526 return T(llvm::VectorType::get(T(SByte::getType()), 16)); 2527 } 2528 2529 Short2::Short2(RValue<Short4> cast) 2530 { 2531 storeValue(Nucleus::createBitCast(cast.value, getType())); 2532 } 2533 2534 Type *Short2::getType() 2535 { 2536 return T(Type_v2i16); 2537 } 2538 2539 UShort2::UShort2(RValue<UShort4> cast) 2540 { 2541 storeValue(Nucleus::createBitCast(cast.value, getType())); 2542 } 2543 2544 Type *UShort2::getType() 2545 { 2546 return T(Type_v2i16); 2547 } 2548 2549 Short4::Short4(RValue<Int> cast) 2550 { 2551 Value *vector = loadValue(); 2552 Value *element = Nucleus::createTrunc(cast.value, Short::getType()); 2553 Value *insert = Nucleus::createInsertElement(vector, element, 0); 2554 Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value; 2555 2556 storeValue(swizzle); 2557 } 2558 2559 Short4::Short4(RValue<Int4> cast) 2560 { 2561 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6}; 2562 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType()); 2563 2564 Value *packed = Nucleus::createShuffleVector(short8, short8, select); 2565 Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value; 2566 2567 storeValue(short4); 2568 } 2569 2570 // Short4::Short4(RValue<Float> cast) 2571 // { 2572 // } 2573 2574 Short4::Short4(RValue<Float4> cast) 2575 { 2576 Int4 v4i32 = Int4(cast); 2577 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32)); 2578 2579 storeValue(As<Short4>(Int2(v4i32)).value); 2580 } 2581 2582 Short4::Short4(short xyzw) 2583 { 2584 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw}; 2585 storeValue(Nucleus::createConstantVector(constantVector, getType())); 2586 } 2587 2588 
Short4::Short4(short x, short y, short z, short w) 2589 { 2590 int64_t constantVector[4] = {x, y, z, w}; 2591 storeValue(Nucleus::createConstantVector(constantVector, getType())); 2592 } 2593 2594 Short4::Short4(RValue<Short4> rhs) 2595 { 2596 storeValue(rhs.value); 2597 } 2598 2599 Short4::Short4(const Short4 &rhs) 2600 { 2601 Value *value = rhs.loadValue(); 2602 storeValue(value); 2603 } 2604 2605 Short4::Short4(const Reference<Short4> &rhs) 2606 { 2607 Value *value = rhs.loadValue(); 2608 storeValue(value); 2609 } 2610 2611 Short4::Short4(RValue<UShort4> rhs) 2612 { 2613 storeValue(rhs.value); 2614 } 2615 2616 Short4::Short4(const UShort4 &rhs) 2617 { 2618 storeValue(rhs.loadValue()); 2619 } 2620 2621 Short4::Short4(const Reference<UShort4> &rhs) 2622 { 2623 storeValue(rhs.loadValue()); 2624 } 2625 2626 RValue<Short4> Short4::operator=(RValue<Short4> rhs) 2627 { 2628 storeValue(rhs.value); 2629 2630 return rhs; 2631 } 2632 2633 RValue<Short4> Short4::operator=(const Short4 &rhs) 2634 { 2635 Value *value = rhs.loadValue(); 2636 storeValue(value); 2637 2638 return RValue<Short4>(value); 2639 } 2640 2641 RValue<Short4> Short4::operator=(const Reference<Short4> &rhs) 2642 { 2643 Value *value = rhs.loadValue(); 2644 storeValue(value); 2645 2646 return RValue<Short4>(value); 2647 } 2648 2649 RValue<Short4> Short4::operator=(RValue<UShort4> rhs) 2650 { 2651 storeValue(rhs.value); 2652 2653 return RValue<Short4>(rhs); 2654 } 2655 2656 RValue<Short4> Short4::operator=(const UShort4 &rhs) 2657 { 2658 Value *value = rhs.loadValue(); 2659 storeValue(value); 2660 2661 return RValue<Short4>(value); 2662 } 2663 2664 RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs) 2665 { 2666 Value *value = rhs.loadValue(); 2667 storeValue(value); 2668 2669 return RValue<Short4>(value); 2670 } 2671 2672 RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs) 2673 { 2674 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value)); 2675 } 2676 2677 RValue<Short4> 
operator-(RValue<Short4> lhs, RValue<Short4> rhs) 2678 { 2679 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value)); 2680 } 2681 2682 RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs) 2683 { 2684 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value)); 2685 } 2686 2687 // RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs) 2688 // { 2689 // return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value)); 2690 // } 2691 2692 // RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs) 2693 // { 2694 // return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value)); 2695 // } 2696 2697 RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs) 2698 { 2699 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value)); 2700 } 2701 2702 RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs) 2703 { 2704 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value)); 2705 } 2706 2707 RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs) 2708 { 2709 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value)); 2710 } 2711 2712 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs) 2713 { 2714 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value)); 2715 2716 return x86::psllw(lhs, rhs); 2717 } 2718 2719 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs) 2720 { 2721 // return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value)); 2722 2723 return x86::psraw(lhs, rhs); 2724 } 2725 2726 RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs) 2727 { 2728 return lhs = lhs + rhs; 2729 } 2730 2731 RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs) 2732 { 2733 return lhs = lhs - rhs; 2734 } 2735 2736 RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs) 2737 { 2738 return lhs = lhs * rhs; 2739 } 2740 2741 // RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs) 2742 // { 2743 // return lhs = lhs / rhs; 2744 // } 2745 2746 // 
RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs) 2747 // { 2748 // return lhs = lhs % rhs; 2749 // } 2750 2751 RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs) 2752 { 2753 return lhs = lhs & rhs; 2754 } 2755 2756 RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs) 2757 { 2758 return lhs = lhs | rhs; 2759 } 2760 2761 RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs) 2762 { 2763 return lhs = lhs ^ rhs; 2764 } 2765 2766 RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs) 2767 { 2768 return lhs = lhs << rhs; 2769 } 2770 2771 RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs) 2772 { 2773 return lhs = lhs >> rhs; 2774 } 2775 2776 // RValue<Short4> operator+(RValue<Short4> val) 2777 // { 2778 // return val; 2779 // } 2780 2781 RValue<Short4> operator-(RValue<Short4> val) 2782 { 2783 return RValue<Short4>(Nucleus::createNeg(val.value)); 2784 } 2785 2786 RValue<Short4> operator~(RValue<Short4> val) 2787 { 2788 return RValue<Short4>(Nucleus::createNot(val.value)); 2789 } 2790 2791 RValue<Short4> RoundShort4(RValue<Float4> cast) 2792 { 2793 RValue<Int4> int4 = RoundInt(cast); 2794 return As<Short4>(PackSigned(int4, int4)); 2795 } 2796 2797 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y) 2798 { 2799 return x86::pmaxsw(x, y); 2800 } 2801 2802 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y) 2803 { 2804 return x86::pminsw(x, y); 2805 } 2806 2807 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y) 2808 { 2809 return x86::paddsw(x, y); 2810 } 2811 2812 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y) 2813 { 2814 return x86::psubsw(x, y); 2815 } 2816 2817 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y) 2818 { 2819 return x86::pmulhw(x, y); 2820 } 2821 2822 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y) 2823 { 2824 return x86::pmaddwd(x, y); 2825 } 2826 2827 RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y) 2828 { 2829 auto result = x86::packsswb(x, y); 2830 2831 return 
As<SByte8>(Swizzle(As<Int4>(result), 0x88)); 2832 } 2833 2834 RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y) 2835 { 2836 auto result = x86::packuswb(x, y); 2837 2838 return As<Byte8>(Swizzle(As<Int4>(result), 0x88)); 2839 } 2840 2841 RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y) 2842 { 2843 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // Real type is v8i16 2844 return As<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 2845 } 2846 2847 RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y) 2848 { 2849 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // Real type is v8i16 2850 auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 2851 return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE)); 2852 } 2853 2854 RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select) 2855 { 2856 // Real type is v8i16 2857 int shuffle[8] = 2858 { 2859 (select >> 0) & 0x03, 2860 (select >> 2) & 0x03, 2861 (select >> 4) & 0x03, 2862 (select >> 6) & 0x03, 2863 (select >> 0) & 0x03, 2864 (select >> 2) & 0x03, 2865 (select >> 4) & 0x03, 2866 (select >> 6) & 0x03, 2867 }; 2868 2869 return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle)); 2870 } 2871 2872 RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i) 2873 { 2874 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i)); 2875 } 2876 2877 RValue<Short> Extract(RValue<Short4> val, int i) 2878 { 2879 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i)); 2880 } 2881 2882 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y) 2883 { 2884 return x86::pcmpgtw(x, y); 2885 } 2886 2887 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y) 2888 { 2889 return x86::pcmpeqw(x, y); 2890 } 2891 2892 Type *Short4::getType() 2893 { 2894 return T(Type_v4i16); 2895 } 2896 2897 UShort4::UShort4(RValue<Int4> cast) 2898 { 2899 *this = Short4(cast); 2900 } 2901 2902 
UShort4::UShort4(RValue<Float4> cast, bool saturate) 2903 { 2904 if(saturate) 2905 { 2906 if(CPUID::supportsSSE4_1()) 2907 { 2908 Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation 2909 *this = As<Short4>(PackUnsigned(int4, int4)); 2910 } 2911 else 2912 { 2913 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000)))); 2914 } 2915 } 2916 else 2917 { 2918 *this = Short4(Int4(cast)); 2919 } 2920 } 2921 2922 UShort4::UShort4(unsigned short xyzw) 2923 { 2924 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw}; 2925 storeValue(Nucleus::createConstantVector(constantVector, getType())); 2926 } 2927 2928 UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w) 2929 { 2930 int64_t constantVector[4] = {x, y, z, w}; 2931 storeValue(Nucleus::createConstantVector(constantVector, getType())); 2932 } 2933 2934 UShort4::UShort4(RValue<UShort4> rhs) 2935 { 2936 storeValue(rhs.value); 2937 } 2938 2939 UShort4::UShort4(const UShort4 &rhs) 2940 { 2941 Value *value = rhs.loadValue(); 2942 storeValue(value); 2943 } 2944 2945 UShort4::UShort4(const Reference<UShort4> &rhs) 2946 { 2947 Value *value = rhs.loadValue(); 2948 storeValue(value); 2949 } 2950 2951 UShort4::UShort4(RValue<Short4> rhs) 2952 { 2953 storeValue(rhs.value); 2954 } 2955 2956 UShort4::UShort4(const Short4 &rhs) 2957 { 2958 Value *value = rhs.loadValue(); 2959 storeValue(value); 2960 } 2961 2962 UShort4::UShort4(const Reference<Short4> &rhs) 2963 { 2964 Value *value = rhs.loadValue(); 2965 storeValue(value); 2966 } 2967 2968 RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs) 2969 { 2970 storeValue(rhs.value); 2971 2972 return rhs; 2973 } 2974 2975 RValue<UShort4> UShort4::operator=(const UShort4 &rhs) 2976 { 2977 Value *value = rhs.loadValue(); 2978 storeValue(value); 2979 2980 return RValue<UShort4>(value); 2981 } 2982 2983 RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs) 2984 { 2985 Value *value = rhs.loadValue(); 2986 
storeValue(value); 2987 2988 return RValue<UShort4>(value); 2989 } 2990 2991 RValue<UShort4> UShort4::operator=(RValue<Short4> rhs) 2992 { 2993 storeValue(rhs.value); 2994 2995 return RValue<UShort4>(rhs); 2996 } 2997 2998 RValue<UShort4> UShort4::operator=(const Short4 &rhs) 2999 { 3000 Value *value = rhs.loadValue(); 3001 storeValue(value); 3002 3003 return RValue<UShort4>(value); 3004 } 3005 3006 RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs) 3007 { 3008 Value *value = rhs.loadValue(); 3009 storeValue(value); 3010 3011 return RValue<UShort4>(value); 3012 } 3013 3014 RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs) 3015 { 3016 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value)); 3017 } 3018 3019 RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs) 3020 { 3021 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value)); 3022 } 3023 3024 RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs) 3025 { 3026 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value)); 3027 } 3028 3029 RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs) 3030 { 3031 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value)); 3032 } 3033 3034 RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs) 3035 { 3036 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value)); 3037 } 3038 3039 RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs) 3040 { 3041 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value)); 3042 } 3043 3044 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs) 3045 { 3046 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value)); 3047 3048 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs)); 3049 } 3050 3051 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs) 3052 { 3053 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value)); 3054 3055 return x86::psrlw(lhs, rhs); 3056 } 3057 
3058 RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs) 3059 { 3060 return lhs = lhs << rhs; 3061 } 3062 3063 RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs) 3064 { 3065 return lhs = lhs >> rhs; 3066 } 3067 3068 RValue<UShort4> operator~(RValue<UShort4> val) 3069 { 3070 return RValue<UShort4>(Nucleus::createNot(val.value)); 3071 } 3072 3073 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y) 3074 { 3075 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)); 3076 } 3077 3078 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y) 3079 { 3080 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)); 3081 } 3082 3083 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y) 3084 { 3085 return x86::paddusw(x, y); 3086 } 3087 3088 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y) 3089 { 3090 return x86::psubusw(x, y); 3091 } 3092 3093 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y) 3094 { 3095 return x86::pmulhuw(x, y); 3096 } 3097 3098 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y) 3099 { 3100 return x86::pavgw(x, y); 3101 } 3102 3103 Type *UShort4::getType() 3104 { 3105 return T(Type_v4i16); 3106 } 3107 3108 Short8::Short8(short c) 3109 { 3110 int64_t constantVector[8] = {c, c, c, c, c, c, c, c}; 3111 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3112 } 3113 3114 Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7) 3115 { 3116 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7}; 3117 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3118 } 3119 3120 Short8::Short8(RValue<Short8> rhs) 3121 { 3122 storeValue(rhs.value); 3123 } 3124 
3125 Short8::Short8(const Reference<Short8> &rhs) 3126 { 3127 Value *value = rhs.loadValue(); 3128 storeValue(value); 3129 } 3130 3131 Short8::Short8(RValue<Short4> lo, RValue<Short4> hi) 3132 { 3133 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11}; // Real type is v8i16 3134 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle); 3135 3136 storeValue(packed); 3137 } 3138 3139 RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs) 3140 { 3141 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value)); 3142 } 3143 3144 RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs) 3145 { 3146 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value)); 3147 } 3148 3149 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs) 3150 { 3151 return x86::psllw(lhs, rhs); // FIXME: Fallback required 3152 } 3153 3154 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs) 3155 { 3156 return x86::psraw(lhs, rhs); // FIXME: Fallback required 3157 } 3158 3159 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y) 3160 { 3161 return x86::pmaddwd(x, y); // FIXME: Fallback required 3162 } 3163 3164 RValue<Int4> Abs(RValue<Int4> x) 3165 { 3166 auto negative = x >> 31; 3167 return (x ^ negative) - negative; 3168 } 3169 3170 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y) 3171 { 3172 return x86::pmulhw(x, y); // FIXME: Fallback required 3173 } 3174 3175 Type *Short8::getType() 3176 { 3177 return T(llvm::VectorType::get(T(Short::getType()), 8)); 3178 } 3179 3180 UShort8::UShort8(unsigned short c) 3181 { 3182 int64_t constantVector[8] = {c, c, c, c, c, c, c, c}; 3183 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3184 } 3185 3186 UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7) 3187 { 3188 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7}; 3189 
storeValue(Nucleus::createConstantVector(constantVector, getType())); 3190 } 3191 3192 UShort8::UShort8(RValue<UShort8> rhs) 3193 { 3194 storeValue(rhs.value); 3195 } 3196 3197 UShort8::UShort8(const Reference<UShort8> &rhs) 3198 { 3199 Value *value = rhs.loadValue(); 3200 storeValue(value); 3201 } 3202 3203 UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi) 3204 { 3205 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11}; // Real type is v8i16 3206 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle); 3207 3208 storeValue(packed); 3209 } 3210 3211 RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs) 3212 { 3213 storeValue(rhs.value); 3214 3215 return rhs; 3216 } 3217 3218 RValue<UShort8> UShort8::operator=(const UShort8 &rhs) 3219 { 3220 Value *value = rhs.loadValue(); 3221 storeValue(value); 3222 3223 return RValue<UShort8>(value); 3224 } 3225 3226 RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs) 3227 { 3228 Value *value = rhs.loadValue(); 3229 storeValue(value); 3230 3231 return RValue<UShort8>(value); 3232 } 3233 3234 RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs) 3235 { 3236 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value)); 3237 } 3238 3239 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs) 3240 { 3241 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs)); // FIXME: Fallback required 3242 } 3243 3244 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs) 3245 { 3246 return x86::psrlw(lhs, rhs); // FIXME: Fallback required 3247 } 3248 3249 RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs) 3250 { 3251 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value)); 3252 } 3253 3254 RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs) 3255 { 3256 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value)); 3257 } 3258 3259 RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs) 3260 { 3261 return lhs = lhs + rhs; 3262 
} 3263 3264 RValue<UShort8> operator~(RValue<UShort8> val) 3265 { 3266 return RValue<UShort8>(Nucleus::createNot(val.value)); 3267 } 3268 3269 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7) 3270 { 3271 int pshufb[16] = 3272 { 3273 select0 + 0, 3274 select0 + 1, 3275 select1 + 0, 3276 select1 + 1, 3277 select2 + 0, 3278 select2 + 1, 3279 select3 + 0, 3280 select3 + 1, 3281 select4 + 0, 3282 select4 + 1, 3283 select5 + 0, 3284 select5 + 1, 3285 select6 + 0, 3286 select6 + 1, 3287 select7 + 0, 3288 select7 + 1, 3289 }; 3290 3291 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType()); 3292 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb); 3293 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType()); 3294 3295 return RValue<UShort8>(short8); 3296 } 3297 3298 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y) 3299 { 3300 return x86::pmulhuw(x, y); // FIXME: Fallback required 3301 } 3302 3303 Type *UShort8::getType() 3304 { 3305 return T(llvm::VectorType::get(T(UShort::getType()), 8)); 3306 } 3307 3308 Int::Int(Argument<Int> argument) 3309 { 3310 storeValue(argument.value); 3311 } 3312 3313 Int::Int(RValue<Byte> cast) 3314 { 3315 Value *integer = Nucleus::createZExt(cast.value, Int::getType()); 3316 3317 storeValue(integer); 3318 } 3319 3320 Int::Int(RValue<SByte> cast) 3321 { 3322 Value *integer = Nucleus::createSExt(cast.value, Int::getType()); 3323 3324 storeValue(integer); 3325 } 3326 3327 Int::Int(RValue<Short> cast) 3328 { 3329 Value *integer = Nucleus::createSExt(cast.value, Int::getType()); 3330 3331 storeValue(integer); 3332 } 3333 3334 Int::Int(RValue<UShort> cast) 3335 { 3336 Value *integer = Nucleus::createZExt(cast.value, Int::getType()); 3337 3338 storeValue(integer); 3339 } 3340 3341 Int::Int(RValue<Int2> cast) 3342 { 3343 *this = Extract(cast, 0); 3344 } 3345 3346 Int::Int(RValue<Long> cast) 
3347 { 3348 Value *integer = Nucleus::createTrunc(cast.value, Int::getType()); 3349 3350 storeValue(integer); 3351 } 3352 3353 Int::Int(RValue<Float> cast) 3354 { 3355 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType()); 3356 3357 storeValue(integer); 3358 } 3359 3360 Int::Int(int x) 3361 { 3362 storeValue(Nucleus::createConstantInt(x)); 3363 } 3364 3365 Int::Int(RValue<Int> rhs) 3366 { 3367 storeValue(rhs.value); 3368 } 3369 3370 Int::Int(RValue<UInt> rhs) 3371 { 3372 storeValue(rhs.value); 3373 } 3374 3375 Int::Int(const Int &rhs) 3376 { 3377 Value *value = rhs.loadValue(); 3378 storeValue(value); 3379 } 3380 3381 Int::Int(const Reference<Int> &rhs) 3382 { 3383 Value *value = rhs.loadValue(); 3384 storeValue(value); 3385 } 3386 3387 Int::Int(const UInt &rhs) 3388 { 3389 Value *value = rhs.loadValue(); 3390 storeValue(value); 3391 } 3392 3393 Int::Int(const Reference<UInt> &rhs) 3394 { 3395 Value *value = rhs.loadValue(); 3396 storeValue(value); 3397 } 3398 3399 RValue<Int> Int::operator=(int rhs) 3400 { 3401 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs))); 3402 } 3403 3404 RValue<Int> Int::operator=(RValue<Int> rhs) 3405 { 3406 storeValue(rhs.value); 3407 3408 return rhs; 3409 } 3410 3411 RValue<Int> Int::operator=(RValue<UInt> rhs) 3412 { 3413 storeValue(rhs.value); 3414 3415 return RValue<Int>(rhs); 3416 } 3417 3418 RValue<Int> Int::operator=(const Int &rhs) 3419 { 3420 Value *value = rhs.loadValue(); 3421 storeValue(value); 3422 3423 return RValue<Int>(value); 3424 } 3425 3426 RValue<Int> Int::operator=(const Reference<Int> &rhs) 3427 { 3428 Value *value = rhs.loadValue(); 3429 storeValue(value); 3430 3431 return RValue<Int>(value); 3432 } 3433 3434 RValue<Int> Int::operator=(const UInt &rhs) 3435 { 3436 Value *value = rhs.loadValue(); 3437 storeValue(value); 3438 3439 return RValue<Int>(value); 3440 } 3441 3442 RValue<Int> Int::operator=(const Reference<UInt> &rhs) 3443 { 3444 Value *value = rhs.loadValue(); 3445 
storeValue(value); 3446 3447 return RValue<Int>(value); 3448 } 3449 3450 RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs) 3451 { 3452 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value)); 3453 } 3454 3455 RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs) 3456 { 3457 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value)); 3458 } 3459 3460 RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs) 3461 { 3462 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value)); 3463 } 3464 3465 RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs) 3466 { 3467 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value)); 3468 } 3469 3470 RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs) 3471 { 3472 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value)); 3473 } 3474 3475 RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs) 3476 { 3477 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value)); 3478 } 3479 3480 RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs) 3481 { 3482 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value)); 3483 } 3484 3485 RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs) 3486 { 3487 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value)); 3488 } 3489 3490 RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs) 3491 { 3492 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value)); 3493 } 3494 3495 RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs) 3496 { 3497 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value)); 3498 } 3499 3500 RValue<Int> operator+=(Int &lhs, RValue<Int> rhs) 3501 { 3502 return lhs = lhs + rhs; 3503 } 3504 3505 RValue<Int> operator-=(Int &lhs, RValue<Int> rhs) 3506 { 3507 return lhs = lhs - rhs; 3508 } 3509 3510 RValue<Int> operator*=(Int &lhs, RValue<Int> rhs) 3511 { 3512 return lhs = lhs * rhs; 3513 } 3514 3515 RValue<Int> operator/=(Int &lhs, RValue<Int> rhs) 3516 { 3517 return lhs = lhs / rhs; 3518 } 3519 3520 RValue<Int> operator%=(Int &lhs, 
RValue<Int> rhs) 3521 { 3522 return lhs = lhs % rhs; 3523 } 3524 3525 RValue<Int> operator&=(Int &lhs, RValue<Int> rhs) 3526 { 3527 return lhs = lhs & rhs; 3528 } 3529 3530 RValue<Int> operator|=(Int &lhs, RValue<Int> rhs) 3531 { 3532 return lhs = lhs | rhs; 3533 } 3534 3535 RValue<Int> operator^=(Int &lhs, RValue<Int> rhs) 3536 { 3537 return lhs = lhs ^ rhs; 3538 } 3539 3540 RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs) 3541 { 3542 return lhs = lhs << rhs; 3543 } 3544 3545 RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs) 3546 { 3547 return lhs = lhs >> rhs; 3548 } 3549 3550 RValue<Int> operator+(RValue<Int> val) 3551 { 3552 return val; 3553 } 3554 3555 RValue<Int> operator-(RValue<Int> val) 3556 { 3557 return RValue<Int>(Nucleus::createNeg(val.value)); 3558 } 3559 3560 RValue<Int> operator~(RValue<Int> val) 3561 { 3562 return RValue<Int>(Nucleus::createNot(val.value)); 3563 } 3564 3565 RValue<Int> operator++(Int &val, int) // Post-increment 3566 { 3567 RValue<Int> res = val; 3568 3569 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1))); 3570 val.storeValue(inc); 3571 3572 return res; 3573 } 3574 3575 const Int &operator++(Int &val) // Pre-increment 3576 { 3577 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1))); 3578 val.storeValue(inc); 3579 3580 return val; 3581 } 3582 3583 RValue<Int> operator--(Int &val, int) // Post-decrement 3584 { 3585 RValue<Int> res = val; 3586 3587 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1))); 3588 val.storeValue(inc); 3589 3590 return res; 3591 } 3592 3593 const Int &operator--(Int &val) // Pre-decrement 3594 { 3595 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1))); 3596 val.storeValue(inc); 3597 3598 return val; 3599 } 3600 3601 RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs) 3602 { 3603 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value)); 3604 } 3605 3606 RValue<Bool> 
operator<=(RValue<Int> lhs, RValue<Int> rhs) 3607 { 3608 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value)); 3609 } 3610 3611 RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs) 3612 { 3613 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value)); 3614 } 3615 3616 RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs) 3617 { 3618 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value)); 3619 } 3620 3621 RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs) 3622 { 3623 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 3624 } 3625 3626 RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs) 3627 { 3628 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 3629 } 3630 3631 RValue<Int> Max(RValue<Int> x, RValue<Int> y) 3632 { 3633 return IfThenElse(x > y, x, y); 3634 } 3635 3636 RValue<Int> Min(RValue<Int> x, RValue<Int> y) 3637 { 3638 return IfThenElse(x < y, x, y); 3639 } 3640 3641 RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max) 3642 { 3643 return Min(Max(x, min), max); 3644 } 3645 3646 RValue<Int> RoundInt(RValue<Float> cast) 3647 { 3648 return x86::cvtss2si(cast); 3649 3650 // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f)); 3651 } 3652 3653 Type *Int::getType() 3654 { 3655 return T(llvm::Type::getInt32Ty(*::context)); 3656 } 3657 3658 Long::Long(RValue<Int> cast) 3659 { 3660 Value *integer = Nucleus::createSExt(cast.value, Long::getType()); 3661 3662 storeValue(integer); 3663 } 3664 3665 Long::Long(RValue<UInt> cast) 3666 { 3667 Value *integer = Nucleus::createZExt(cast.value, Long::getType()); 3668 3669 storeValue(integer); 3670 } 3671 3672 Long::Long(RValue<Long> rhs) 3673 { 3674 storeValue(rhs.value); 3675 } 3676 3677 RValue<Long> Long::operator=(int64_t rhs) 3678 { 3679 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs))); 3680 } 3681 3682 RValue<Long> Long::operator=(RValue<Long> rhs) 3683 { 3684 storeValue(rhs.value); 3685 3686 return 
rhs; 3687 } 3688 3689 RValue<Long> Long::operator=(const Long &rhs) 3690 { 3691 Value *value = rhs.loadValue(); 3692 storeValue(value); 3693 3694 return RValue<Long>(value); 3695 } 3696 3697 RValue<Long> Long::operator=(const Reference<Long> &rhs) 3698 { 3699 Value *value = rhs.loadValue(); 3700 storeValue(value); 3701 3702 return RValue<Long>(value); 3703 } 3704 3705 RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs) 3706 { 3707 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value)); 3708 } 3709 3710 RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs) 3711 { 3712 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value)); 3713 } 3714 3715 RValue<Long> operator+=(Long &lhs, RValue<Long> rhs) 3716 { 3717 return lhs = lhs + rhs; 3718 } 3719 3720 RValue<Long> operator-=(Long &lhs, RValue<Long> rhs) 3721 { 3722 return lhs = lhs - rhs; 3723 } 3724 3725 RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y) 3726 { 3727 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value)); 3728 } 3729 3730 Type *Long::getType() 3731 { 3732 return T(llvm::Type::getInt64Ty(*::context)); 3733 } 3734 3735 UInt::UInt(Argument<UInt> argument) 3736 { 3737 storeValue(argument.value); 3738 } 3739 3740 UInt::UInt(RValue<UShort> cast) 3741 { 3742 Value *integer = Nucleus::createZExt(cast.value, UInt::getType()); 3743 3744 storeValue(integer); 3745 } 3746 3747 UInt::UInt(RValue<Long> cast) 3748 { 3749 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType()); 3750 3751 storeValue(integer); 3752 } 3753 3754 UInt::UInt(RValue<Float> cast) 3755 { 3756 // Note: createFPToUI is broken, must perform conversion using createFPtoSI 3757 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType()); 3758 3759 // Smallest positive value representable in UInt, but not in Int 3760 const unsigned int ustart = 0x80000000u; 3761 const float ustartf = float(ustart); 3762 3763 // If the value is negative, store 0, otherwise store the result of the 
conversion 3764 storeValue((~(As<Int>(cast) >> 31) & 3765 // Check if the value can be represented as an Int 3766 IfThenElse(cast >= ustartf, 3767 // If the value is too large, subtract ustart and re-add it after conversion. 3768 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)), 3769 // Otherwise, just convert normally 3770 Int(cast))).value); 3771 } 3772 3773 UInt::UInt(int x) 3774 { 3775 storeValue(Nucleus::createConstantInt(x)); 3776 } 3777 3778 UInt::UInt(unsigned int x) 3779 { 3780 storeValue(Nucleus::createConstantInt(x)); 3781 } 3782 3783 UInt::UInt(RValue<UInt> rhs) 3784 { 3785 storeValue(rhs.value); 3786 } 3787 3788 UInt::UInt(RValue<Int> rhs) 3789 { 3790 storeValue(rhs.value); 3791 } 3792 3793 UInt::UInt(const UInt &rhs) 3794 { 3795 Value *value = rhs.loadValue(); 3796 storeValue(value); 3797 } 3798 3799 UInt::UInt(const Reference<UInt> &rhs) 3800 { 3801 Value *value = rhs.loadValue(); 3802 storeValue(value); 3803 } 3804 3805 UInt::UInt(const Int &rhs) 3806 { 3807 Value *value = rhs.loadValue(); 3808 storeValue(value); 3809 } 3810 3811 UInt::UInt(const Reference<Int> &rhs) 3812 { 3813 Value *value = rhs.loadValue(); 3814 storeValue(value); 3815 } 3816 3817 RValue<UInt> UInt::operator=(unsigned int rhs) 3818 { 3819 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs))); 3820 } 3821 3822 RValue<UInt> UInt::operator=(RValue<UInt> rhs) 3823 { 3824 storeValue(rhs.value); 3825 3826 return rhs; 3827 } 3828 3829 RValue<UInt> UInt::operator=(RValue<Int> rhs) 3830 { 3831 storeValue(rhs.value); 3832 3833 return RValue<UInt>(rhs); 3834 } 3835 3836 RValue<UInt> UInt::operator=(const UInt &rhs) 3837 { 3838 Value *value = rhs.loadValue(); 3839 storeValue(value); 3840 3841 return RValue<UInt>(value); 3842 } 3843 3844 RValue<UInt> UInt::operator=(const Reference<UInt> &rhs) 3845 { 3846 Value *value = rhs.loadValue(); 3847 storeValue(value); 3848 3849 return RValue<UInt>(value); 3850 } 3851 3852 RValue<UInt> UInt::operator=(const Int &rhs) 3853 { 3854 
Value *value = rhs.loadValue(); 3855 storeValue(value); 3856 3857 return RValue<UInt>(value); 3858 } 3859 3860 RValue<UInt> UInt::operator=(const Reference<Int> &rhs) 3861 { 3862 Value *value = rhs.loadValue(); 3863 storeValue(value); 3864 3865 return RValue<UInt>(value); 3866 } 3867 3868 RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs) 3869 { 3870 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value)); 3871 } 3872 3873 RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs) 3874 { 3875 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value)); 3876 } 3877 3878 RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs) 3879 { 3880 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value)); 3881 } 3882 3883 RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs) 3884 { 3885 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value)); 3886 } 3887 3888 RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs) 3889 { 3890 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value)); 3891 } 3892 3893 RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs) 3894 { 3895 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value)); 3896 } 3897 3898 RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs) 3899 { 3900 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value)); 3901 } 3902 3903 RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs) 3904 { 3905 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value)); 3906 } 3907 3908 RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs) 3909 { 3910 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value)); 3911 } 3912 3913 RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs) 3914 { 3915 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value)); 3916 } 3917 3918 RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs) 3919 { 3920 return lhs = lhs + rhs; 3921 } 3922 3923 RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs) 3924 { 3925 return lhs = lhs - rhs; 
3926 } 3927 3928 RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs) 3929 { 3930 return lhs = lhs * rhs; 3931 } 3932 3933 RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs) 3934 { 3935 return lhs = lhs / rhs; 3936 } 3937 3938 RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs) 3939 { 3940 return lhs = lhs % rhs; 3941 } 3942 3943 RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs) 3944 { 3945 return lhs = lhs & rhs; 3946 } 3947 3948 RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs) 3949 { 3950 return lhs = lhs | rhs; 3951 } 3952 3953 RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs) 3954 { 3955 return lhs = lhs ^ rhs; 3956 } 3957 3958 RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs) 3959 { 3960 return lhs = lhs << rhs; 3961 } 3962 3963 RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs) 3964 { 3965 return lhs = lhs >> rhs; 3966 } 3967 3968 RValue<UInt> operator+(RValue<UInt> val) 3969 { 3970 return val; 3971 } 3972 3973 RValue<UInt> operator-(RValue<UInt> val) 3974 { 3975 return RValue<UInt>(Nucleus::createNeg(val.value)); 3976 } 3977 3978 RValue<UInt> operator~(RValue<UInt> val) 3979 { 3980 return RValue<UInt>(Nucleus::createNot(val.value)); 3981 } 3982 3983 RValue<UInt> operator++(UInt &val, int) // Post-increment 3984 { 3985 RValue<UInt> res = val; 3986 3987 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1))); 3988 val.storeValue(inc); 3989 3990 return res; 3991 } 3992 3993 const UInt &operator++(UInt &val) // Pre-increment 3994 { 3995 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1))); 3996 val.storeValue(inc); 3997 3998 return val; 3999 } 4000 4001 RValue<UInt> operator--(UInt &val, int) // Post-decrement 4002 { 4003 RValue<UInt> res = val; 4004 4005 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1))); 4006 val.storeValue(inc); 4007 4008 return res; 4009 } 4010 4011 const UInt &operator--(UInt &val) // Pre-decrement 4012 { 4013 Value *inc = 
Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1))); 4014 val.storeValue(inc); 4015 4016 return val; 4017 } 4018 4019 RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y) 4020 { 4021 return IfThenElse(x > y, x, y); 4022 } 4023 4024 RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y) 4025 { 4026 return IfThenElse(x < y, x, y); 4027 } 4028 4029 RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max) 4030 { 4031 return Min(Max(x, min), max); 4032 } 4033 4034 RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs) 4035 { 4036 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value)); 4037 } 4038 4039 RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs) 4040 { 4041 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value)); 4042 } 4043 4044 RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs) 4045 { 4046 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value)); 4047 } 4048 4049 RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs) 4050 { 4051 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value)); 4052 } 4053 4054 RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs) 4055 { 4056 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 4057 } 4058 4059 RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs) 4060 { 4061 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 4062 } 4063 4064 // RValue<UInt> RoundUInt(RValue<Float> cast) 4065 // { 4066 // return x86::cvtss2si(val); // FIXME: Unsigned 4067 // 4068 // // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f)); 4069 // } 4070 4071 Type *UInt::getType() 4072 { 4073 return T(llvm::Type::getInt32Ty(*::context)); 4074 } 4075 4076 // Int2::Int2(RValue<Int> cast) 4077 // { 4078 // Value *extend = Nucleus::createZExt(cast.value, Long::getType()); 4079 // Value *vector = Nucleus::createBitCast(extend, Int2::getType()); 4080 // 4081 // int shuffle[2] = {0, 0}; 4082 // Value *replicate = 
Nucleus::createShuffleVector(vector, vector, shuffle); 4083 // 4084 // storeValue(replicate); 4085 // } 4086 4087 Int2::Int2(RValue<Int4> cast) 4088 { 4089 storeValue(Nucleus::createBitCast(cast.value, getType())); 4090 } 4091 4092 Int2::Int2(int x, int y) 4093 { 4094 int64_t constantVector[2] = {x, y}; 4095 storeValue(Nucleus::createConstantVector(constantVector, getType())); 4096 } 4097 4098 Int2::Int2(RValue<Int2> rhs) 4099 { 4100 storeValue(rhs.value); 4101 } 4102 4103 Int2::Int2(const Int2 &rhs) 4104 { 4105 Value *value = rhs.loadValue(); 4106 storeValue(value); 4107 } 4108 4109 Int2::Int2(const Reference<Int2> &rhs) 4110 { 4111 Value *value = rhs.loadValue(); 4112 storeValue(value); 4113 } 4114 4115 Int2::Int2(RValue<Int> lo, RValue<Int> hi) 4116 { 4117 int shuffle[4] = {0, 4, 1, 5}; 4118 Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle); 4119 4120 storeValue(Nucleus::createBitCast(packed, Int2::getType())); 4121 } 4122 4123 RValue<Int2> Int2::operator=(RValue<Int2> rhs) 4124 { 4125 storeValue(rhs.value); 4126 4127 return rhs; 4128 } 4129 4130 RValue<Int2> Int2::operator=(const Int2 &rhs) 4131 { 4132 Value *value = rhs.loadValue(); 4133 storeValue(value); 4134 4135 return RValue<Int2>(value); 4136 } 4137 4138 RValue<Int2> Int2::operator=(const Reference<Int2> &rhs) 4139 { 4140 Value *value = rhs.loadValue(); 4141 storeValue(value); 4142 4143 return RValue<Int2>(value); 4144 } 4145 4146 RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs) 4147 { 4148 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value)); 4149 } 4150 4151 RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs) 4152 { 4153 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value)); 4154 } 4155 4156 // RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs) 4157 // { 4158 // return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value)); 4159 // } 4160 4161 // RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs) 4162 // { 
4163 // return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value)); 4164 // } 4165 4166 // RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs) 4167 // { 4168 // return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value)); 4169 // } 4170 4171 RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs) 4172 { 4173 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value)); 4174 } 4175 4176 RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs) 4177 { 4178 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value)); 4179 } 4180 4181 RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs) 4182 { 4183 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value)); 4184 } 4185 4186 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs) 4187 { 4188 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value)); 4189 4190 return x86::pslld(lhs, rhs); 4191 } 4192 4193 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs) 4194 { 4195 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value)); 4196 4197 return x86::psrad(lhs, rhs); 4198 } 4199 4200 RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs) 4201 { 4202 return lhs = lhs + rhs; 4203 } 4204 4205 RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs) 4206 { 4207 return lhs = lhs - rhs; 4208 } 4209 4210 // RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs) 4211 // { 4212 // return lhs = lhs * rhs; 4213 // } 4214 4215 // RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs) 4216 // { 4217 // return lhs = lhs / rhs; 4218 // } 4219 4220 // RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs) 4221 // { 4222 // return lhs = lhs % rhs; 4223 // } 4224 4225 RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs) 4226 { 4227 return lhs = lhs & rhs; 4228 } 4229 4230 RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs) 4231 { 4232 return lhs = lhs | rhs; 4233 } 4234 4235 RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs) 4236 { 4237 return lhs = lhs ^ rhs; 4238 } 4239 4240 RValue<Int2> 
operator<<=(Int2 &lhs, unsigned char rhs) 4241 { 4242 return lhs = lhs << rhs; 4243 } 4244 4245 RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs) 4246 { 4247 return lhs = lhs >> rhs; 4248 } 4249 4250 // RValue<Int2> operator+(RValue<Int2> val) 4251 // { 4252 // return val; 4253 // } 4254 4255 // RValue<Int2> operator-(RValue<Int2> val) 4256 // { 4257 // return RValue<Int2>(Nucleus::createNeg(val.value)); 4258 // } 4259 4260 RValue<Int2> operator~(RValue<Int2> val) 4261 { 4262 return RValue<Int2>(Nucleus::createNot(val.value)); 4263 } 4264 4265 RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y) 4266 { 4267 int shuffle[4] = {0, 4, 1, 5}; // Real type is v4i32 4268 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 4269 } 4270 4271 RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y) 4272 { 4273 int shuffle[4] = {0, 4, 1, 5}; // Real type is v4i32 4274 auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 4275 return As<Short4>(Swizzle(lowHigh, 0xEE)); 4276 } 4277 4278 RValue<Int> Extract(RValue<Int2> val, int i) 4279 { 4280 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i)); 4281 } 4282 4283 RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i) 4284 { 4285 return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i)); 4286 } 4287 4288 Type *Int2::getType() 4289 { 4290 return T(Type_v2i32); 4291 } 4292 4293 UInt2::UInt2(unsigned int x, unsigned int y) 4294 { 4295 int64_t constantVector[2] = {x, y}; 4296 storeValue(Nucleus::createConstantVector(constantVector, getType())); 4297 } 4298 4299 UInt2::UInt2(RValue<UInt2> rhs) 4300 { 4301 storeValue(rhs.value); 4302 } 4303 4304 UInt2::UInt2(const UInt2 &rhs) 4305 { 4306 Value *value = rhs.loadValue(); 4307 storeValue(value); 4308 } 4309 4310 UInt2::UInt2(const Reference<UInt2> &rhs) 4311 { 4312 Value *value = rhs.loadValue(); 4313 storeValue(value); 4314 } 4315 4316 RValue<UInt2> 
UInt2::operator=(RValue<UInt2> rhs) 4317 { 4318 storeValue(rhs.value); 4319 4320 return rhs; 4321 } 4322 4323 RValue<UInt2> UInt2::operator=(const UInt2 &rhs) 4324 { 4325 Value *value = rhs.loadValue(); 4326 storeValue(value); 4327 4328 return RValue<UInt2>(value); 4329 } 4330 4331 RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs) 4332 { 4333 Value *value = rhs.loadValue(); 4334 storeValue(value); 4335 4336 return RValue<UInt2>(value); 4337 } 4338 4339 RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs) 4340 { 4341 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value)); 4342 } 4343 4344 RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs) 4345 { 4346 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value)); 4347 } 4348 4349 // RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs) 4350 // { 4351 // return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value)); 4352 // } 4353 4354 // RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs) 4355 // { 4356 // return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value)); 4357 // } 4358 4359 // RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs) 4360 // { 4361 // return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value)); 4362 // } 4363 4364 RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs) 4365 { 4366 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value)); 4367 } 4368 4369 RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs) 4370 { 4371 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value)); 4372 } 4373 4374 RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs) 4375 { 4376 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value)); 4377 } 4378 4379 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs) 4380 { 4381 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value)); 4382 4383 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs)); 4384 } 4385 4386 RValue<UInt2> 
operator>>(RValue<UInt2> lhs, unsigned char rhs) 4387 { 4388 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value)); 4389 4390 return x86::psrld(lhs, rhs); 4391 } 4392 4393 RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs) 4394 { 4395 return lhs = lhs + rhs; 4396 } 4397 4398 RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs) 4399 { 4400 return lhs = lhs - rhs; 4401 } 4402 4403 // RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs) 4404 // { 4405 // return lhs = lhs * rhs; 4406 // } 4407 4408 // RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs) 4409 // { 4410 // return lhs = lhs / rhs; 4411 // } 4412 4413 // RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs) 4414 // { 4415 // return lhs = lhs % rhs; 4416 // } 4417 4418 RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs) 4419 { 4420 return lhs = lhs & rhs; 4421 } 4422 4423 RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs) 4424 { 4425 return lhs = lhs | rhs; 4426 } 4427 4428 RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs) 4429 { 4430 return lhs = lhs ^ rhs; 4431 } 4432 4433 RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs) 4434 { 4435 return lhs = lhs << rhs; 4436 } 4437 4438 RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs) 4439 { 4440 return lhs = lhs >> rhs; 4441 } 4442 4443 // RValue<UInt2> operator+(RValue<UInt2> val) 4444 // { 4445 // return val; 4446 // } 4447 4448 // RValue<UInt2> operator-(RValue<UInt2> val) 4449 // { 4450 // return RValue<UInt2>(Nucleus::createNeg(val.value)); 4451 // } 4452 4453 RValue<UInt2> operator~(RValue<UInt2> val) 4454 { 4455 return RValue<UInt2>(Nucleus::createNot(val.value)); 4456 } 4457 4458 Type *UInt2::getType() 4459 { 4460 return T(Type_v2i32); 4461 } 4462 4463 Int4::Int4() : XYZW(this) 4464 { 4465 } 4466 4467 Int4::Int4(RValue<Byte4> cast) : XYZW(this) 4468 { 4469 if(CPUID::supportsSSE4_1()) 4470 { 4471 *this = x86::pmovzxbd(As<Byte16>(cast)); 4472 } 4473 else 4474 { 4475 int swizzle[16] = {0, 16, 1, 17, 2, 
18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; 4476 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType()); 4477 Value *b = Nucleus::createShuffleVector(a, V(Nucleus::createNullValue(Byte16::getType())), swizzle); 4478 4479 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11}; 4480 Value *c = Nucleus::createBitCast(b, Short8::getType()); 4481 Value *d = Nucleus::createShuffleVector(c, V(Nucleus::createNullValue(Short8::getType())), swizzle2); 4482 4483 *this = As<Int4>(d); 4484 } 4485 } 4486 4487 Int4::Int4(RValue<SByte4> cast) : XYZW(this) 4488 { 4489 if(CPUID::supportsSSE4_1()) 4490 { 4491 *this = x86::pmovsxbd(As<SByte16>(cast)); 4492 } 4493 else 4494 { 4495 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}; 4496 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType()); 4497 Value *b = Nucleus::createShuffleVector(a, a, swizzle); 4498 4499 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3}; 4500 Value *c = Nucleus::createBitCast(b, Short8::getType()); 4501 Value *d = Nucleus::createShuffleVector(c, c, swizzle2); 4502 4503 *this = As<Int4>(d) >> 24; 4504 } 4505 } 4506 4507 Int4::Int4(RValue<Float4> cast) : XYZW(this) 4508 { 4509 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType()); 4510 4511 storeValue(xyzw); 4512 } 4513 4514 Int4::Int4(RValue<Short4> cast) : XYZW(this) 4515 { 4516 if(CPUID::supportsSSE4_1()) 4517 { 4518 *this = x86::pmovsxwd(As<Short8>(cast)); 4519 } 4520 else 4521 { 4522 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3}; 4523 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle); 4524 *this = As<Int4>(c) >> 16; 4525 } 4526 } 4527 4528 Int4::Int4(RValue<UShort4> cast) : XYZW(this) 4529 { 4530 if(CPUID::supportsSSE4_1()) 4531 { 4532 *this = x86::pmovzxwd(As<UShort8>(cast)); 4533 } 4534 else 4535 { 4536 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; 4537 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle); 4538 *this = As<Int4>(c); 4539 } 4540 } 4541 4542 
Int4::Int4(int xyzw) : XYZW(this) 4543 { 4544 constant(xyzw, xyzw, xyzw, xyzw); 4545 } 4546 4547 Int4::Int4(int x, int yzw) : XYZW(this) 4548 { 4549 constant(x, yzw, yzw, yzw); 4550 } 4551 4552 Int4::Int4(int x, int y, int zw) : XYZW(this) 4553 { 4554 constant(x, y, zw, zw); 4555 } 4556 4557 Int4::Int4(int x, int y, int z, int w) : XYZW(this) 4558 { 4559 constant(x, y, z, w); 4560 } 4561 4562 void Int4::constant(int x, int y, int z, int w) 4563 { 4564 int64_t constantVector[4] = {x, y, z, w}; 4565 storeValue(Nucleus::createConstantVector(constantVector, getType())); 4566 } 4567 4568 Int4::Int4(RValue<Int4> rhs) : XYZW(this) 4569 { 4570 storeValue(rhs.value); 4571 } 4572 4573 Int4::Int4(const Int4 &rhs) : XYZW(this) 4574 { 4575 Value *value = rhs.loadValue(); 4576 storeValue(value); 4577 } 4578 4579 Int4::Int4(const Reference<Int4> &rhs) : XYZW(this) 4580 { 4581 Value *value = rhs.loadValue(); 4582 storeValue(value); 4583 } 4584 4585 Int4::Int4(RValue<UInt4> rhs) : XYZW(this) 4586 { 4587 storeValue(rhs.value); 4588 } 4589 4590 Int4::Int4(const UInt4 &rhs) : XYZW(this) 4591 { 4592 Value *value = rhs.loadValue(); 4593 storeValue(value); 4594 } 4595 4596 Int4::Int4(const Reference<UInt4> &rhs) : XYZW(this) 4597 { 4598 Value *value = rhs.loadValue(); 4599 storeValue(value); 4600 } 4601 4602 Int4::Int4(RValue<Int2> lo, RValue<Int2> hi) : XYZW(this) 4603 { 4604 int shuffle[4] = {0, 1, 4, 5}; // Real type is v4i32 4605 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle); 4606 4607 storeValue(packed); 4608 } 4609 4610 Int4::Int4(RValue<Int> rhs) : XYZW(this) 4611 { 4612 Value *vector = loadValue(); 4613 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0); 4614 4615 int swizzle[4] = {0, 0, 0, 0}; 4616 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle); 4617 4618 storeValue(replicate); 4619 } 4620 4621 Int4::Int4(const Int &rhs) : XYZW(this) 4622 { 4623 *this = RValue<Int>(rhs.loadValue()); 4624 } 4625 4626 
Int4::Int4(const Reference<Int> &rhs) : XYZW(this) 4627 { 4628 *this = RValue<Int>(rhs.loadValue()); 4629 } 4630 4631 RValue<Int4> Int4::operator=(RValue<Int4> rhs) 4632 { 4633 storeValue(rhs.value); 4634 4635 return rhs; 4636 } 4637 4638 RValue<Int4> Int4::operator=(const Int4 &rhs) 4639 { 4640 Value *value = rhs.loadValue(); 4641 storeValue(value); 4642 4643 return RValue<Int4>(value); 4644 } 4645 4646 RValue<Int4> Int4::operator=(const Reference<Int4> &rhs) 4647 { 4648 Value *value = rhs.loadValue(); 4649 storeValue(value); 4650 4651 return RValue<Int4>(value); 4652 } 4653 4654 RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs) 4655 { 4656 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value)); 4657 } 4658 4659 RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs) 4660 { 4661 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value)); 4662 } 4663 4664 RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs) 4665 { 4666 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value)); 4667 } 4668 4669 RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs) 4670 { 4671 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value)); 4672 } 4673 4674 RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs) 4675 { 4676 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value)); 4677 } 4678 4679 RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs) 4680 { 4681 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value)); 4682 } 4683 4684 RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs) 4685 { 4686 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value)); 4687 } 4688 4689 RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs) 4690 { 4691 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value)); 4692 } 4693 4694 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs) 4695 { 4696 return x86::pslld(lhs, rhs); 4697 } 4698 4699 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs) 4700 { 4701 return 
x86::psrad(lhs, rhs); 4702 } 4703 4704 RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs) 4705 { 4706 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value)); 4707 } 4708 4709 RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs) 4710 { 4711 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value)); 4712 } 4713 4714 RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs) 4715 { 4716 return lhs = lhs + rhs; 4717 } 4718 4719 RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs) 4720 { 4721 return lhs = lhs - rhs; 4722 } 4723 4724 RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs) 4725 { 4726 return lhs = lhs * rhs; 4727 } 4728 4729 // RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs) 4730 // { 4731 // return lhs = lhs / rhs; 4732 // } 4733 4734 // RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs) 4735 // { 4736 // return lhs = lhs % rhs; 4737 // } 4738 4739 RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs) 4740 { 4741 return lhs = lhs & rhs; 4742 } 4743 4744 RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs) 4745 { 4746 return lhs = lhs | rhs; 4747 } 4748 4749 RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs) 4750 { 4751 return lhs = lhs ^ rhs; 4752 } 4753 4754 RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs) 4755 { 4756 return lhs = lhs << rhs; 4757 } 4758 4759 RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs) 4760 { 4761 return lhs = lhs >> rhs; 4762 } 4763 4764 RValue<Int4> operator+(RValue<Int4> val) 4765 { 4766 return val; 4767 } 4768 4769 RValue<Int4> operator-(RValue<Int4> val) 4770 { 4771 return RValue<Int4>(Nucleus::createNeg(val.value)); 4772 } 4773 4774 RValue<Int4> operator~(RValue<Int4> val) 4775 { 4776 return RValue<Int4>(Nucleus::createNot(val.value)); 4777 } 4778 4779 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y) 4780 { 4781 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 4782 // Restore the following line when LLVM is updated to a version where this issue is fixed. 
4783 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())); 4784 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 4785 } 4786 4787 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y) 4788 { 4789 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 4790 // Restore the following line when LLVM is updated to a version where this issue is fixed. 4791 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())); 4792 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 4793 } 4794 4795 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y) 4796 { 4797 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 4798 // Restore the following line when LLVM is updated to a version where this issue is fixed. 4799 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())); 4800 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 4801 } 4802 4803 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y) 4804 { 4805 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 4806 // Restore the following line when LLVM is updated to a version where this issue is fixed. 4807 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())); 4808 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 4809 } 4810 4811 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y) 4812 { 4813 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 4814 // Restore the following line when LLVM is updated to a version where this issue is fixed. 
4815 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())); 4816 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 4817 } 4818 4819 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y) 4820 { 4821 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 4822 // Restore the following line when LLVM is updated to a version where this issue is fixed. 4823 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())); 4824 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 4825 } 4826 4827 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y) 4828 { 4829 if(CPUID::supportsSSE4_1()) 4830 { 4831 return x86::pmaxsd(x, y); 4832 } 4833 else 4834 { 4835 RValue<Int4> greater = CmpNLE(x, y); 4836 return (x & greater) | (y & ~greater); 4837 } 4838 } 4839 4840 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y) 4841 { 4842 if(CPUID::supportsSSE4_1()) 4843 { 4844 return x86::pminsd(x, y); 4845 } 4846 else 4847 { 4848 RValue<Int4> less = CmpLT(x, y); 4849 return (x & less) | (y & ~less); 4850 } 4851 } 4852 4853 RValue<Int4> RoundInt(RValue<Float4> cast) 4854 { 4855 return x86::cvtps2dq(cast); 4856 } 4857 4858 RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y) 4859 { 4860 return x86::packssdw(x, y); 4861 } 4862 4863 RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y) 4864 { 4865 return x86::packusdw(x, y); 4866 } 4867 4868 RValue<Int> Extract(RValue<Int4> x, int i) 4869 { 4870 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i)); 4871 } 4872 4873 RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i) 4874 { 4875 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i)); 4876 } 4877 4878 RValue<Int> SignMask(RValue<Int4> x) 4879 { 4880 return x86::movmskps(As<Float4>(x)); 
4881 } 4882 4883 RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select) 4884 { 4885 return RValue<Int4>(createSwizzle4(x.value, select)); 4886 } 4887 4888 Type *Int4::getType() 4889 { 4890 return T(llvm::VectorType::get(T(Int::getType()), 4)); 4891 } 4892 4893 UInt4::UInt4() : XYZW(this) 4894 { 4895 } 4896 4897 UInt4::UInt4(RValue<Float4> cast) : XYZW(this) 4898 { 4899 // Note: createFPToUI is broken, must perform conversion using createFPtoSI 4900 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType()); 4901 4902 // Smallest positive value representable in UInt, but not in Int 4903 const unsigned int ustart = 0x80000000u; 4904 const float ustartf = float(ustart); 4905 4906 // Check if the value can be represented as an Int 4907 Int4 uiValue = CmpNLT(cast, Float4(ustartf)); 4908 // If the value is too large, subtract ustart and re-add it after conversion. 4909 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) | 4910 // Otherwise, just convert normally 4911 (~uiValue & Int4(cast)); 4912 // If the value is negative, store 0, otherwise store the result of the conversion 4913 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value); 4914 } 4915 4916 UInt4::UInt4(int xyzw) : XYZW(this) 4917 { 4918 constant(xyzw, xyzw, xyzw, xyzw); 4919 } 4920 4921 UInt4::UInt4(int x, int yzw) : XYZW(this) 4922 { 4923 constant(x, yzw, yzw, yzw); 4924 } 4925 4926 UInt4::UInt4(int x, int y, int zw) : XYZW(this) 4927 { 4928 constant(x, y, zw, zw); 4929 } 4930 4931 UInt4::UInt4(int x, int y, int z, int w) : XYZW(this) 4932 { 4933 constant(x, y, z, w); 4934 } 4935 4936 void UInt4::constant(int x, int y, int z, int w) 4937 { 4938 int64_t constantVector[4] = {x, y, z, w}; 4939 storeValue(Nucleus::createConstantVector(constantVector, getType())); 4940 } 4941 4942 UInt4::UInt4(RValue<UInt4> rhs) : XYZW(this) 4943 { 4944 storeValue(rhs.value); 4945 } 4946 4947 UInt4::UInt4(const UInt4 &rhs) : XYZW(this) 4948 { 4949 Value *value = rhs.loadValue(); 
4950 storeValue(value); 4951 } 4952 4953 UInt4::UInt4(const Reference<UInt4> &rhs) : XYZW(this) 4954 { 4955 Value *value = rhs.loadValue(); 4956 storeValue(value); 4957 } 4958 4959 UInt4::UInt4(RValue<Int4> rhs) : XYZW(this) 4960 { 4961 storeValue(rhs.value); 4962 } 4963 4964 UInt4::UInt4(const Int4 &rhs) : XYZW(this) 4965 { 4966 Value *value = rhs.loadValue(); 4967 storeValue(value); 4968 } 4969 4970 UInt4::UInt4(const Reference<Int4> &rhs) : XYZW(this) 4971 { 4972 Value *value = rhs.loadValue(); 4973 storeValue(value); 4974 } 4975 4976 UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi) : XYZW(this) 4977 { 4978 int shuffle[4] = {0, 1, 4, 5}; // Real type is v4i32 4979 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle); 4980 4981 storeValue(packed); 4982 } 4983 4984 RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs) 4985 { 4986 storeValue(rhs.value); 4987 4988 return rhs; 4989 } 4990 4991 RValue<UInt4> UInt4::operator=(const UInt4 &rhs) 4992 { 4993 Value *value = rhs.loadValue(); 4994 storeValue(value); 4995 4996 return RValue<UInt4>(value); 4997 } 4998 4999 RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs) 5000 { 5001 Value *value = rhs.loadValue(); 5002 storeValue(value); 5003 5004 return RValue<UInt4>(value); 5005 } 5006 5007 RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs) 5008 { 5009 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value)); 5010 } 5011 5012 RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs) 5013 { 5014 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value)); 5015 } 5016 5017 RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs) 5018 { 5019 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value)); 5020 } 5021 5022 RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs) 5023 { 5024 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value)); 5025 } 5026 5027 RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs) 5028 { 5029 return 
RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value)); 5030 } 5031 5032 RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs) 5033 { 5034 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value)); 5035 } 5036 5037 RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs) 5038 { 5039 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value)); 5040 } 5041 5042 RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs) 5043 { 5044 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value)); 5045 } 5046 5047 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs) 5048 { 5049 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs)); 5050 } 5051 5052 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs) 5053 { 5054 return x86::psrld(lhs, rhs); 5055 } 5056 5057 RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs) 5058 { 5059 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value)); 5060 } 5061 5062 RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs) 5063 { 5064 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value)); 5065 } 5066 5067 RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs) 5068 { 5069 return lhs = lhs + rhs; 5070 } 5071 5072 RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs) 5073 { 5074 return lhs = lhs - rhs; 5075 } 5076 5077 RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs) 5078 { 5079 return lhs = lhs * rhs; 5080 } 5081 5082 // RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs) 5083 // { 5084 // return lhs = lhs / rhs; 5085 // } 5086 5087 // RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs) 5088 // { 5089 // return lhs = lhs % rhs; 5090 // } 5091 5092 RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs) 5093 { 5094 return lhs = lhs & rhs; 5095 } 5096 5097 RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs) 5098 { 5099 return lhs = lhs | rhs; 5100 } 5101 5102 RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs) 5103 { 5104 return lhs = lhs ^ rhs; 5105 
} 5106 5107 RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs) 5108 { 5109 return lhs = lhs << rhs; 5110 } 5111 5112 RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs) 5113 { 5114 return lhs = lhs >> rhs; 5115 } 5116 5117 RValue<UInt4> operator+(RValue<UInt4> val) 5118 { 5119 return val; 5120 } 5121 5122 RValue<UInt4> operator-(RValue<UInt4> val) 5123 { 5124 return RValue<UInt4>(Nucleus::createNeg(val.value)); 5125 } 5126 5127 RValue<UInt4> operator~(RValue<UInt4> val) 5128 { 5129 return RValue<UInt4>(Nucleus::createNot(val.value)); 5130 } 5131 5132 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y) 5133 { 5134 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5135 // Restore the following line when LLVM is updated to a version where this issue is fixed. 5136 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())); 5137 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF); 5138 } 5139 5140 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y) 5141 { 5142 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())); 5143 } 5144 5145 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y) 5146 { 5147 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5148 // Restore the following line when LLVM is updated to a version where this issue is fixed. 
5149 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType())); 5150 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF); 5151 } 5152 5153 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y) 5154 { 5155 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())); 5156 } 5157 5158 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y) 5159 { 5160 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5161 // Restore the following line when LLVM is updated to a version where this issue is fixed. 5162 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType())); 5163 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF); 5164 } 5165 5166 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y) 5167 { 5168 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())); 5169 } 5170 5171 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y) 5172 { 5173 if(CPUID::supportsSSE4_1()) 5174 { 5175 return x86::pmaxud(x, y); 5176 } 5177 else 5178 { 5179 RValue<UInt4> greater = CmpNLE(x, y); 5180 return (x & greater) | (y & ~greater); 5181 } 5182 } 5183 5184 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y) 5185 { 5186 if(CPUID::supportsSSE4_1()) 5187 { 5188 return x86::pminud(x, y); 5189 } 5190 else 5191 { 5192 RValue<UInt4> less = CmpLT(x, y); 5193 return (x & less) | (y & ~less); 5194 } 5195 } 5196 5197 Type *UInt4::getType() 5198 { 5199 return T(llvm::VectorType::get(T(UInt::getType()), 4)); 5200 } 5201 5202 Float::Float(RValue<Int> cast) 5203 { 5204 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType()); 5205 5206 storeValue(integer); 5207 } 5208 5209 Float::Float(RValue<UInt> cast) 5210 { 5211 RValue<Float> result = Float(Int(cast & 
UInt(0x7FFFFFFF))) + 5212 As<Float>((As<Int>(cast) >> 31) & As<Int>(Float(0x80000000u))); 5213 5214 storeValue(result.value); 5215 } 5216 5217 Float::Float(float x) 5218 { 5219 storeValue(Nucleus::createConstantFloat(x)); 5220 } 5221 5222 Float::Float(RValue<Float> rhs) 5223 { 5224 storeValue(rhs.value); 5225 } 5226 5227 Float::Float(const Float &rhs) 5228 { 5229 Value *value = rhs.loadValue(); 5230 storeValue(value); 5231 } 5232 5233 Float::Float(const Reference<Float> &rhs) 5234 { 5235 Value *value = rhs.loadValue(); 5236 storeValue(value); 5237 } 5238 5239 RValue<Float> Float::operator=(RValue<Float> rhs) 5240 { 5241 storeValue(rhs.value); 5242 5243 return rhs; 5244 } 5245 5246 RValue<Float> Float::operator=(const Float &rhs) 5247 { 5248 Value *value = rhs.loadValue(); 5249 storeValue(value); 5250 5251 return RValue<Float>(value); 5252 } 5253 5254 RValue<Float> Float::operator=(const Reference<Float> &rhs) 5255 { 5256 Value *value = rhs.loadValue(); 5257 storeValue(value); 5258 5259 return RValue<Float>(value); 5260 } 5261 5262 RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs) 5263 { 5264 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value)); 5265 } 5266 5267 RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs) 5268 { 5269 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value)); 5270 } 5271 5272 RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs) 5273 { 5274 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value)); 5275 } 5276 5277 RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs) 5278 { 5279 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value)); 5280 } 5281 5282 RValue<Float> operator+=(Float &lhs, RValue<Float> rhs) 5283 { 5284 return lhs = lhs + rhs; 5285 } 5286 5287 RValue<Float> operator-=(Float &lhs, RValue<Float> rhs) 5288 { 5289 return lhs = lhs - rhs; 5290 } 5291 5292 RValue<Float> operator*=(Float &lhs, RValue<Float> rhs) 5293 { 5294 return lhs = lhs * rhs; 5295 } 5296 5297 
RValue<Float> operator/=(Float &lhs, RValue<Float> rhs) 5298 { 5299 return lhs = lhs / rhs; 5300 } 5301 5302 RValue<Float> operator+(RValue<Float> val) 5303 { 5304 return val; 5305 } 5306 5307 RValue<Float> operator-(RValue<Float> val) 5308 { 5309 return RValue<Float>(Nucleus::createFNeg(val.value)); 5310 } 5311 5312 RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs) 5313 { 5314 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value)); 5315 } 5316 5317 RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs) 5318 { 5319 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value)); 5320 } 5321 5322 RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs) 5323 { 5324 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value)); 5325 } 5326 5327 RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs) 5328 { 5329 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value)); 5330 } 5331 5332 RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs) 5333 { 5334 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value)); 5335 } 5336 5337 RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs) 5338 { 5339 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value)); 5340 } 5341 5342 RValue<Float> Abs(RValue<Float> x) 5343 { 5344 return IfThenElse(x > 0.0f, x, -x); 5345 } 5346 5347 RValue<Float> Max(RValue<Float> x, RValue<Float> y) 5348 { 5349 return IfThenElse(x > y, x, y); 5350 } 5351 5352 RValue<Float> Min(RValue<Float> x, RValue<Float> y) 5353 { 5354 return IfThenElse(x < y, x, y); 5355 } 5356 5357 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2) 5358 { 5359 #if defined(__i386__) || defined(__x86_64__) 5360 if(exactAtPow2) 5361 { 5362 // rcpss uses a piecewise-linear approximation which minimizes the relative error 5363 // but is not exact at power-of-two values. Rectify by multiplying by the inverse. 
5364 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f)))); 5365 } 5366 #endif 5367 5368 return x86::rcpss(x); 5369 } 5370 5371 RValue<Float> RcpSqrt_pp(RValue<Float> x) 5372 { 5373 return x86::rsqrtss(x); 5374 } 5375 5376 RValue<Float> Sqrt(RValue<Float> x) 5377 { 5378 return x86::sqrtss(x); 5379 } 5380 5381 RValue<Float> Round(RValue<Float> x) 5382 { 5383 if(CPUID::supportsSSE4_1()) 5384 { 5385 return x86::roundss(x, 0); 5386 } 5387 else 5388 { 5389 return Float4(Round(Float4(x))).x; 5390 } 5391 } 5392 5393 RValue<Float> Trunc(RValue<Float> x) 5394 { 5395 if(CPUID::supportsSSE4_1()) 5396 { 5397 return x86::roundss(x, 3); 5398 } 5399 else 5400 { 5401 return Float(Int(x)); // Rounded toward zero 5402 } 5403 } 5404 5405 RValue<Float> Frac(RValue<Float> x) 5406 { 5407 if(CPUID::supportsSSE4_1()) 5408 { 5409 return x - x86::floorss(x); 5410 } 5411 else 5412 { 5413 return Float4(Frac(Float4(x))).x; 5414 } 5415 } 5416 5417 RValue<Float> Floor(RValue<Float> x) 5418 { 5419 if(CPUID::supportsSSE4_1()) 5420 { 5421 return x86::floorss(x); 5422 } 5423 else 5424 { 5425 return Float4(Floor(Float4(x))).x; 5426 } 5427 } 5428 5429 RValue<Float> Ceil(RValue<Float> x) 5430 { 5431 if(CPUID::supportsSSE4_1()) 5432 { 5433 return x86::ceilss(x); 5434 } 5435 else 5436 { 5437 return Float4(Ceil(Float4(x))).x; 5438 } 5439 } 5440 5441 Type *Float::getType() 5442 { 5443 return T(llvm::Type::getFloatTy(*::context)); 5444 } 5445 5446 Float2::Float2(RValue<Float4> cast) 5447 { 5448 storeValue(Nucleus::createBitCast(cast.value, getType())); 5449 } 5450 5451 Type *Float2::getType() 5452 { 5453 return T(Type_v2f32); 5454 } 5455 5456 Float4::Float4(RValue<Byte4> cast) : XYZW(this) 5457 { 5458 Value *a = Int4(cast).loadValue(); 5459 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType()); 5460 5461 storeValue(xyzw); 5462 } 5463 5464 Float4::Float4(RValue<SByte4> cast) : XYZW(this) 5465 { 5466 Value *a = Int4(cast).loadValue(); 5467 Value *xyzw = Nucleus::createSIToFP(a, 
Float4::getType()); 5468 5469 storeValue(xyzw); 5470 } 5471 5472 Float4::Float4(RValue<Short4> cast) : XYZW(this) 5473 { 5474 Int4 c(cast); 5475 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType())); 5476 } 5477 5478 Float4::Float4(RValue<UShort4> cast) : XYZW(this) 5479 { 5480 Int4 c(cast); 5481 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType())); 5482 } 5483 5484 Float4::Float4(RValue<Int4> cast) : XYZW(this) 5485 { 5486 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType()); 5487 5488 storeValue(xyzw); 5489 } 5490 5491 Float4::Float4(RValue<UInt4> cast) : XYZW(this) 5492 { 5493 RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) + 5494 As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u))); 5495 5496 storeValue(result.value); 5497 } 5498 5499 Float4::Float4() : XYZW(this) 5500 { 5501 } 5502 5503 Float4::Float4(float xyzw) : XYZW(this) 5504 { 5505 constant(xyzw, xyzw, xyzw, xyzw); 5506 } 5507 5508 Float4::Float4(float x, float yzw) : XYZW(this) 5509 { 5510 constant(x, yzw, yzw, yzw); 5511 } 5512 5513 Float4::Float4(float x, float y, float zw) : XYZW(this) 5514 { 5515 constant(x, y, zw, zw); 5516 } 5517 5518 Float4::Float4(float x, float y, float z, float w) : XYZW(this) 5519 { 5520 constant(x, y, z, w); 5521 } 5522 5523 void Float4::constant(float x, float y, float z, float w) 5524 { 5525 double constantVector[4] = {x, y, z, w}; 5526 storeValue(Nucleus::createConstantVector(constantVector, getType())); 5527 } 5528 5529 Float4::Float4(RValue<Float4> rhs) : XYZW(this) 5530 { 5531 storeValue(rhs.value); 5532 } 5533 5534 Float4::Float4(const Float4 &rhs) : XYZW(this) 5535 { 5536 Value *value = rhs.loadValue(); 5537 storeValue(value); 5538 } 5539 5540 Float4::Float4(const Reference<Float4> &rhs) : XYZW(this) 5541 { 5542 Value *value = rhs.loadValue(); 5543 storeValue(value); 5544 } 5545 5546 Float4::Float4(RValue<Float> rhs) : XYZW(this) 5547 { 5548 Value *vector = loadValue(); 5549 Value 
*insert = Nucleus::createInsertElement(vector, rhs.value, 0); 5550 5551 int swizzle[4] = {0, 0, 0, 0}; 5552 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle); 5553 5554 storeValue(replicate); 5555 } 5556 5557 Float4::Float4(const Float &rhs) : XYZW(this) 5558 { 5559 *this = RValue<Float>(rhs.loadValue()); 5560 } 5561 5562 Float4::Float4(const Reference<Float> &rhs) : XYZW(this) 5563 { 5564 *this = RValue<Float>(rhs.loadValue()); 5565 } 5566 5567 RValue<Float4> Float4::operator=(float x) 5568 { 5569 return *this = Float4(x, x, x, x); 5570 } 5571 5572 RValue<Float4> Float4::operator=(RValue<Float4> rhs) 5573 { 5574 storeValue(rhs.value); 5575 5576 return rhs; 5577 } 5578 5579 RValue<Float4> Float4::operator=(const Float4 &rhs) 5580 { 5581 Value *value = rhs.loadValue(); 5582 storeValue(value); 5583 5584 return RValue<Float4>(value); 5585 } 5586 5587 RValue<Float4> Float4::operator=(const Reference<Float4> &rhs) 5588 { 5589 Value *value = rhs.loadValue(); 5590 storeValue(value); 5591 5592 return RValue<Float4>(value); 5593 } 5594 5595 RValue<Float4> Float4::operator=(RValue<Float> rhs) 5596 { 5597 return *this = Float4(rhs); 5598 } 5599 5600 RValue<Float4> Float4::operator=(const Float &rhs) 5601 { 5602 return *this = Float4(rhs); 5603 } 5604 5605 RValue<Float4> Float4::operator=(const Reference<Float> &rhs) 5606 { 5607 return *this = Float4(rhs); 5608 } 5609 5610 RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs) 5611 { 5612 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value)); 5613 } 5614 5615 RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs) 5616 { 5617 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value)); 5618 } 5619 5620 RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs) 5621 { 5622 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value)); 5623 } 5624 5625 RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs) 5626 { 5627 return 
RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value)); 5628 } 5629 5630 RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs) 5631 { 5632 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value)); 5633 } 5634 5635 RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs) 5636 { 5637 return lhs = lhs + rhs; 5638 } 5639 5640 RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs) 5641 { 5642 return lhs = lhs - rhs; 5643 } 5644 5645 RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs) 5646 { 5647 return lhs = lhs * rhs; 5648 } 5649 5650 RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs) 5651 { 5652 return lhs = lhs / rhs; 5653 } 5654 5655 RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs) 5656 { 5657 return lhs = lhs % rhs; 5658 } 5659 5660 RValue<Float4> operator+(RValue<Float4> val) 5661 { 5662 return val; 5663 } 5664 5665 RValue<Float4> operator-(RValue<Float4> val) 5666 { 5667 return RValue<Float4>(Nucleus::createFNeg(val.value)); 5668 } 5669 5670 RValue<Float4> Abs(RValue<Float4> x) 5671 { 5672 Value *vector = Nucleus::createBitCast(x.value, Int4::getType()); 5673 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}; 5674 Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType()))); 5675 5676 return As<Float4>(result); 5677 } 5678 5679 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y) 5680 { 5681 return x86::maxps(x, y); 5682 } 5683 5684 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y) 5685 { 5686 return x86::minps(x, y); 5687 } 5688 5689 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2) 5690 { 5691 #if defined(__i386__) || defined(__x86_64__) 5692 if(exactAtPow2) 5693 { 5694 // rcpps uses a piecewise-linear approximation which minimizes the relative error 5695 // but is not exact at power-of-two values. Rectify by multiplying by the inverse. 
5696 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f)))); 5697 } 5698 #endif 5699 5700 return x86::rcpps(x); 5701 } 5702 5703 RValue<Float4> RcpSqrt_pp(RValue<Float4> x) 5704 { 5705 return x86::rsqrtps(x); 5706 } 5707 5708 RValue<Float4> Sqrt(RValue<Float4> x) 5709 { 5710 return x86::sqrtps(x); 5711 } 5712 5713 RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i) 5714 { 5715 return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i)); 5716 } 5717 5718 RValue<Float> Extract(RValue<Float4> x, int i) 5719 { 5720 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i)); 5721 } 5722 5723 RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select) 5724 { 5725 return RValue<Float4>(createSwizzle4(x.value, select)); 5726 } 5727 5728 RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm) 5729 { 5730 int shuffle[4] = 5731 { 5732 ((imm >> 0) & 0x03) + 0, 5733 ((imm >> 2) & 0x03) + 0, 5734 ((imm >> 4) & 0x03) + 4, 5735 ((imm >> 6) & 0x03) + 4, 5736 }; 5737 5738 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 5739 } 5740 5741 RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y) 5742 { 5743 int shuffle[4] = {0, 4, 1, 5}; 5744 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 5745 } 5746 5747 RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y) 5748 { 5749 int shuffle[4] = {2, 6, 3, 7}; 5750 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 5751 } 5752 5753 RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select) 5754 { 5755 Value *vector = lhs.loadValue(); 5756 Value *result = createMask4(vector, rhs.value, select); 5757 lhs.storeValue(result); 5758 5759 return RValue<Float4>(result); 5760 } 5761 5762 RValue<Int> SignMask(RValue<Float4> x) 5763 { 5764 return x86::movmskps(x); 5765 } 5766 5767 RValue<Int4> CmpEQ(RValue<Float4> x, 
RValue<Float4> y) 5768 { 5769 // return As<Int4>(x86::cmpeqps(x, y)); 5770 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType())); 5771 } 5772 5773 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y) 5774 { 5775 // return As<Int4>(x86::cmpltps(x, y)); 5776 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType())); 5777 } 5778 5779 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y) 5780 { 5781 // return As<Int4>(x86::cmpleps(x, y)); 5782 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType())); 5783 } 5784 5785 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y) 5786 { 5787 // return As<Int4>(x86::cmpneqps(x, y)); 5788 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType())); 5789 } 5790 5791 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y) 5792 { 5793 // return As<Int4>(x86::cmpnltps(x, y)); 5794 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType())); 5795 } 5796 5797 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y) 5798 { 5799 // return As<Int4>(x86::cmpnleps(x, y)); 5800 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType())); 5801 } 5802 5803 RValue<Int4> IsInf(RValue<Float4> x) 5804 { 5805 return CmpEQ(As<Int4>(x) & Int4(0x7FFFFFFF), Int4(0x7F800000)); 5806 } 5807 5808 RValue<Int4> IsNan(RValue<Float4> x) 5809 { 5810 return ~CmpEQ(x, x); 5811 } 5812 5813 RValue<Float4> Round(RValue<Float4> x) 5814 { 5815 if(CPUID::supportsSSE4_1()) 5816 { 5817 return x86::roundps(x, 0); 5818 } 5819 else 5820 { 5821 return Float4(RoundInt(x)); 5822 } 5823 } 5824 5825 RValue<Float4> Trunc(RValue<Float4> x) 5826 { 5827 if(CPUID::supportsSSE4_1()) 5828 { 5829 return x86::roundps(x, 3); 5830 } 5831 else 5832 { 5833 return Float4(Int4(x)); 5834 } 5835 } 5836 5837 RValue<Float4> Frac(RValue<Float4> x) 
5838 { 5839 Float4 frc; 5840 5841 if(CPUID::supportsSSE4_1()) 5842 { 5843 frc = x - Floor(x); 5844 } 5845 else 5846 { 5847 frc = x - Float4(Int4(x)); // Signed fractional part. 5848 5849 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative. 5850 } 5851 5852 // x - floor(x) can be 1.0 for very small negative x. 5853 // Clamp against the value just below 1.0. 5854 return Min(frc, As<Float4>(Int4(0x3F7FFFFF))); 5855 } 5856 5857 RValue<Float4> Floor(RValue<Float4> x) 5858 { 5859 if(CPUID::supportsSSE4_1()) 5860 { 5861 return x86::floorps(x); 5862 } 5863 else 5864 { 5865 return x - Frac(x); 5866 } 5867 } 5868 5869 RValue<Float4> Ceil(RValue<Float4> x) 5870 { 5871 if(CPUID::supportsSSE4_1()) 5872 { 5873 return x86::ceilps(x); 5874 } 5875 else 5876 { 5877 return -Floor(-x); 5878 } 5879 } 5880 5881 Type *Float4::getType() 5882 { 5883 return T(llvm::VectorType::get(T(Float::getType()), 4)); 5884 } 5885 5886 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset) 5887 { 5888 return lhs + RValue<Int>(Nucleus::createConstantInt(offset)); 5889 } 5890 5891 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset) 5892 { 5893 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false)); 5894 } 5895 5896 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset) 5897 { 5898 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true)); 5899 } 5900 5901 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset) 5902 { 5903 return lhs = lhs + offset; 5904 } 5905 5906 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset) 5907 { 5908 return lhs = lhs + offset; 5909 } 5910 5911 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset) 5912 { 5913 return lhs = lhs + offset; 5914 } 5915 5916 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset) 5917 { 
5918 return lhs + -offset; 5919 } 5920 5921 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset) 5922 { 5923 return lhs + -offset; 5924 } 5925 5926 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset) 5927 { 5928 return lhs + -offset; 5929 } 5930 5931 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset) 5932 { 5933 return lhs = lhs - offset; 5934 } 5935 5936 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset) 5937 { 5938 return lhs = lhs - offset; 5939 } 5940 5941 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset) 5942 { 5943 return lhs = lhs - offset; 5944 } 5945 5946 void Return() 5947 { 5948 Nucleus::createRetVoid(); 5949 Nucleus::setInsertBlock(Nucleus::createBasicBlock()); 5950 Nucleus::createUnreachable(); 5951 } 5952 5953 void Return(RValue<Int> ret) 5954 { 5955 Nucleus::createRet(ret.value); 5956 Nucleus::setInsertBlock(Nucleus::createBasicBlock()); 5957 Nucleus::createUnreachable(); 5958 } 5959 5960 void branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB) 5961 { 5962 Nucleus::createCondBr(cmp.value, bodyBB, endBB); 5963 Nucleus::setInsertBlock(bodyBB); 5964 } 5965 5966 RValue<Long> Ticks() 5967 { 5968 llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter); 5969 5970 return RValue<Long>(V(::builder->CreateCall(rdtsc))); 5971 } 5972 } 5973 5974 namespace sw 5975 { 5976 namespace x86 5977 { 5978 RValue<Int> cvtss2si(RValue<Float> val) 5979 { 5980 llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si); 5981 5982 Float4 vector; 5983 vector.x = val; 5984 5985 return RValue<Int>(V(::builder->CreateCall(cvtss2si, RValue<Float4>(vector).value))); 5986 } 5987 5988 RValue<Int4> cvtps2dq(RValue<Float4> val) 5989 { 5990 llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq); 5991 5992 return 
RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value))); 5993 } 5994 5995 RValue<Float> rcpss(RValue<Float> val) 5996 { 5997 llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss); 5998 5999 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0); 6000 6001 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, vector)), Float::getType(), 0)); 6002 } 6003 6004 RValue<Float> sqrtss(RValue<Float> val) 6005 { 6006 llvm::Function *sqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ss); 6007 6008 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0); 6009 6010 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, vector)), Float::getType(), 0)); 6011 } 6012 6013 RValue<Float> rsqrtss(RValue<Float> val) 6014 { 6015 llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss); 6016 6017 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0); 6018 6019 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, vector)), Float::getType(), 0)); 6020 } 6021 6022 RValue<Float4> rcpps(RValue<Float4> val) 6023 { 6024 llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps); 6025 6026 return RValue<Float4>(V(::builder->CreateCall(rcpps, val.value))); 6027 } 6028 6029 RValue<Float4> sqrtps(RValue<Float4> val) 6030 { 6031 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ps); 6032 6033 return RValue<Float4>(V(::builder->CreateCall(sqrtps, val.value))); 6034 } 6035 6036 RValue<Float4> rsqrtps(RValue<Float4> val) 6037 { 6038 llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps); 6039 6040 return 
RValue<Float4>(V(::builder->CreateCall(rsqrtps, val.value))); 6041 } 6042 6043 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y) 6044 { 6045 llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps); 6046 6047 return RValue<Float4>(V(::builder->CreateCall2(maxps, x.value, y.value))); 6048 } 6049 6050 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y) 6051 { 6052 llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps); 6053 6054 return RValue<Float4>(V(::builder->CreateCall2(minps, x.value, y.value))); 6055 } 6056 6057 RValue<Float> roundss(RValue<Float> val, unsigned char imm) 6058 { 6059 llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss); 6060 6061 Value *undef = V(llvm::UndefValue::get(T(Float4::getType()))); 6062 Value *vector = Nucleus::createInsertElement(undef, val.value, 0); 6063 6064 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, undef, vector, V(Nucleus::createConstantInt(imm)))), Float::getType(), 0)); 6065 } 6066 6067 RValue<Float> floorss(RValue<Float> val) 6068 { 6069 return roundss(val, 1); 6070 } 6071 6072 RValue<Float> ceilss(RValue<Float> val) 6073 { 6074 return roundss(val, 2); 6075 } 6076 6077 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm) 6078 { 6079 llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps); 6080 6081 return RValue<Float4>(V(::builder->CreateCall2(roundps, val.value, V(Nucleus::createConstantInt(imm))))); 6082 } 6083 6084 RValue<Float4> floorps(RValue<Float4> val) 6085 { 6086 return roundps(val, 1); 6087 } 6088 6089 RValue<Float4> ceilps(RValue<Float4> val) 6090 { 6091 return roundps(val, 2); 6092 } 6093 6094 RValue<Int4> pabsd(RValue<Int4> x) 6095 { 6096 llvm::Function *pabsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_ssse3_pabs_d_128); 6097 6098 return 
RValue<Int4>(V(::builder->CreateCall(pabsd, x.value))); 6099 } 6100 6101 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y) 6102 { 6103 llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w); 6104 6105 return As<Short4>(V(::builder->CreateCall2(paddsw, x.value, y.value))); 6106 } 6107 6108 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y) 6109 { 6110 llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w); 6111 6112 return As<Short4>(V(::builder->CreateCall2(psubsw, x.value, y.value))); 6113 } 6114 6115 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y) 6116 { 6117 llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w); 6118 6119 return As<UShort4>(V(::builder->CreateCall2(paddusw, x.value, y.value))); 6120 } 6121 6122 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y) 6123 { 6124 llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w); 6125 6126 return As<UShort4>(V(::builder->CreateCall2(psubusw, x.value, y.value))); 6127 } 6128 6129 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y) 6130 { 6131 llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b); 6132 6133 return As<SByte8>(V(::builder->CreateCall2(paddsb, x.value, y.value))); 6134 } 6135 6136 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y) 6137 { 6138 llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b); 6139 6140 return As<SByte8>(V(::builder->CreateCall2(psubsb, x.value, y.value))); 6141 } 6142 6143 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y) 6144 { 6145 llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b); 6146 6147 return As<Byte8>(V(::builder->CreateCall2(paddusb, x.value, y.value))); 6148 } 6149 6150 
// Each wrapper below looks up the LLVM x86 intrinsic named in its body and
// emits a call to it on the operands.
RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, y.value));

	return As<Byte8>(call);
}

RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pavg_w);
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, y.value));

	return As<UShort4>(call);
}

RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmaxs_w);
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, y.value));

	return As<Short4>(call);
}

RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmins_w);
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, y.value));

	return As<Short4>(call);
}

RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_w);
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, y.value));

	return As<Short4>(call);
}

RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_w);
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, y.value));

	return As<Short4>(call);
}

RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_b);
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, y.value));

	return As<Byte8>(call);
}

RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_b);
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, y.value));

	return As<Byte8>(call);
}

RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, y.value));

	return As<Short4>(call);
}

RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
	llvm::Value *call = ::builder->CreateCall2(intrinsic, x.value, y.value);

	return RValue<Short8>(V(call));
}

RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, y.value));

	return As<SByte8>(call);
}

RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, y.value));

	return As<Byte8>(call);
}

// Uses the SSE4.1 packusdw intrinsic when reported. Otherwise negative inputs
// are zeroed, 0x8000 is subtracted, the signed pack (packssdw) is applied,
// and 0x8000 is added back to the packed result.
RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
{
	if(!CPUID::supportsSSE4_1())
	{
		RValue<Int4> biasedX = (x & ~(x >> 31)) - Int4(0x8000);
		RValue<Int4> biasedY = (y & ~(y >> 31)) - Int4(0x8000);

		return As<UShort8>(packssdw(biasedX, biasedY) + Short8(0x8000u));
	}

	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
	llvm::Value *call = ::builder->CreateCall2(intrinsic, x.value, y.value);

	return RValue<UShort8>(V(call));
}

// Shift wrappers: the shift count y is passed to the intrinsic as an integer constant.
RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);

	Value *count = V(Nucleus::createConstantInt(y));
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, count));

	return As<UShort4>(call);
}

RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);

	Value *count = V(Nucleus::createConstantInt(y));
	llvm::Value *call = ::builder->CreateCall2(intrinsic, x.value, count);

	return RValue<UShort8>(V(call));
}

RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);

	Value *count = V(Nucleus::createConstantInt(y));
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, count));

	return As<Short4>(call);
}

RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);

	Value *count = V(Nucleus::createConstantInt(y));
	llvm::Value *call = ::builder->CreateCall2(intrinsic, x.value, count);

	return RValue<Short8>(V(call));
}

RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);

	Value *count = V(Nucleus::createConstantInt(y));
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, count));

	return As<Short4>(call);
}

RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);

	Value *count = V(Nucleus::createConstantInt(y));
	llvm::Value *call = ::builder->CreateCall2(intrinsic, x.value, count);

	return RValue<Short8>(V(call));
}

RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);

	Value *count = V(Nucleus::createConstantInt(y));
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, count));

	return As<Int2>(call);
}

RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);

	Value *count = V(Nucleus::createConstantInt(y));
	llvm::Value *call = ::builder->CreateCall2(intrinsic, x.value, count);

	return RValue<Int4>(V(call));
}

RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);

	Value *count = V(Nucleus::createConstantInt(y));
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, count));

	return As<Int2>(call);
}

RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);

	Value *count = V(Nucleus::createConstantInt(y));
	llvm::Value *call = ::builder->CreateCall2(intrinsic, x.value, count);

	return RValue<Int4>(V(call));
}

RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);

	Value *count = V(Nucleus::createConstantInt(y));
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, count));

	return As<UInt2>(call);
}

RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);

	Value *count = V(Nucleus::createConstantInt(y));
	llvm::Value *call = ::builder->CreateCall2(intrinsic, x.value, count);

	return RValue<UInt4>(V(call));
}

RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxsd);
	llvm::Value *call = ::builder->CreateCall2(intrinsic, x.value, y.value);

	return RValue<Int4>(V(call));
}

RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
{
	llvm::Function *intrinsic = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminsd);
	llvm::Value *call = ::builder->CreateCall2(intrinsic, x.value, y.value);

	return RValue<Int4>(V(call));
}

RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
{
llvm::Function *pmaxud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxud); 6352 6353 return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, x.value, y.value))); 6354 } 6355 6356 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y) 6357 { 6358 llvm::Function *pminud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminud); 6359 6360 return RValue<UInt4>(V(::builder->CreateCall2(pminud, x.value, y.value))); 6361 } 6362 6363 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y) 6364 { 6365 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w); 6366 6367 return As<Short4>(V(::builder->CreateCall2(pmulhw, x.value, y.value))); 6368 } 6369 6370 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y) 6371 { 6372 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w); 6373 6374 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, x.value, y.value))); 6375 } 6376 6377 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y) 6378 { 6379 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd); 6380 6381 return As<Int2>(V(::builder->CreateCall2(pmaddwd, x.value, y.value))); 6382 } 6383 6384 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y) 6385 { 6386 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w); 6387 6388 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, x.value, y.value))); 6389 } 6390 6391 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y) 6392 { 6393 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w); 6394 6395 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, x.value, y.value))); 6396 } 6397 6398 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y) 6399 { 6400 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, 
llvm::Intrinsic::x86_sse2_pmadd_wd); 6401 6402 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, x.value, y.value))); 6403 } 6404 6405 RValue<Int> movmskps(RValue<Float4> x) 6406 { 6407 llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps); 6408 6409 return RValue<Int>(V(::builder->CreateCall(movmskps, x.value))); 6410 } 6411 6412 RValue<Int> pmovmskb(RValue<Byte8> x) 6413 { 6414 llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128); 6415 6416 return RValue<Int>(V(::builder->CreateCall(pmovmskb, x.value))) & 0xFF; 6417 } 6418 6419 RValue<Int4> pmovzxbd(RValue<Byte16> x) 6420 { 6421 llvm::Function *pmovzxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxbd); 6422 6423 return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, x.value))); 6424 } 6425 6426 RValue<Int4> pmovsxbd(RValue<SByte16> x) 6427 { 6428 llvm::Function *pmovsxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxbd); 6429 6430 return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, x.value))); 6431 } 6432 6433 RValue<Int4> pmovzxwd(RValue<UShort8> x) 6434 { 6435 llvm::Function *pmovzxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxwd); 6436 6437 return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, x.value))); 6438 } 6439 6440 RValue<Int4> pmovsxwd(RValue<Short8> x) 6441 { 6442 llvm::Function *pmovsxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxwd); 6443 6444 return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, x.value))); 6445 } 6446 } 6447 } 6448