1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Nucleus.hpp" 16 17 #include "llvm/Support/IRBuilder.h" 18 #include "llvm/Function.h" 19 #include "llvm/GlobalVariable.h" 20 #include "llvm/Module.h" 21 #include "llvm/LLVMContext.h" 22 #include "llvm/Constants.h" 23 #include "llvm/Intrinsics.h" 24 #include "llvm/PassManager.h" 25 #include "llvm/Analysis/LoopPass.h" 26 #include "llvm/Transforms/Scalar.h" 27 #include "llvm/Target/TargetData.h" 28 #include "llvm/Target/TargetOptions.h" 29 #include "llvm/Support/TargetSelect.h" 30 #include "../lib/ExecutionEngine/JIT/JIT.h" 31 32 #include "Routine.hpp" 33 #include "RoutineManager.hpp" 34 #include "x86.hpp" 35 #include "CPUID.hpp" 36 #include "Thread.hpp" 37 #include "Memory.hpp" 38 39 #include <xmmintrin.h> 40 #include <fstream> 41 42 #if defined(__x86_64__) && defined(_WIN32) 43 extern "C" void X86CompilationCallback() 44 { 45 assert(false); // UNIMPLEMENTED 46 } 47 #endif 48 49 extern "C" 50 { 51 bool (*CodeAnalystInitialize)() = 0; 52 void (*CodeAnalystCompleteJITLog)() = 0; 53 bool (*CodeAnalystLogJITCode)(const void *jitCodeStartAddr, unsigned int jitCodeSize, const wchar_t *functionName) = 0; 54 } 55 56 namespace llvm 57 { 58 extern bool JITEmitDebugInfo; 59 } 60 61 namespace sw 62 { 63 Optimization optimization[10] = {InstructionCombining, Disabled}; 64 65 using namespace llvm; 66 67 RoutineManager 
*Nucleus::routineManager = 0; 68 ExecutionEngine *Nucleus::executionEngine = 0; 69 Builder *Nucleus::builder = 0; 70 LLVMContext *Nucleus::context = 0; 71 Module *Nucleus::module = 0; 72 llvm::Function *Nucleus::function = 0; 73 BackoffLock Nucleus::codegenMutex; 74 75 class Builder : public IRBuilder<> 76 { 77 }; 78 79 Nucleus::Nucleus() 80 { 81 codegenMutex.lock(); // Reactor and LLVM are currently not thread safe 82 83 InitializeNativeTarget(); 84 JITEmitDebugInfo = false; 85 86 if(!context) 87 { 88 context = new LLVMContext(); 89 } 90 91 module = new Module("", *context); 92 routineManager = new RoutineManager(); 93 94 #if defined(__x86_64__) 95 const char *architecture = "x86-64"; 96 #else 97 const char *architecture = "x86"; 98 #endif 99 100 SmallVector<std::string, 1> MAttrs; 101 MAttrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx"); 102 MAttrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov"); 103 MAttrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse"); 104 MAttrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2"); 105 MAttrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3"); 106 MAttrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3"); 107 MAttrs.push_back(CPUID::supportsSSE4_1() ? 
"+sse41" : "-sse41"); 108 109 std::string error; 110 TargetMachine *targetMachine = EngineBuilder::selectTarget(module, architecture, "", MAttrs, Reloc::Default, CodeModel::JITDefault, &error); 111 executionEngine = JIT::createJIT(module, 0, routineManager, CodeGenOpt::Aggressive, true, targetMachine); 112 113 if(!builder) 114 { 115 builder = static_cast<Builder*>(new IRBuilder<>(*context)); 116 117 #if defined(_WIN32) 118 HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll"); 119 if(CodeAnalyst) 120 { 121 CodeAnalystInitialize = (bool(*)())GetProcAddress(CodeAnalyst, "CAJIT_Initialize"); 122 CodeAnalystCompleteJITLog = (void(*)())GetProcAddress(CodeAnalyst, "CAJIT_CompleteJITLog"); 123 CodeAnalystLogJITCode = (bool(*)(const void*, unsigned int, const wchar_t*))GetProcAddress(CodeAnalyst, "CAJIT_LogJITCode"); 124 125 CodeAnalystInitialize(); 126 } 127 #endif 128 } 129 } 130 131 Nucleus::~Nucleus() 132 { 133 delete executionEngine; 134 executionEngine = 0; 135 136 routineManager = 0; 137 function = 0; 138 module = 0; 139 140 codegenMutex.unlock(); 141 } 142 143 Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations) 144 { 145 if(builder->GetInsertBlock()->empty() || !builder->GetInsertBlock()->back().isTerminator()) 146 { 147 Type *type = function->getReturnType(); 148 149 if(type->isVoidTy()) 150 { 151 createRetVoid(); 152 } 153 else 154 { 155 createRet(UndefValue::get(type)); 156 } 157 } 158 159 if(false) 160 { 161 std::string error; 162 raw_fd_ostream file("llvm-dump-unopt.txt", error); 163 module->print(file, 0); 164 } 165 166 if(runOptimizations) 167 { 168 optimize(); 169 } 170 171 if(false) 172 { 173 std::string error; 174 raw_fd_ostream file("llvm-dump-opt.txt", error); 175 module->print(file, 0); 176 } 177 178 void *entry = executionEngine->getPointerToFunction(function); 179 Routine *routine = routineManager->acquireRoutine(entry); 180 181 if(CodeAnalystLogJITCode) 182 { 183 CodeAnalystLogJITCode(routine->getEntry(), 
routine->getCodeSize(), name); 184 } 185 186 return routine; 187 } 188 189 void Nucleus::optimize() 190 { 191 static PassManager *passManager = 0; 192 193 if(!passManager) 194 { 195 passManager = new PassManager(); 196 197 UnsafeFPMath = true; 198 // NoInfsFPMath = true; 199 // NoNaNsFPMath = true; 200 201 passManager->add(new TargetData(*executionEngine->getTargetData())); 202 passManager->add(createScalarReplAggregatesPass()); 203 204 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++) 205 { 206 switch(optimization[pass]) 207 { 208 case Disabled: break; 209 case CFGSimplification: passManager->add(createCFGSimplificationPass()); break; 210 case LICM: passManager->add(createLICMPass()); break; 211 case AggressiveDCE: passManager->add(createAggressiveDCEPass()); break; 212 case GVN: passManager->add(createGVNPass()); break; 213 case InstructionCombining: passManager->add(createInstructionCombiningPass()); break; 214 case Reassociate: passManager->add(createReassociatePass()); break; 215 case DeadStoreElimination: passManager->add(createDeadStoreEliminationPass()); break; 216 case SCCP: passManager->add(createSCCPPass()); break; 217 case ScalarReplAggregates: passManager->add(createScalarReplAggregatesPass()); break; 218 default: 219 assert(false); 220 } 221 } 222 } 223 224 passManager->run(*module); 225 } 226 227 void Nucleus::setFunction(llvm::Function *function) 228 { 229 Nucleus::function = function; 230 231 builder->SetInsertPoint(BasicBlock::Create(*context, "", function)); 232 } 233 234 Module *Nucleus::getModule() 235 { 236 return module; 237 } 238 239 llvm::Function *Nucleus::getFunction() 240 { 241 return function; 242 } 243 244 llvm::LLVMContext *Nucleus::getContext() 245 { 246 return context; 247 } 248 249 Value *Nucleus::allocateStackVariable(Type *type, int arraySize) 250 { 251 // Need to allocate it in the entry block for mem2reg to work 252 llvm::Function *function = getFunction(); 253 BasicBlock &entryBlock = 
function->getEntryBlock(); 254 255 Instruction *declaration; 256 257 if(arraySize) 258 { 259 declaration = new AllocaInst(type, Nucleus::createConstantInt(arraySize)); 260 } 261 else 262 { 263 declaration = new AllocaInst(type, (Value*)0); 264 } 265 266 entryBlock.getInstList().push_front(declaration); 267 268 return declaration; 269 } 270 271 BasicBlock *Nucleus::createBasicBlock() 272 { 273 return BasicBlock::Create(*context, "", Nucleus::getFunction()); 274 } 275 276 BasicBlock *Nucleus::getInsertBlock() 277 { 278 return builder->GetInsertBlock(); 279 } 280 281 void Nucleus::setInsertBlock(BasicBlock *basicBlock) 282 { 283 // assert(builder->GetInsertBlock()->back().isTerminator()); 284 return builder->SetInsertPoint(basicBlock); 285 } 286 287 BasicBlock *Nucleus::getPredecessor(BasicBlock *basicBlock) 288 { 289 return *pred_begin(basicBlock); 290 } 291 292 llvm::Function *Nucleus::createFunction(llvm::Type *ReturnType, std::vector<llvm::Type*> &Params) 293 { 294 llvm::FunctionType *functionType = llvm::FunctionType::get(ReturnType, Params, false); 295 llvm::Function *function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", Nucleus::getModule()); 296 function->setCallingConv(llvm::CallingConv::C); 297 298 return function; 299 } 300 301 llvm::Value *Nucleus::getArgument(llvm::Function *function, unsigned int index) 302 { 303 llvm::Function::arg_iterator args = function->arg_begin(); 304 305 while(index) 306 { 307 args++; 308 index--; 309 } 310 311 return &*args; 312 } 313 314 Value *Nucleus::createRetVoid() 315 { 316 x86::emms(); 317 318 return builder->CreateRetVoid(); 319 } 320 321 Value *Nucleus::createRet(Value *V) 322 { 323 x86::emms(); 324 325 return builder->CreateRet(V); 326 } 327 328 Value *Nucleus::createBr(BasicBlock *dest) 329 { 330 return builder->CreateBr(dest); 331 } 332 333 Value *Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse) 334 { 335 return builder->CreateCondBr(cond, ifTrue, ifFalse); 
336 } 337 338 Value *Nucleus::createAdd(Value *lhs, Value *rhs) 339 { 340 return builder->CreateAdd(lhs, rhs); 341 } 342 343 Value *Nucleus::createSub(Value *lhs, Value *rhs) 344 { 345 return builder->CreateSub(lhs, rhs); 346 } 347 348 Value *Nucleus::createMul(Value *lhs, Value *rhs) 349 { 350 return builder->CreateMul(lhs, rhs); 351 } 352 353 Value *Nucleus::createUDiv(Value *lhs, Value *rhs) 354 { 355 return builder->CreateUDiv(lhs, rhs); 356 } 357 358 Value *Nucleus::createSDiv(Value *lhs, Value *rhs) 359 { 360 return builder->CreateSDiv(lhs, rhs); 361 } 362 363 Value *Nucleus::createFAdd(Value *lhs, Value *rhs) 364 { 365 return builder->CreateFAdd(lhs, rhs); 366 } 367 368 Value *Nucleus::createFSub(Value *lhs, Value *rhs) 369 { 370 return builder->CreateFSub(lhs, rhs); 371 } 372 373 Value *Nucleus::createFMul(Value *lhs, Value *rhs) 374 { 375 return builder->CreateFMul(lhs, rhs); 376 } 377 378 Value *Nucleus::createFDiv(Value *lhs, Value *rhs) 379 { 380 return builder->CreateFDiv(lhs, rhs); 381 } 382 383 Value *Nucleus::createURem(Value *lhs, Value *rhs) 384 { 385 return builder->CreateURem(lhs, rhs); 386 } 387 388 Value *Nucleus::createSRem(Value *lhs, Value *rhs) 389 { 390 return builder->CreateSRem(lhs, rhs); 391 } 392 393 Value *Nucleus::createFRem(Value *lhs, Value *rhs) 394 { 395 return builder->CreateFRem(lhs, rhs); 396 } 397 398 Value *Nucleus::createShl(Value *lhs, Value *rhs) 399 { 400 return builder->CreateShl(lhs, rhs); 401 } 402 403 Value *Nucleus::createLShr(Value *lhs, Value *rhs) 404 { 405 return builder->CreateLShr(lhs, rhs); 406 } 407 408 Value *Nucleus::createAShr(Value *lhs, Value *rhs) 409 { 410 return builder->CreateAShr(lhs, rhs); 411 } 412 413 Value *Nucleus::createAnd(Value *lhs, Value *rhs) 414 { 415 return builder->CreateAnd(lhs, rhs); 416 } 417 418 Value *Nucleus::createOr(Value *lhs, Value *rhs) 419 { 420 return builder->CreateOr(lhs, rhs); 421 } 422 423 Value *Nucleus::createXor(Value *lhs, Value *rhs) 424 { 425 return 
builder->CreateXor(lhs, rhs); 426 } 427 428 Value *Nucleus::createNeg(Value *V) 429 { 430 return builder->CreateNeg(V); 431 } 432 433 Value *Nucleus::createFNeg(Value *V) 434 { 435 return builder->CreateFNeg(V); 436 } 437 438 Value *Nucleus::createNot(Value *V) 439 { 440 return builder->CreateNot(V); 441 } 442 443 Value *Nucleus::createLoad(Value *ptr, bool isVolatile, unsigned int align) 444 { 445 return builder->Insert(new LoadInst(ptr, "", isVolatile, align)); 446 } 447 448 Value *Nucleus::createStore(Value *value, Value *ptr, bool isVolatile, unsigned int align) 449 { 450 return builder->Insert(new StoreInst(value, ptr, isVolatile, align)); 451 } 452 453 Value *Nucleus::createGEP(Value *ptr, Value *index) 454 { 455 return builder->CreateGEP(ptr, index); 456 } 457 458 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value) 459 { 460 return builder->CreateAtomicRMW(AtomicRMWInst::Add, ptr, value, SequentiallyConsistent); 461 } 462 463 Value *Nucleus::createTrunc(Value *V, Type *destType) 464 { 465 return builder->CreateTrunc(V, destType); 466 } 467 468 Value *Nucleus::createZExt(Value *V, Type *destType) 469 { 470 return builder->CreateZExt(V, destType); 471 } 472 473 Value *Nucleus::createSExt(Value *V, Type *destType) 474 { 475 return builder->CreateSExt(V, destType); 476 } 477 478 Value *Nucleus::createFPToUI(Value *V, Type *destType) 479 { 480 return builder->CreateFPToUI(V, destType); 481 } 482 483 Value *Nucleus::createFPToSI(Value *V, Type *destType) 484 { 485 return builder->CreateFPToSI(V, destType); 486 } 487 488 Value *Nucleus::createUIToFP(Value *V, Type *destType) 489 { 490 return builder->CreateUIToFP(V, destType); 491 } 492 493 Value *Nucleus::createSIToFP(Value *V, Type *destType) 494 { 495 return builder->CreateSIToFP(V, destType); 496 } 497 498 Value *Nucleus::createFPTrunc(Value *V, Type *destType) 499 { 500 return builder->CreateFPTrunc(V, destType); 501 } 502 503 Value *Nucleus::createFPExt(Value *V, Type *destType) 504 { 505 return 
builder->CreateFPExt(V, destType); 506 } 507 508 Value *Nucleus::createPtrToInt(Value *V, Type *destType) 509 { 510 return builder->CreatePtrToInt(V, destType); 511 } 512 513 Value *Nucleus::createIntToPtr(Value *V, Type *destType) 514 { 515 return builder->CreateIntToPtr(V, destType); 516 } 517 518 Value *Nucleus::createBitCast(Value *V, Type *destType) 519 { 520 return builder->CreateBitCast(V, destType); 521 } 522 523 Value *Nucleus::createIntCast(Value *V, Type *destType, bool isSigned) 524 { 525 return builder->CreateIntCast(V, destType, isSigned); 526 } 527 528 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs) 529 { 530 return builder->CreateICmpEQ(lhs, rhs); 531 } 532 533 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs) 534 { 535 return builder->CreateICmpNE(lhs, rhs); 536 } 537 538 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs) 539 { 540 return builder->CreateICmpUGT(lhs, rhs); 541 } 542 543 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs) 544 { 545 return builder->CreateICmpUGE(lhs, rhs); 546 } 547 548 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs) 549 { 550 return builder->CreateICmpULT(lhs, rhs); 551 } 552 553 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs) 554 { 555 return builder->CreateICmpULE(lhs, rhs); 556 } 557 558 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs) 559 { 560 return builder->CreateICmpSGT(lhs, rhs); 561 } 562 563 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs) 564 { 565 return builder->CreateICmpSGE(lhs, rhs); 566 } 567 568 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs) 569 { 570 return builder->CreateICmpSLT(lhs, rhs); 571 } 572 573 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs) 574 { 575 return builder->CreateICmpSLE(lhs, rhs); 576 } 577 578 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs) 579 { 580 return builder->CreateFCmpOEQ(lhs, rhs); 581 } 582 583 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs) 584 { 585 return builder->CreateFCmpOGT(lhs, rhs); 586 } 587 588 
Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs) 589 { 590 return builder->CreateFCmpOGE(lhs, rhs); 591 } 592 593 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs) 594 { 595 return builder->CreateFCmpOLT(lhs, rhs); 596 } 597 598 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs) 599 { 600 return builder->CreateFCmpOLE(lhs, rhs); 601 } 602 603 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs) 604 { 605 return builder->CreateFCmpONE(lhs, rhs); 606 } 607 608 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs) 609 { 610 return builder->CreateFCmpORD(lhs, rhs); 611 } 612 613 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs) 614 { 615 return builder->CreateFCmpUNO(lhs, rhs); 616 } 617 618 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs) 619 { 620 return builder->CreateFCmpUEQ(lhs, rhs); 621 } 622 623 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs) 624 { 625 return builder->CreateFCmpUGT(lhs, rhs); 626 } 627 628 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs) 629 { 630 return builder->CreateFCmpUGE(lhs, rhs); 631 } 632 633 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs) 634 { 635 return builder->CreateFCmpULT(lhs, rhs); 636 } 637 638 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs) 639 { 640 return builder->CreateFCmpULE(lhs, rhs); 641 } 642 643 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs) 644 { 645 return builder->CreateFCmpULE(lhs, rhs); 646 } 647 648 Value *Nucleus::createCall(Value *callee) 649 { 650 return builder->CreateCall(callee); 651 } 652 653 Value *Nucleus::createCall(Value *callee, Value *arg) 654 { 655 return builder->CreateCall(callee, arg); 656 } 657 658 Value *Nucleus::createCall(Value *callee, Value *arg1, Value *arg2) 659 { 660 return builder->CreateCall2(callee, arg1, arg2); 661 } 662 663 Value *Nucleus::createCall(Value *callee, Value *arg1, Value *arg2, Value *arg3) 664 { 665 return builder->CreateCall3(callee, arg1, arg2, arg3); 666 } 667 668 Value *Nucleus::createCall(Value *callee, Value 
*arg1, Value *arg2, Value *arg3, Value *arg4) 669 { 670 return builder->CreateCall4(callee, arg1, arg2, arg3, arg4); 671 } 672 673 Value *Nucleus::createExtractElement(Value *vector, int index) 674 { 675 return builder->CreateExtractElement(vector, createConstantInt(index)); 676 } 677 678 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index) 679 { 680 return builder->CreateInsertElement(vector, element, createConstantInt(index)); 681 } 682 683 Value *Nucleus::createShuffleVector(Value *V1, Value *V2, Value *mask) 684 { 685 return builder->CreateShuffleVector(V1, V2, mask); 686 } 687 688 Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse) 689 { 690 return builder->CreateSelect(C, ifTrue, ifFalse); 691 } 692 693 Value *Nucleus::createSwitch(llvm::Value *V, llvm::BasicBlock *Dest, unsigned NumCases) 694 { 695 return builder->CreateSwitch(V, Dest, NumCases); 696 } 697 698 void Nucleus::addSwitchCase(llvm::Value *Switch, int Case, llvm::BasicBlock *Branch) 699 { 700 static_cast<SwitchInst*>(Switch)->addCase(Nucleus::createConstantInt(Case), Branch); 701 } 702 703 Value *Nucleus::createUnreachable() 704 { 705 return builder->CreateUnreachable(); 706 } 707 708 Value *Nucleus::createSwizzle(Value *val, unsigned char select) 709 { 710 Constant *swizzle[4]; 711 swizzle[0] = Nucleus::createConstantInt((select >> 0) & 0x03); 712 swizzle[1] = Nucleus::createConstantInt((select >> 2) & 0x03); 713 swizzle[2] = Nucleus::createConstantInt((select >> 4) & 0x03); 714 swizzle[3] = Nucleus::createConstantInt((select >> 6) & 0x03); 715 716 Value *shuffle = Nucleus::createShuffleVector(val, UndefValue::get(val->getType()), Nucleus::createConstantVector(swizzle, 4)); 717 718 return shuffle; 719 } 720 721 Value *Nucleus::createMask(Value *lhs, Value *rhs, unsigned char select) 722 { 723 bool mask[4] = {false, false, false, false}; 724 725 mask[(select >> 0) & 0x03] = true; 726 mask[(select >> 2) & 0x03] = true; 727 mask[(select >> 4) & 0x03] = 
true; 728 mask[(select >> 6) & 0x03] = true; 729 730 Constant *swizzle[4]; 731 swizzle[0] = Nucleus::createConstantInt(mask[0] ? 4 : 0); 732 swizzle[1] = Nucleus::createConstantInt(mask[1] ? 5 : 1); 733 swizzle[2] = Nucleus::createConstantInt(mask[2] ? 6 : 2); 734 swizzle[3] = Nucleus::createConstantInt(mask[3] ? 7 : 3); 735 736 Value *shuffle = Nucleus::createShuffleVector(lhs, rhs, Nucleus::createConstantVector(swizzle, 4)); 737 738 return shuffle; 739 } 740 741 const llvm::GlobalValue *Nucleus::getGlobalValueAtAddress(void *Addr) 742 { 743 return executionEngine->getGlobalValueAtAddress(Addr); 744 } 745 746 void Nucleus::addGlobalMapping(const llvm::GlobalValue *GV, void *Addr) 747 { 748 executionEngine->addGlobalMapping(GV, Addr); 749 } 750 751 llvm::GlobalValue *Nucleus::createGlobalValue(llvm::Type *Ty, bool isConstant, unsigned int Align) 752 { 753 llvm::GlobalValue *global = new llvm::GlobalVariable(*Nucleus::getModule(), Ty, isConstant, llvm::GlobalValue::ExternalLinkage, 0, ""); 754 global->setAlignment(Align); 755 756 return global; 757 } 758 759 llvm::Type *Nucleus::getPointerType(llvm::Type *ElementType) 760 { 761 return llvm::PointerType::get(ElementType, 0); 762 } 763 764 llvm::Constant *Nucleus::createNullValue(llvm::Type *Ty) 765 { 766 return llvm::Constant::getNullValue(Ty); 767 } 768 769 llvm::ConstantInt *Nucleus::createConstantInt(int64_t i) 770 { 771 return llvm::ConstantInt::get(Type::getInt64Ty(*context), i, true); 772 } 773 774 llvm::ConstantInt *Nucleus::createConstantInt(int i) 775 { 776 return llvm::ConstantInt::get(Type::getInt32Ty(*context), i, true); 777 } 778 779 llvm::ConstantInt *Nucleus::createConstantInt(unsigned int i) 780 { 781 return llvm::ConstantInt::get(Type::getInt32Ty(*context), i, false); 782 } 783 784 llvm::ConstantInt *Nucleus::createConstantBool(bool b) 785 { 786 return llvm::ConstantInt::get(Type::getInt1Ty(*context), b); 787 } 788 789 llvm::ConstantInt *Nucleus::createConstantByte(signed char i) 790 { 791 return 
llvm::ConstantInt::get(Type::getInt8Ty(*context), i, true); 792 } 793 794 llvm::ConstantInt *Nucleus::createConstantByte(unsigned char i) 795 { 796 return llvm::ConstantInt::get(Type::getInt8Ty(*context), i, false); 797 } 798 799 llvm::ConstantInt *Nucleus::createConstantShort(short i) 800 { 801 return llvm::ConstantInt::get(Type::getInt16Ty(*context), i, true); 802 } 803 804 llvm::ConstantInt *Nucleus::createConstantShort(unsigned short i) 805 { 806 return llvm::ConstantInt::get(Type::getInt16Ty(*context), i, false); 807 } 808 809 llvm::Constant *Nucleus::createConstantFloat(float x) 810 { 811 return ConstantFP::get(Float::getType(), x); 812 } 813 814 llvm::Value *Nucleus::createNullPointer(llvm::Type *Ty) 815 { 816 return llvm::ConstantPointerNull::get(llvm::PointerType::get(Ty, 0)); 817 } 818 819 llvm::Value *Nucleus::createConstantVector(llvm::Constant *const *Vals, unsigned NumVals) 820 { 821 return llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(Vals, NumVals)); 822 } 823 824 Type *Void::getType() 825 { 826 return Type::getVoidTy(*Nucleus::getContext()); 827 } 828 829 LValue::LValue(llvm::Type *type, int arraySize) 830 { 831 address = Nucleus::allocateStackVariable(type, arraySize); 832 } 833 834 llvm::Value *LValue::loadValue(unsigned int alignment) const 835 { 836 return Nucleus::createLoad(address, false, alignment); 837 } 838 839 llvm::Value *LValue::storeValue(llvm::Value *value, unsigned int alignment) const 840 { 841 return Nucleus::createStore(value, address, false, alignment); 842 } 843 844 llvm::Value *LValue::getAddress(llvm::Value *index) const 845 { 846 return Nucleus::createGEP(address, index); 847 } 848 849 Type *MMX::getType() 850 { 851 return Type::getX86_MMXTy(*Nucleus::getContext()); 852 } 853 854 Bool::Bool(Argument<Bool> argument) 855 { 856 storeValue(argument.value); 857 } 858 859 Bool::Bool() 860 { 861 } 862 863 Bool::Bool(bool x) 864 { 865 storeValue(Nucleus::createConstantBool(x)); 866 } 867 868 Bool::Bool(RValue<Bool> rhs) 
869 { 870 storeValue(rhs.value); 871 } 872 873 Bool::Bool(const Bool &rhs) 874 { 875 Value *value = rhs.loadValue(); 876 storeValue(value); 877 } 878 879 Bool::Bool(const Reference<Bool> &rhs) 880 { 881 Value *value = rhs.loadValue(); 882 storeValue(value); 883 } 884 885 RValue<Bool> Bool::operator=(RValue<Bool> rhs) const 886 { 887 storeValue(rhs.value); 888 889 return rhs; 890 } 891 892 RValue<Bool> Bool::operator=(const Bool &rhs) const 893 { 894 Value *value = rhs.loadValue(); 895 storeValue(value); 896 897 return RValue<Bool>(value); 898 } 899 900 RValue<Bool> Bool::operator=(const Reference<Bool> &rhs) const 901 { 902 Value *value = rhs.loadValue(); 903 storeValue(value); 904 905 return RValue<Bool>(value); 906 } 907 908 RValue<Bool> operator!(RValue<Bool> val) 909 { 910 return RValue<Bool>(Nucleus::createNot(val.value)); 911 } 912 913 RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs) 914 { 915 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value)); 916 } 917 918 RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs) 919 { 920 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value)); 921 } 922 923 Type *Bool::getType() 924 { 925 return Type::getInt1Ty(*Nucleus::getContext()); 926 } 927 928 Byte::Byte(Argument<Byte> argument) 929 { 930 storeValue(argument.value); 931 } 932 933 Byte::Byte(RValue<Int> cast) 934 { 935 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType()); 936 937 storeValue(integer); 938 } 939 940 Byte::Byte(RValue<UInt> cast) 941 { 942 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType()); 943 944 storeValue(integer); 945 } 946 947 Byte::Byte(RValue<UShort> cast) 948 { 949 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType()); 950 951 storeValue(integer); 952 } 953 954 Byte::Byte() 955 { 956 } 957 958 Byte::Byte(int x) 959 { 960 storeValue(Nucleus::createConstantByte((unsigned char)x)); 961 } 962 963 Byte::Byte(unsigned char x) 964 { 965 storeValue(Nucleus::createConstantByte(x)); 
966 } 967 968 Byte::Byte(RValue<Byte> rhs) 969 { 970 storeValue(rhs.value); 971 } 972 973 Byte::Byte(const Byte &rhs) 974 { 975 Value *value = rhs.loadValue(); 976 storeValue(value); 977 } 978 979 Byte::Byte(const Reference<Byte> &rhs) 980 { 981 Value *value = rhs.loadValue(); 982 storeValue(value); 983 } 984 985 RValue<Byte> Byte::operator=(RValue<Byte> rhs) const 986 { 987 storeValue(rhs.value); 988 989 return rhs; 990 } 991 992 RValue<Byte> Byte::operator=(const Byte &rhs) const 993 { 994 Value *value = rhs.loadValue(); 995 storeValue(value); 996 997 return RValue<Byte>(value); 998 } 999 1000 RValue<Byte> Byte::operator=(const Reference<Byte> &rhs) const 1001 { 1002 Value *value = rhs.loadValue(); 1003 storeValue(value); 1004 1005 return RValue<Byte>(value); 1006 } 1007 1008 RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs) 1009 { 1010 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value)); 1011 } 1012 1013 RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs) 1014 { 1015 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value)); 1016 } 1017 1018 RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs) 1019 { 1020 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value)); 1021 } 1022 1023 RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs) 1024 { 1025 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value)); 1026 } 1027 1028 RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs) 1029 { 1030 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value)); 1031 } 1032 1033 RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs) 1034 { 1035 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value)); 1036 } 1037 1038 RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs) 1039 { 1040 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value)); 1041 } 1042 1043 RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs) 1044 { 1045 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value)); 1046 } 1047 1048 
RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs) 1049 { 1050 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value)); 1051 } 1052 1053 RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs) 1054 { 1055 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value)); 1056 } 1057 1058 RValue<Byte> operator+=(const Byte &lhs, RValue<Byte> rhs) 1059 { 1060 return lhs = lhs + rhs; 1061 } 1062 1063 RValue<Byte> operator-=(const Byte &lhs, RValue<Byte> rhs) 1064 { 1065 return lhs = lhs - rhs; 1066 } 1067 1068 RValue<Byte> operator*=(const Byte &lhs, RValue<Byte> rhs) 1069 { 1070 return lhs = lhs * rhs; 1071 } 1072 1073 RValue<Byte> operator/=(const Byte &lhs, RValue<Byte> rhs) 1074 { 1075 return lhs = lhs / rhs; 1076 } 1077 1078 RValue<Byte> operator%=(const Byte &lhs, RValue<Byte> rhs) 1079 { 1080 return lhs = lhs % rhs; 1081 } 1082 1083 RValue<Byte> operator&=(const Byte &lhs, RValue<Byte> rhs) 1084 { 1085 return lhs = lhs & rhs; 1086 } 1087 1088 RValue<Byte> operator|=(const Byte &lhs, RValue<Byte> rhs) 1089 { 1090 return lhs = lhs | rhs; 1091 } 1092 1093 RValue<Byte> operator^=(const Byte &lhs, RValue<Byte> rhs) 1094 { 1095 return lhs = lhs ^ rhs; 1096 } 1097 1098 RValue<Byte> operator<<=(const Byte &lhs, RValue<Byte> rhs) 1099 { 1100 return lhs = lhs << rhs; 1101 } 1102 1103 RValue<Byte> operator>>=(const Byte &lhs, RValue<Byte> rhs) 1104 { 1105 return lhs = lhs >> rhs; 1106 } 1107 1108 RValue<Byte> operator+(RValue<Byte> val) 1109 { 1110 return val; 1111 } 1112 1113 RValue<Byte> operator-(RValue<Byte> val) 1114 { 1115 return RValue<Byte>(Nucleus::createNeg(val.value)); 1116 } 1117 1118 RValue<Byte> operator~(RValue<Byte> val) 1119 { 1120 return RValue<Byte>(Nucleus::createNot(val.value)); 1121 } 1122 1123 RValue<Byte> operator++(const Byte &val, int) // Post-increment 1124 { 1125 RValue<Byte> res = val; 1126 1127 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((unsigned char)1)); 1128 val.storeValue(inc); 1129 1130 return 
res; 1131 } 1132 1133 const Byte &operator++(const Byte &val) // Pre-increment 1134 { 1135 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantByte((unsigned char)1)); 1136 val.storeValue(inc); 1137 1138 return val; 1139 } 1140 1141 RValue<Byte> operator--(const Byte &val, int) // Post-decrement 1142 { 1143 RValue<Byte> res = val; 1144 1145 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((unsigned char)1)); 1146 val.storeValue(inc); 1147 1148 return res; 1149 } 1150 1151 const Byte &operator--(const Byte &val) // Pre-decrement 1152 { 1153 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantByte((unsigned char)1)); 1154 val.storeValue(inc); 1155 1156 return val; 1157 } 1158 1159 RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs) 1160 { 1161 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value)); 1162 } 1163 1164 RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs) 1165 { 1166 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value)); 1167 } 1168 1169 RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs) 1170 { 1171 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value)); 1172 } 1173 1174 RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs) 1175 { 1176 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value)); 1177 } 1178 1179 RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs) 1180 { 1181 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 1182 } 1183 1184 RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs) 1185 { 1186 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 1187 } 1188 1189 Type *Byte::getType() 1190 { 1191 return Type::getInt8Ty(*Nucleus::getContext()); 1192 } 1193 1194 SByte::SByte(Argument<SByte> argument) 1195 { 1196 storeValue(argument.value); 1197 } 1198 1199 SByte::SByte(RValue<Int> cast) 1200 { 1201 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType()); 1202 1203 
storeValue(integer); 1204 } 1205 1206 SByte::SByte(RValue<Short> cast) 1207 { 1208 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType()); 1209 1210 storeValue(integer); 1211 } 1212 1213 SByte::SByte() 1214 { 1215 } 1216 1217 SByte::SByte(signed char x) 1218 { 1219 storeValue(Nucleus::createConstantByte(x)); 1220 } 1221 1222 SByte::SByte(RValue<SByte> rhs) 1223 { 1224 storeValue(rhs.value); 1225 } 1226 1227 SByte::SByte(const SByte &rhs) 1228 { 1229 Value *value = rhs.loadValue(); 1230 storeValue(value); 1231 } 1232 1233 SByte::SByte(const Reference<SByte> &rhs) 1234 { 1235 Value *value = rhs.loadValue(); 1236 storeValue(value); 1237 } 1238 1239 RValue<SByte> SByte::operator=(RValue<SByte> rhs) const 1240 { 1241 storeValue(rhs.value); 1242 1243 return rhs; 1244 } 1245 1246 RValue<SByte> SByte::operator=(const SByte &rhs) const 1247 { 1248 Value *value = rhs.loadValue(); 1249 storeValue(value); 1250 1251 return RValue<SByte>(value); 1252 } 1253 1254 RValue<SByte> SByte::operator=(const Reference<SByte> &rhs) const 1255 { 1256 Value *value = rhs.loadValue(); 1257 storeValue(value); 1258 1259 return RValue<SByte>(value); 1260 } 1261 1262 RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs) 1263 { 1264 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value)); 1265 } 1266 1267 RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs) 1268 { 1269 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value)); 1270 } 1271 1272 RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs) 1273 { 1274 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value)); 1275 } 1276 1277 RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs) 1278 { 1279 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value)); 1280 } 1281 1282 RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs) 1283 { 1284 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value)); 1285 } 1286 1287 RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> 
rhs) 1288 { 1289 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value)); 1290 } 1291 1292 RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs) 1293 { 1294 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value)); 1295 } 1296 1297 RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs) 1298 { 1299 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value)); 1300 } 1301 1302 RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs) 1303 { 1304 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value)); 1305 } 1306 1307 RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs) 1308 { 1309 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value)); 1310 } 1311 1312 RValue<SByte> operator+=(const SByte &lhs, RValue<SByte> rhs) 1313 { 1314 return lhs = lhs + rhs; 1315 } 1316 1317 RValue<SByte> operator-=(const SByte &lhs, RValue<SByte> rhs) 1318 { 1319 return lhs = lhs - rhs; 1320 } 1321 1322 RValue<SByte> operator*=(const SByte &lhs, RValue<SByte> rhs) 1323 { 1324 return lhs = lhs * rhs; 1325 } 1326 1327 RValue<SByte> operator/=(const SByte &lhs, RValue<SByte> rhs) 1328 { 1329 return lhs = lhs / rhs; 1330 } 1331 1332 RValue<SByte> operator%=(const SByte &lhs, RValue<SByte> rhs) 1333 { 1334 return lhs = lhs % rhs; 1335 } 1336 1337 RValue<SByte> operator&=(const SByte &lhs, RValue<SByte> rhs) 1338 { 1339 return lhs = lhs & rhs; 1340 } 1341 1342 RValue<SByte> operator|=(const SByte &lhs, RValue<SByte> rhs) 1343 { 1344 return lhs = lhs | rhs; 1345 } 1346 1347 RValue<SByte> operator^=(const SByte &lhs, RValue<SByte> rhs) 1348 { 1349 return lhs = lhs ^ rhs; 1350 } 1351 1352 RValue<SByte> operator<<=(const SByte &lhs, RValue<SByte> rhs) 1353 { 1354 return lhs = lhs << rhs; 1355 } 1356 1357 RValue<SByte> operator>>=(const SByte &lhs, RValue<SByte> rhs) 1358 { 1359 return lhs = lhs >> rhs; 1360 } 1361 1362 RValue<SByte> operator+(RValue<SByte> val) 1363 { 1364 return val; 1365 } 1366 1367 RValue<SByte> operator-(RValue<SByte> 
val) 1368 { 1369 return RValue<SByte>(Nucleus::createNeg(val.value)); 1370 } 1371 1372 RValue<SByte> operator~(RValue<SByte> val) 1373 { 1374 return RValue<SByte>(Nucleus::createNot(val.value)); 1375 } 1376 1377 RValue<SByte> operator++(const SByte &val, int) // Post-increment 1378 { 1379 RValue<SByte> res = val; 1380 1381 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((signed char)1)); 1382 val.storeValue(inc); 1383 1384 return res; 1385 } 1386 1387 const SByte &operator++(const SByte &val) // Pre-increment 1388 { 1389 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantByte((signed char)1)); 1390 val.storeValue(inc); 1391 1392 return val; 1393 } 1394 1395 RValue<SByte> operator--(const SByte &val, int) // Post-decrement 1396 { 1397 RValue<SByte> res = val; 1398 1399 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((signed char)1)); 1400 val.storeValue(inc); 1401 1402 return res; 1403 } 1404 1405 const SByte &operator--(const SByte &val) // Pre-decrement 1406 { 1407 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantByte((signed char)1)); 1408 val.storeValue(inc); 1409 1410 return val; 1411 } 1412 1413 RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs) 1414 { 1415 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value)); 1416 } 1417 1418 RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs) 1419 { 1420 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value)); 1421 } 1422 1423 RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs) 1424 { 1425 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value)); 1426 } 1427 1428 RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs) 1429 { 1430 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value)); 1431 } 1432 1433 RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs) 1434 { 1435 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 1436 } 1437 1438 
RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs) 1439 { 1440 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 1441 } 1442 1443 Type *SByte::getType() 1444 { 1445 return Type::getInt8Ty(*Nucleus::getContext()); 1446 } 1447 1448 Short::Short(Argument<Short> argument) 1449 { 1450 storeValue(argument.value); 1451 } 1452 1453 Short::Short(RValue<Int> cast) 1454 { 1455 Value *integer = Nucleus::createTrunc(cast.value, Short::getType()); 1456 1457 storeValue(integer); 1458 } 1459 1460 Short::Short() 1461 { 1462 } 1463 1464 Short::Short(short x) 1465 { 1466 storeValue(Nucleus::createConstantShort(x)); 1467 } 1468 1469 Short::Short(RValue<Short> rhs) 1470 { 1471 storeValue(rhs.value); 1472 } 1473 1474 Short::Short(const Short &rhs) 1475 { 1476 Value *value = rhs.loadValue(); 1477 storeValue(value); 1478 } 1479 1480 Short::Short(const Reference<Short> &rhs) 1481 { 1482 Value *value = rhs.loadValue(); 1483 storeValue(value); 1484 } 1485 1486 RValue<Short> Short::operator=(RValue<Short> rhs) const 1487 { 1488 storeValue(rhs.value); 1489 1490 return rhs; 1491 } 1492 1493 RValue<Short> Short::operator=(const Short &rhs) const 1494 { 1495 Value *value = rhs.loadValue(); 1496 storeValue(value); 1497 1498 return RValue<Short>(value); 1499 } 1500 1501 RValue<Short> Short::operator=(const Reference<Short> &rhs) const 1502 { 1503 Value *value = rhs.loadValue(); 1504 storeValue(value); 1505 1506 return RValue<Short>(value); 1507 } 1508 1509 RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs) 1510 { 1511 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value)); 1512 } 1513 1514 RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs) 1515 { 1516 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value)); 1517 } 1518 1519 RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs) 1520 { 1521 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value)); 1522 } 1523 1524 RValue<Short> operator/(RValue<Short> lhs, RValue<Short> 
rhs) 1525 { 1526 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value)); 1527 } 1528 1529 RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs) 1530 { 1531 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value)); 1532 } 1533 1534 RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs) 1535 { 1536 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value)); 1537 } 1538 1539 RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs) 1540 { 1541 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value)); 1542 } 1543 1544 RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs) 1545 { 1546 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value)); 1547 } 1548 1549 RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs) 1550 { 1551 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value)); 1552 } 1553 1554 RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs) 1555 { 1556 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value)); 1557 } 1558 1559 RValue<Short> operator+=(const Short &lhs, RValue<Short> rhs) 1560 { 1561 return lhs = lhs + rhs; 1562 } 1563 1564 RValue<Short> operator-=(const Short &lhs, RValue<Short> rhs) 1565 { 1566 return lhs = lhs - rhs; 1567 } 1568 1569 RValue<Short> operator*=(const Short &lhs, RValue<Short> rhs) 1570 { 1571 return lhs = lhs * rhs; 1572 } 1573 1574 RValue<Short> operator/=(const Short &lhs, RValue<Short> rhs) 1575 { 1576 return lhs = lhs / rhs; 1577 } 1578 1579 RValue<Short> operator%=(const Short &lhs, RValue<Short> rhs) 1580 { 1581 return lhs = lhs % rhs; 1582 } 1583 1584 RValue<Short> operator&=(const Short &lhs, RValue<Short> rhs) 1585 { 1586 return lhs = lhs & rhs; 1587 } 1588 1589 RValue<Short> operator|=(const Short &lhs, RValue<Short> rhs) 1590 { 1591 return lhs = lhs | rhs; 1592 } 1593 1594 RValue<Short> operator^=(const Short &lhs, RValue<Short> rhs) 1595 { 1596 return lhs = lhs ^ rhs; 1597 } 1598 1599 RValue<Short> operator<<=(const Short &lhs, 
RValue<Short> rhs) 1600 { 1601 return lhs = lhs << rhs; 1602 } 1603 1604 RValue<Short> operator>>=(const Short &lhs, RValue<Short> rhs) 1605 { 1606 return lhs = lhs >> rhs; 1607 } 1608 1609 RValue<Short> operator+(RValue<Short> val) 1610 { 1611 return val; 1612 } 1613 1614 RValue<Short> operator-(RValue<Short> val) 1615 { 1616 return RValue<Short>(Nucleus::createNeg(val.value)); 1617 } 1618 1619 RValue<Short> operator~(RValue<Short> val) 1620 { 1621 return RValue<Short>(Nucleus::createNot(val.value)); 1622 } 1623 1624 RValue<Short> operator++(const Short &val, int) // Post-increment 1625 { 1626 RValue<Short> res = val; 1627 1628 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((short)1)); 1629 val.storeValue(inc); 1630 1631 return res; 1632 } 1633 1634 const Short &operator++(const Short &val) // Pre-increment 1635 { 1636 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantShort((short)1)); 1637 val.storeValue(inc); 1638 1639 return val; 1640 } 1641 1642 RValue<Short> operator--(const Short &val, int) // Post-decrement 1643 { 1644 RValue<Short> res = val; 1645 1646 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((short)1)); 1647 val.storeValue(inc); 1648 1649 return res; 1650 } 1651 1652 const Short &operator--(const Short &val) // Pre-decrement 1653 { 1654 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantShort((short)1)); 1655 val.storeValue(inc); 1656 1657 return val; 1658 } 1659 1660 RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs) 1661 { 1662 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value)); 1663 } 1664 1665 RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs) 1666 { 1667 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value)); 1668 } 1669 1670 RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs) 1671 { 1672 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value)); 1673 } 1674 1675 RValue<Bool> 
operator>=(RValue<Short> lhs, RValue<Short> rhs) 1676 { 1677 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value)); 1678 } 1679 1680 RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs) 1681 { 1682 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 1683 } 1684 1685 RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs) 1686 { 1687 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 1688 } 1689 1690 Type *Short::getType() 1691 { 1692 return Type::getInt16Ty(*Nucleus::getContext()); 1693 } 1694 1695 UShort::UShort(Argument<UShort> argument) 1696 { 1697 storeValue(argument.value); 1698 } 1699 1700 UShort::UShort(RValue<UInt> cast) 1701 { 1702 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType()); 1703 1704 storeValue(integer); 1705 } 1706 1707 UShort::UShort(RValue<Int> cast) 1708 { 1709 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType()); 1710 1711 storeValue(integer); 1712 } 1713 1714 UShort::UShort() 1715 { 1716 } 1717 1718 UShort::UShort(unsigned short x) 1719 { 1720 storeValue(Nucleus::createConstantShort(x)); 1721 } 1722 1723 UShort::UShort(RValue<UShort> rhs) 1724 { 1725 storeValue(rhs.value); 1726 } 1727 1728 UShort::UShort(const UShort &rhs) 1729 { 1730 Value *value = rhs.loadValue(); 1731 storeValue(value); 1732 } 1733 1734 UShort::UShort(const Reference<UShort> &rhs) 1735 { 1736 Value *value = rhs.loadValue(); 1737 storeValue(value); 1738 } 1739 1740 RValue<UShort> UShort::operator=(RValue<UShort> rhs) const 1741 { 1742 storeValue(rhs.value); 1743 1744 return rhs; 1745 } 1746 1747 RValue<UShort> UShort::operator=(const UShort &rhs) const 1748 { 1749 Value *value = rhs.loadValue(); 1750 storeValue(value); 1751 1752 return RValue<UShort>(value); 1753 } 1754 1755 RValue<UShort> UShort::operator=(const Reference<UShort> &rhs) const 1756 { 1757 Value *value = rhs.loadValue(); 1758 storeValue(value); 1759 1760 return RValue<UShort>(value); 1761 } 1762 1763 RValue<UShort> 
operator+(RValue<UShort> lhs, RValue<UShort> rhs) 1764 { 1765 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value)); 1766 } 1767 1768 RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs) 1769 { 1770 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value)); 1771 } 1772 1773 RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs) 1774 { 1775 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value)); 1776 } 1777 1778 RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs) 1779 { 1780 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value)); 1781 } 1782 1783 RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs) 1784 { 1785 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value)); 1786 } 1787 1788 RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs) 1789 { 1790 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value)); 1791 } 1792 1793 RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs) 1794 { 1795 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value)); 1796 } 1797 1798 RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs) 1799 { 1800 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value)); 1801 } 1802 1803 RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs) 1804 { 1805 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value)); 1806 } 1807 1808 RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs) 1809 { 1810 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value)); 1811 } 1812 1813 RValue<UShort> operator+=(const UShort &lhs, RValue<UShort> rhs) 1814 { 1815 return lhs = lhs + rhs; 1816 } 1817 1818 RValue<UShort> operator-=(const UShort &lhs, RValue<UShort> rhs) 1819 { 1820 return lhs = lhs - rhs; 1821 } 1822 1823 RValue<UShort> operator*=(const UShort &lhs, RValue<UShort> rhs) 1824 { 1825 return lhs = lhs * rhs; 1826 } 1827 1828 RValue<UShort> operator/=(const UShort &lhs, RValue<UShort> rhs) 
1829 { 1830 return lhs = lhs / rhs; 1831 } 1832 1833 RValue<UShort> operator%=(const UShort &lhs, RValue<UShort> rhs) 1834 { 1835 return lhs = lhs % rhs; 1836 } 1837 1838 RValue<UShort> operator&=(const UShort &lhs, RValue<UShort> rhs) 1839 { 1840 return lhs = lhs & rhs; 1841 } 1842 1843 RValue<UShort> operator|=(const UShort &lhs, RValue<UShort> rhs) 1844 { 1845 return lhs = lhs | rhs; 1846 } 1847 1848 RValue<UShort> operator^=(const UShort &lhs, RValue<UShort> rhs) 1849 { 1850 return lhs = lhs ^ rhs; 1851 } 1852 1853 RValue<UShort> operator<<=(const UShort &lhs, RValue<UShort> rhs) 1854 { 1855 return lhs = lhs << rhs; 1856 } 1857 1858 RValue<UShort> operator>>=(const UShort &lhs, RValue<UShort> rhs) 1859 { 1860 return lhs = lhs >> rhs; 1861 } 1862 1863 RValue<UShort> operator+(RValue<UShort> val) 1864 { 1865 return val; 1866 } 1867 1868 RValue<UShort> operator-(RValue<UShort> val) 1869 { 1870 return RValue<UShort>(Nucleus::createNeg(val.value)); 1871 } 1872 1873 RValue<UShort> operator~(RValue<UShort> val) 1874 { 1875 return RValue<UShort>(Nucleus::createNot(val.value)); 1876 } 1877 1878 RValue<UShort> operator++(const UShort &val, int) // Post-increment 1879 { 1880 RValue<UShort> res = val; 1881 1882 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((unsigned short)1)); 1883 val.storeValue(inc); 1884 1885 return res; 1886 } 1887 1888 const UShort &operator++(const UShort &val) // Pre-increment 1889 { 1890 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantShort((unsigned short)1)); 1891 val.storeValue(inc); 1892 1893 return val; 1894 } 1895 1896 RValue<UShort> operator--(const UShort &val, int) // Post-decrement 1897 { 1898 RValue<UShort> res = val; 1899 1900 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((unsigned short)1)); 1901 val.storeValue(inc); 1902 1903 return res; 1904 } 1905 1906 const UShort &operator--(const UShort &val) // Pre-decrement 1907 { 1908 Value *inc = 
Nucleus::createSub(val.loadValue(), Nucleus::createConstantShort((unsigned short)1)); 1909 val.storeValue(inc); 1910 1911 return val; 1912 } 1913 1914 RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs) 1915 { 1916 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value)); 1917 } 1918 1919 RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs) 1920 { 1921 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value)); 1922 } 1923 1924 RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs) 1925 { 1926 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value)); 1927 } 1928 1929 RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs) 1930 { 1931 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value)); 1932 } 1933 1934 RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs) 1935 { 1936 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 1937 } 1938 1939 RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs) 1940 { 1941 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 1942 } 1943 1944 Type *UShort::getType() 1945 { 1946 return Type::getInt16Ty(*Nucleus::getContext()); 1947 } 1948 1949 Type *Byte4::getType() 1950 { 1951 #if 0 1952 return VectorType::get(Byte::getType(), 4); 1953 #else 1954 return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block 1955 #endif 1956 } 1957 1958 Type *SByte4::getType() 1959 { 1960 #if 0 1961 return VectorType::get(SByte::getType(), 4); 1962 #else 1963 return Int::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block 1964 #endif 1965 } 1966 1967 Byte8::Byte8() 1968 { 1969 // xyzw.parent = this; 1970 } 1971 1972 Byte8::Byte8(byte x0, byte x1, byte x2, byte x3, byte x4, byte x5, byte x6, byte x7) 1973 { 1974 // xyzw.parent = this; 1975 1976 Constant *constantVector[8]; 1977 constantVector[0] = Nucleus::createConstantByte(x0); 1978 constantVector[1] = Nucleus::createConstantByte(x1); 1979 
constantVector[2] = Nucleus::createConstantByte(x2); 1980 constantVector[3] = Nucleus::createConstantByte(x3); 1981 constantVector[4] = Nucleus::createConstantByte(x4); 1982 constantVector[5] = Nucleus::createConstantByte(x5); 1983 constantVector[6] = Nucleus::createConstantByte(x6); 1984 constantVector[7] = Nucleus::createConstantByte(x7); 1985 Value *vector = Nucleus::createConstantVector(constantVector, 8); 1986 1987 storeValue(Nucleus::createBitCast(vector, getType())); 1988 } 1989 1990 Byte8::Byte8(int64_t x) 1991 { 1992 // xyzw.parent = this; 1993 1994 Constant *constantVector[8]; 1995 constantVector[0] = Nucleus::createConstantByte((unsigned char)(x >> 0)); 1996 constantVector[1] = Nucleus::createConstantByte((unsigned char)(x >> 8)); 1997 constantVector[2] = Nucleus::createConstantByte((unsigned char)(x >> 16)); 1998 constantVector[3] = Nucleus::createConstantByte((unsigned char)(x >> 24)); 1999 constantVector[4] = Nucleus::createConstantByte((unsigned char)(x >> 32)); 2000 constantVector[5] = Nucleus::createConstantByte((unsigned char)(x >> 40)); 2001 constantVector[6] = Nucleus::createConstantByte((unsigned char)(x >> 48)); 2002 constantVector[7] = Nucleus::createConstantByte((unsigned char)(x >> 56)); 2003 Value *vector = Nucleus::createConstantVector(constantVector, 8); 2004 2005 storeValue(Nucleus::createBitCast(vector, getType())); 2006 } 2007 2008 Byte8::Byte8(RValue<Byte8> rhs) 2009 { 2010 // xyzw.parent = this; 2011 2012 storeValue(rhs.value); 2013 } 2014 2015 Byte8::Byte8(const Byte8 &rhs) 2016 { 2017 // xyzw.parent = this; 2018 2019 Value *value = rhs.loadValue(); 2020 storeValue(value); 2021 } 2022 2023 Byte8::Byte8(const Reference<Byte8> &rhs) 2024 { 2025 // xyzw.parent = this; 2026 2027 Value *value = rhs.loadValue(); 2028 storeValue(value); 2029 } 2030 2031 RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs) const 2032 { 2033 storeValue(rhs.value); 2034 2035 return rhs; 2036 } 2037 2038 RValue<Byte8> Byte8::operator=(const Byte8 &rhs) const 
2039 { 2040 Value *value = rhs.loadValue(); 2041 storeValue(value); 2042 2043 return RValue<Byte8>(value); 2044 } 2045 2046 RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs) const 2047 { 2048 Value *value = rhs.loadValue(); 2049 storeValue(value); 2050 2051 return RValue<Byte8>(value); 2052 } 2053 2054 RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs) 2055 { 2056 if(CPUID::supportsMMX2()) 2057 { 2058 return x86::paddb(lhs, rhs); 2059 } 2060 else 2061 { 2062 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value)); 2063 } 2064 } 2065 2066 RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs) 2067 { 2068 if(CPUID::supportsMMX2()) 2069 { 2070 return x86::psubb(lhs, rhs); 2071 } 2072 else 2073 { 2074 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value)); 2075 } 2076 } 2077 2078 // RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs) 2079 // { 2080 // return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value)); 2081 // } 2082 2083 // RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs) 2084 // { 2085 // return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value)); 2086 // } 2087 2088 // RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs) 2089 // { 2090 // return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value)); 2091 // } 2092 2093 RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs) 2094 { 2095 if(CPUID::supportsMMX2()) 2096 { 2097 return As<Byte8>(x86::pand(As<Short4>(lhs), As<Short4>(rhs))); 2098 } 2099 else 2100 { 2101 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value)); 2102 } 2103 } 2104 2105 RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs) 2106 { 2107 if(CPUID::supportsMMX2()) 2108 { 2109 return As<Byte8>(x86::por(As<Short4>(lhs), As<Short4>(rhs))); 2110 } 2111 else 2112 { 2113 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value)); 2114 } 2115 } 2116 2117 RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs) 2118 { 2119 
if(CPUID::supportsMMX2()) 2120 { 2121 return As<Byte8>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs))); 2122 } 2123 else 2124 { 2125 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value)); 2126 } 2127 } 2128 2129 // RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs) 2130 // { 2131 // return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value)); 2132 // } 2133 2134 // RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs) 2135 // { 2136 // return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value)); 2137 // } 2138 2139 RValue<Byte8> operator+=(const Byte8 &lhs, RValue<Byte8> rhs) 2140 { 2141 return lhs = lhs + rhs; 2142 } 2143 2144 RValue<Byte8> operator-=(const Byte8 &lhs, RValue<Byte8> rhs) 2145 { 2146 return lhs = lhs - rhs; 2147 } 2148 2149 // RValue<Byte8> operator*=(const Byte8 &lhs, RValue<Byte8> rhs) 2150 // { 2151 // return lhs = lhs * rhs; 2152 // } 2153 2154 // RValue<Byte8> operator/=(const Byte8 &lhs, RValue<Byte8> rhs) 2155 // { 2156 // return lhs = lhs / rhs; 2157 // } 2158 2159 // RValue<Byte8> operator%=(const Byte8 &lhs, RValue<Byte8> rhs) 2160 // { 2161 // return lhs = lhs % rhs; 2162 // } 2163 2164 RValue<Byte8> operator&=(const Byte8 &lhs, RValue<Byte8> rhs) 2165 { 2166 return lhs = lhs & rhs; 2167 } 2168 2169 RValue<Byte8> operator|=(const Byte8 &lhs, RValue<Byte8> rhs) 2170 { 2171 return lhs = lhs | rhs; 2172 } 2173 2174 RValue<Byte8> operator^=(const Byte8 &lhs, RValue<Byte8> rhs) 2175 { 2176 return lhs = lhs ^ rhs; 2177 } 2178 2179 // RValue<Byte8> operator<<=(const Byte8 &lhs, RValue<Byte8> rhs) 2180 // { 2181 // return lhs = lhs << rhs; 2182 // } 2183 2184 // RValue<Byte8> operator>>=(const Byte8 &lhs, RValue<Byte8> rhs) 2185 // { 2186 // return lhs = lhs >> rhs; 2187 // } 2188 2189 // RValue<Byte8> operator+(RValue<Byte8> val) 2190 // { 2191 // return val; 2192 // } 2193 2194 // RValue<Byte8> operator-(RValue<Byte8> val) 2195 // { 2196 // return RValue<Byte8>(Nucleus::createNeg(val.value)); 2197 // } 2198 
2199 RValue<Byte8> operator~(RValue<Byte8> val) 2200 { 2201 if(CPUID::supportsMMX2()) 2202 { 2203 return val ^ Byte8(0xFFFFFFFFFFFFFFFF); 2204 } 2205 else 2206 { 2207 return RValue<Byte8>(Nucleus::createNot(val.value)); 2208 } 2209 } 2210 2211 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y) 2212 { 2213 return x86::paddusb(x, y); 2214 } 2215 2216 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y) 2217 { 2218 return x86::psubusb(x, y); 2219 } 2220 2221 RValue<Short4> Unpack(RValue<Byte4> x) 2222 { 2223 Value *int2 = Nucleus::createInsertElement(UndefValue::get(VectorType::get(Int::getType(), 2)), x.value, 0); 2224 Value *byte8 = Nucleus::createBitCast(int2, Byte8::getType()); 2225 2226 return UnpackLow(RValue<Byte8>(byte8), RValue<Byte8>(byte8)); 2227 } 2228 2229 RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y) 2230 { 2231 if(CPUID::supportsMMX2()) 2232 { 2233 return x86::punpcklbw(x, y); 2234 } 2235 else 2236 { 2237 Constant *shuffle[8]; 2238 shuffle[0] = Nucleus::createConstantInt(0); 2239 shuffle[1] = Nucleus::createConstantInt(8); 2240 shuffle[2] = Nucleus::createConstantInt(1); 2241 shuffle[3] = Nucleus::createConstantInt(9); 2242 shuffle[4] = Nucleus::createConstantInt(2); 2243 shuffle[5] = Nucleus::createConstantInt(10); 2244 shuffle[6] = Nucleus::createConstantInt(3); 2245 shuffle[7] = Nucleus::createConstantInt(11); 2246 2247 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8)); 2248 2249 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType())); 2250 } 2251 } 2252 2253 RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y) 2254 { 2255 if(CPUID::supportsMMX2()) 2256 { 2257 return x86::punpckhbw(x, y); 2258 } 2259 else 2260 { 2261 Constant *shuffle[8]; 2262 shuffle[0] = Nucleus::createConstantInt(4); 2263 shuffle[1] = Nucleus::createConstantInt(12); 2264 shuffle[2] = Nucleus::createConstantInt(5); 2265 shuffle[3] = Nucleus::createConstantInt(13); 2266 shuffle[4] = 
Nucleus::createConstantInt(6); 2267 shuffle[5] = Nucleus::createConstantInt(14); 2268 shuffle[6] = Nucleus::createConstantInt(7); 2269 shuffle[7] = Nucleus::createConstantInt(15); 2270 2271 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8)); 2272 2273 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType())); 2274 } 2275 } 2276 2277 RValue<Int> SignMask(RValue<Byte8> x) 2278 { 2279 return x86::pmovmskb(x); 2280 } 2281 2282 // RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y) 2283 // { 2284 // return x86::pcmpgtb(x, y); // FIXME: Signedness 2285 // } 2286 2287 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y) 2288 { 2289 return x86::pcmpeqb(x, y); 2290 } 2291 2292 Type *Byte8::getType() 2293 { 2294 if(CPUID::supportsMMX2()) 2295 { 2296 return MMX::getType(); 2297 } 2298 else 2299 { 2300 return VectorType::get(Byte::getType(), 8); 2301 } 2302 } 2303 2304 SByte8::SByte8() 2305 { 2306 // xyzw.parent = this; 2307 } 2308 2309 SByte8::SByte8(byte x0, byte x1, byte x2, byte x3, byte x4, byte x5, byte x6, byte x7) 2310 { 2311 // xyzw.parent = this; 2312 2313 Constant *constantVector[8]; 2314 constantVector[0] = Nucleus::createConstantByte(x0); 2315 constantVector[1] = Nucleus::createConstantByte(x1); 2316 constantVector[2] = Nucleus::createConstantByte(x2); 2317 constantVector[3] = Nucleus::createConstantByte(x3); 2318 constantVector[4] = Nucleus::createConstantByte(x4); 2319 constantVector[5] = Nucleus::createConstantByte(x5); 2320 constantVector[6] = Nucleus::createConstantByte(x6); 2321 constantVector[7] = Nucleus::createConstantByte(x7); 2322 Value *vector = Nucleus::createConstantVector(constantVector, 8); 2323 2324 storeValue(Nucleus::createBitCast(vector, getType())); 2325 } 2326 2327 SByte8::SByte8(int64_t x) 2328 { 2329 // xyzw.parent = this; 2330 2331 Constant *constantVector[8]; 2332 constantVector[0] = Nucleus::createConstantByte((unsigned char)(x >> 0)); 2333 constantVector[1] = 
Nucleus::createConstantByte((unsigned char)(x >> 8)); 2334 constantVector[2] = Nucleus::createConstantByte((unsigned char)(x >> 16)); 2335 constantVector[3] = Nucleus::createConstantByte((unsigned char)(x >> 24)); 2336 constantVector[4] = Nucleus::createConstantByte((unsigned char)(x >> 32)); 2337 constantVector[5] = Nucleus::createConstantByte((unsigned char)(x >> 40)); 2338 constantVector[6] = Nucleus::createConstantByte((unsigned char)(x >> 48)); 2339 constantVector[7] = Nucleus::createConstantByte((unsigned char)(x >> 56)); 2340 Value *vector = Nucleus::createConstantVector(constantVector, 8); 2341 2342 storeValue(Nucleus::createBitCast(vector, getType())); 2343 } 2344 2345 SByte8::SByte8(RValue<SByte8> rhs) 2346 { 2347 // xyzw.parent = this; 2348 2349 storeValue(rhs.value); 2350 } 2351 2352 SByte8::SByte8(const SByte8 &rhs) 2353 { 2354 // xyzw.parent = this; 2355 2356 Value *value = rhs.loadValue(); 2357 storeValue(value); 2358 } 2359 2360 SByte8::SByte8(const Reference<SByte8> &rhs) 2361 { 2362 // xyzw.parent = this; 2363 2364 Value *value = rhs.loadValue(); 2365 storeValue(value); 2366 } 2367 2368 RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs) const 2369 { 2370 storeValue(rhs.value); 2371 2372 return rhs; 2373 } 2374 2375 RValue<SByte8> SByte8::operator=(const SByte8 &rhs) const 2376 { 2377 Value *value = rhs.loadValue(); 2378 storeValue(value); 2379 2380 return RValue<SByte8>(value); 2381 } 2382 2383 RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs) const 2384 { 2385 Value *value = rhs.loadValue(); 2386 storeValue(value); 2387 2388 return RValue<SByte8>(value); 2389 } 2390 2391 RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs) 2392 { 2393 if(CPUID::supportsMMX2()) 2394 { 2395 return As<SByte8>(x86::paddb(As<Byte8>(lhs), As<Byte8>(rhs))); 2396 } 2397 else 2398 { 2399 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value)); 2400 } 2401 } 2402 2403 RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs) 2404 { 
2405 if(CPUID::supportsMMX2()) 2406 { 2407 return As<SByte8>(x86::psubb(As<Byte8>(lhs), As<Byte8>(rhs))); 2408 } 2409 else 2410 { 2411 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value)); 2412 } 2413 } 2414 2415 // RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs) 2416 // { 2417 // return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value)); 2418 // } 2419 2420 // RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs) 2421 // { 2422 // return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value)); 2423 // } 2424 2425 // RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs) 2426 // { 2427 // return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value)); 2428 // } 2429 2430 RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs) 2431 { 2432 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value)); 2433 } 2434 2435 RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs) 2436 { 2437 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value)); 2438 } 2439 2440 RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs) 2441 { 2442 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value)); 2443 } 2444 2445 // RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs) 2446 // { 2447 // return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value)); 2448 // } 2449 2450 // RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs) 2451 // { 2452 // return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value)); 2453 // } 2454 2455 RValue<SByte8> operator+=(const SByte8 &lhs, RValue<SByte8> rhs) 2456 { 2457 return lhs = lhs + rhs; 2458 } 2459 2460 RValue<SByte8> operator-=(const SByte8 &lhs, RValue<SByte8> rhs) 2461 { 2462 return lhs = lhs - rhs; 2463 } 2464 2465 // RValue<SByte8> operator*=(const SByte8 &lhs, RValue<SByte8> rhs) 2466 // { 2467 // return lhs = lhs * rhs; 2468 // } 2469 2470 // RValue<SByte8> operator/=(const SByte8 &lhs, RValue<SByte8> rhs) 2471 // 
{ 2472 // return lhs = lhs / rhs; 2473 // } 2474 2475 // RValue<SByte8> operator%=(const SByte8 &lhs, RValue<SByte8> rhs) 2476 // { 2477 // return lhs = lhs % rhs; 2478 // } 2479 2480 RValue<SByte8> operator&=(const SByte8 &lhs, RValue<SByte8> rhs) 2481 { 2482 return lhs = lhs & rhs; 2483 } 2484 2485 RValue<SByte8> operator|=(const SByte8 &lhs, RValue<SByte8> rhs) 2486 { 2487 return lhs = lhs | rhs; 2488 } 2489 2490 RValue<SByte8> operator^=(const SByte8 &lhs, RValue<SByte8> rhs) 2491 { 2492 return lhs = lhs ^ rhs; 2493 } 2494 2495 // RValue<SByte8> operator<<=(const SByte8 &lhs, RValue<SByte8> rhs) 2496 // { 2497 // return lhs = lhs << rhs; 2498 // } 2499 2500 // RValue<SByte8> operator>>=(const SByte8 &lhs, RValue<SByte8> rhs) 2501 // { 2502 // return lhs = lhs >> rhs; 2503 // } 2504 2505 // RValue<SByte8> operator+(RValue<SByte8> val) 2506 // { 2507 // return val; 2508 // } 2509 2510 // RValue<SByte8> operator-(RValue<SByte8> val) 2511 // { 2512 // return RValue<SByte8>(Nucleus::createNeg(val.value)); 2513 // } 2514 2515 RValue<SByte8> operator~(RValue<SByte8> val) 2516 { 2517 if(CPUID::supportsMMX2()) 2518 { 2519 return val ^ SByte8(0xFFFFFFFFFFFFFFFF); 2520 } 2521 else 2522 { 2523 return RValue<SByte8>(Nucleus::createNot(val.value)); 2524 } 2525 } 2526 2527 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y) 2528 { 2529 return x86::paddsb(x, y); 2530 } 2531 2532 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y) 2533 { 2534 return x86::psubsb(x, y); 2535 } 2536 2537 RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y) 2538 { 2539 if(CPUID::supportsMMX2()) 2540 { 2541 return As<Short4>(x86::punpcklbw(As<Byte8>(x), As<Byte8>(y))); 2542 } 2543 else 2544 { 2545 Constant *shuffle[8]; 2546 shuffle[0] = Nucleus::createConstantInt(0); 2547 shuffle[1] = Nucleus::createConstantInt(8); 2548 shuffle[2] = Nucleus::createConstantInt(1); 2549 shuffle[3] = Nucleus::createConstantInt(9); 2550 shuffle[4] = Nucleus::createConstantInt(2); 2551 shuffle[5] = 
Nucleus::createConstantInt(10); 2552 shuffle[6] = Nucleus::createConstantInt(3); 2553 shuffle[7] = Nucleus::createConstantInt(11); 2554 2555 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8)); 2556 2557 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType())); 2558 } 2559 } 2560 2561 RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y) 2562 { 2563 if(CPUID::supportsMMX2()) 2564 { 2565 return As<Short4>(x86::punpckhbw(As<Byte8>(x), As<Byte8>(y))); 2566 } 2567 else 2568 { 2569 Constant *shuffle[8]; 2570 shuffle[0] = Nucleus::createConstantInt(4); 2571 shuffle[1] = Nucleus::createConstantInt(12); 2572 shuffle[2] = Nucleus::createConstantInt(5); 2573 shuffle[3] = Nucleus::createConstantInt(13); 2574 shuffle[4] = Nucleus::createConstantInt(6); 2575 shuffle[5] = Nucleus::createConstantInt(14); 2576 shuffle[6] = Nucleus::createConstantInt(7); 2577 shuffle[7] = Nucleus::createConstantInt(15); 2578 2579 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8)); 2580 2581 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType())); 2582 } 2583 } 2584 2585 RValue<Int> SignMask(RValue<SByte8> x) 2586 { 2587 return x86::pmovmskb(As<Byte8>(x)); 2588 } 2589 2590 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y) 2591 { 2592 return x86::pcmpgtb(x, y); 2593 } 2594 2595 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y) 2596 { 2597 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y)); 2598 } 2599 2600 Type *SByte8::getType() 2601 { 2602 if(CPUID::supportsMMX2()) 2603 { 2604 return MMX::getType(); 2605 } 2606 else 2607 { 2608 return VectorType::get(SByte::getType(), 8); 2609 } 2610 } 2611 2612 Byte16::Byte16(RValue<Byte16> rhs) 2613 { 2614 // xyzw.parent = this; 2615 2616 storeValue(rhs.value); 2617 } 2618 2619 Byte16::Byte16(const Byte16 &rhs) 2620 { 2621 // xyzw.parent = this; 2622 2623 Value *value = rhs.loadValue(); 2624 
storeValue(value); 2625 } 2626 2627 Byte16::Byte16(const Reference<Byte16> &rhs) 2628 { 2629 // xyzw.parent = this; 2630 2631 Value *value = rhs.loadValue(); 2632 storeValue(value); 2633 } 2634 2635 RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs) const 2636 { 2637 storeValue(rhs.value); 2638 2639 return rhs; 2640 } 2641 2642 RValue<Byte16> Byte16::operator=(const Byte16 &rhs) const 2643 { 2644 Value *value = rhs.loadValue(); 2645 storeValue(value); 2646 2647 return RValue<Byte16>(value); 2648 } 2649 2650 RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs) const 2651 { 2652 Value *value = rhs.loadValue(); 2653 storeValue(value); 2654 2655 return RValue<Byte16>(value); 2656 } 2657 2658 Type *Byte16::getType() 2659 { 2660 return VectorType::get(Byte::getType(), 16); 2661 } 2662 2663 Type *SByte16::getType() 2664 { 2665 return VectorType::get(SByte::getType(), 16); 2666 } 2667 2668 Short4::Short4(RValue<Int> cast) 2669 { 2670 Value *extend = Nucleus::createZExt(cast.value, Long::getType()); 2671 Value *swizzle = Swizzle(RValue<Short4>(extend), 0x00).value; 2672 2673 storeValue(swizzle); 2674 } 2675 2676 Short4::Short4(RValue<Int4> cast) 2677 { 2678 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType()); 2679 2680 #if 0 // FIXME: Check codegen (pshuflw phshufhw pshufd) 2681 Constant *pack[8]; 2682 pack[0] = Nucleus::createConstantInt(0); 2683 pack[1] = Nucleus::createConstantInt(2); 2684 pack[2] = Nucleus::createConstantInt(4); 2685 pack[3] = Nucleus::createConstantInt(6); 2686 2687 Value *short4 = Nucleus::createShuffleVector(short8, short8, Nucleus::createConstantVector(pack, 4)); 2688 #else 2689 Value *packed; 2690 2691 // FIXME: Use Swizzle<Short8> 2692 if(!CPUID::supportsSSSE3()) 2693 { 2694 Constant *pshuflw[8]; 2695 pshuflw[0] = Nucleus::createConstantInt(0); 2696 pshuflw[1] = Nucleus::createConstantInt(2); 2697 pshuflw[2] = Nucleus::createConstantInt(0); 2698 pshuflw[3] = Nucleus::createConstantInt(2); 2699 pshuflw[4] = 
Nucleus::createConstantInt(4); 2700 pshuflw[5] = Nucleus::createConstantInt(5); 2701 pshuflw[6] = Nucleus::createConstantInt(6); 2702 pshuflw[7] = Nucleus::createConstantInt(7); 2703 2704 Constant *pshufhw[8]; 2705 pshufhw[0] = Nucleus::createConstantInt(0); 2706 pshufhw[1] = Nucleus::createConstantInt(1); 2707 pshufhw[2] = Nucleus::createConstantInt(2); 2708 pshufhw[3] = Nucleus::createConstantInt(3); 2709 pshufhw[4] = Nucleus::createConstantInt(4); 2710 pshufhw[5] = Nucleus::createConstantInt(6); 2711 pshufhw[6] = Nucleus::createConstantInt(4); 2712 pshufhw[7] = Nucleus::createConstantInt(6); 2713 2714 Value *shuffle1 = Nucleus::createShuffleVector(short8, UndefValue::get(Short8::getType()), Nucleus::createConstantVector(pshuflw, 8)); 2715 Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, UndefValue::get(Short8::getType()), Nucleus::createConstantVector(pshufhw, 8)); 2716 Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType()); 2717 packed = Nucleus::createSwizzle(int4, 0x88); 2718 } 2719 else 2720 { 2721 Constant *pshufb[16]; 2722 pshufb[0] = Nucleus::createConstantInt(0); 2723 pshufb[1] = Nucleus::createConstantInt(1); 2724 pshufb[2] = Nucleus::createConstantInt(4); 2725 pshufb[3] = Nucleus::createConstantInt(5); 2726 pshufb[4] = Nucleus::createConstantInt(8); 2727 pshufb[5] = Nucleus::createConstantInt(9); 2728 pshufb[6] = Nucleus::createConstantInt(12); 2729 pshufb[7] = Nucleus::createConstantInt(13); 2730 pshufb[8] = Nucleus::createConstantInt(0); 2731 pshufb[9] = Nucleus::createConstantInt(1); 2732 pshufb[10] = Nucleus::createConstantInt(4); 2733 pshufb[11] = Nucleus::createConstantInt(5); 2734 pshufb[12] = Nucleus::createConstantInt(8); 2735 pshufb[13] = Nucleus::createConstantInt(9); 2736 pshufb[14] = Nucleus::createConstantInt(12); 2737 pshufb[15] = Nucleus::createConstantInt(13); 2738 2739 Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType()); 2740 packed = Nucleus::createShuffleVector(byte16, 
UndefValue::get(Byte16::getType()), Nucleus::createConstantVector(pshufb, 16)); 2741 } 2742 2743 #if 0 // FIXME: No optimal instruction selection 2744 Value *qword2 = Nucleus::createBitCast(packed, Long2::getType()); 2745 Value *element = Nucleus::createExtractElement(qword2, 0); 2746 Value *short4 = Nucleus::createBitCast(element, Short4::getType()); 2747 #else // FIXME: Requires SSE 2748 Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value; 2749 Value *short4 = Nucleus::createBitCast(int2, Short4::getType()); 2750 #endif 2751 #endif 2752 2753 storeValue(short4); 2754 } 2755 2756 // Short4::Short4(RValue<Float> cast) 2757 // { 2758 // } 2759 2760 Short4::Short4(RValue<Float4> cast) 2761 { 2762 Int4 v4i32 = Int4(cast); 2763 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32)); 2764 2765 storeValue(As<Short4>(Int2(v4i32)).value); 2766 } 2767 2768 Short4::Short4() 2769 { 2770 // xyzw.parent = this; 2771 } 2772 2773 Short4::Short4(short xyzw) 2774 { 2775 // xyzw.parent = this; 2776 2777 Constant *constantVector[4]; 2778 constantVector[0] = Nucleus::createConstantShort(xyzw); 2779 constantVector[1] = Nucleus::createConstantShort(xyzw); 2780 constantVector[2] = Nucleus::createConstantShort(xyzw); 2781 constantVector[3] = Nucleus::createConstantShort(xyzw); 2782 Value *vector = Nucleus::createConstantVector(constantVector, 4); 2783 2784 storeValue(Nucleus::createBitCast(vector, getType())); 2785 } 2786 2787 Short4::Short4(short x, short y, short z, short w) 2788 { 2789 // xyzw.parent = this; 2790 2791 Constant *constantVector[4]; 2792 constantVector[0] = Nucleus::createConstantShort(x); 2793 constantVector[1] = Nucleus::createConstantShort(y); 2794 constantVector[2] = Nucleus::createConstantShort(z); 2795 constantVector[3] = Nucleus::createConstantShort(w); 2796 Value *vector = Nucleus::createConstantVector(constantVector, 4); 2797 2798 storeValue(Nucleus::createBitCast(vector, getType())); 2799 } 2800 2801 Short4::Short4(RValue<Short4> rhs) 2802 { 2803 // xyzw.parent = 
this; 2804 2805 storeValue(rhs.value); 2806 } 2807 2808 Short4::Short4(const Short4 &rhs) 2809 { 2810 // xyzw.parent = this; 2811 2812 Value *value = rhs.loadValue(); 2813 storeValue(value); 2814 } 2815 2816 Short4::Short4(const Reference<Short4> &rhs) 2817 { 2818 // xyzw.parent = this; 2819 2820 Value *value = rhs.loadValue(); 2821 storeValue(value); 2822 } 2823 2824 Short4::Short4(RValue<UShort4> rhs) 2825 { 2826 // xyzw.parent = this; 2827 2828 storeValue(rhs.value); 2829 } 2830 2831 Short4::Short4(const UShort4 &rhs) 2832 { 2833 // xyzw.parent = this; 2834 2835 storeValue(rhs.loadValue()); 2836 } 2837 2838 Short4::Short4(const Reference<UShort4> &rhs) 2839 { 2840 // xyzw.parent = this; 2841 2842 storeValue(rhs.loadValue()); 2843 } 2844 2845 RValue<Short4> Short4::operator=(RValue<Short4> rhs) const 2846 { 2847 storeValue(rhs.value); 2848 2849 return rhs; 2850 } 2851 2852 RValue<Short4> Short4::operator=(const Short4 &rhs) const 2853 { 2854 Value *value = rhs.loadValue(); 2855 storeValue(value); 2856 2857 return RValue<Short4>(value); 2858 } 2859 2860 RValue<Short4> Short4::operator=(const Reference<Short4> &rhs) const 2861 { 2862 Value *value = rhs.loadValue(); 2863 storeValue(value); 2864 2865 return RValue<Short4>(value); 2866 } 2867 2868 RValue<Short4> Short4::operator=(RValue<UShort4> rhs) const 2869 { 2870 storeValue(rhs.value); 2871 2872 return RValue<Short4>(rhs); 2873 } 2874 2875 RValue<Short4> Short4::operator=(const UShort4 &rhs) const 2876 { 2877 Value *value = rhs.loadValue(); 2878 storeValue(value); 2879 2880 return RValue<Short4>(value); 2881 } 2882 2883 RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs) const 2884 { 2885 Value *value = rhs.loadValue(); 2886 storeValue(value); 2887 2888 return RValue<Short4>(value); 2889 } 2890 2891 RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs) 2892 { 2893 if(CPUID::supportsMMX2()) 2894 { 2895 return x86::paddw(lhs, rhs); 2896 } 2897 else 2898 { 2899 return 
RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value)); 2900 } 2901 } 2902 2903 RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs) 2904 { 2905 if(CPUID::supportsMMX2()) 2906 { 2907 return x86::psubw(lhs, rhs); 2908 } 2909 else 2910 { 2911 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value)); 2912 } 2913 } 2914 2915 RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs) 2916 { 2917 if(CPUID::supportsMMX2()) 2918 { 2919 return x86::pmullw(lhs, rhs); 2920 } 2921 else 2922 { 2923 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value)); 2924 } 2925 } 2926 2927 // RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs) 2928 // { 2929 // return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value)); 2930 // } 2931 2932 // RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs) 2933 // { 2934 // return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value)); 2935 // } 2936 2937 RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs) 2938 { 2939 if(CPUID::supportsMMX2()) 2940 { 2941 return x86::pand(lhs, rhs); 2942 } 2943 else 2944 { 2945 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value)); 2946 } 2947 } 2948 2949 RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs) 2950 { 2951 if(CPUID::supportsMMX2()) 2952 { 2953 return x86::por(lhs, rhs); 2954 } 2955 else 2956 { 2957 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value)); 2958 } 2959 } 2960 2961 RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs) 2962 { 2963 if(CPUID::supportsMMX2()) 2964 { 2965 return x86::pxor(lhs, rhs); 2966 } 2967 else 2968 { 2969 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value)); 2970 } 2971 } 2972 2973 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs) 2974 { 2975 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value)); 2976 2977 return x86::psllw(lhs, rhs); 2978 } 2979 2980 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs) 
2981 { 2982 // return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value)); 2983 2984 return x86::psraw(lhs, rhs); 2985 } 2986 2987 RValue<Short4> operator<<(RValue<Short4> lhs, RValue<Long1> rhs) 2988 { 2989 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value)); 2990 2991 return x86::psllw(lhs, rhs); 2992 } 2993 2994 RValue<Short4> operator>>(RValue<Short4> lhs, RValue<Long1> rhs) 2995 { 2996 // return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value)); 2997 2998 return x86::psraw(lhs, rhs); 2999 } 3000 3001 RValue<Short4> operator+=(const Short4 &lhs, RValue<Short4> rhs) 3002 { 3003 return lhs = lhs + rhs; 3004 } 3005 3006 RValue<Short4> operator-=(const Short4 &lhs, RValue<Short4> rhs) 3007 { 3008 return lhs = lhs - rhs; 3009 } 3010 3011 RValue<Short4> operator*=(const Short4 &lhs, RValue<Short4> rhs) 3012 { 3013 return lhs = lhs * rhs; 3014 } 3015 3016 // RValue<Short4> operator/=(const Short4 &lhs, RValue<Short4> rhs) 3017 // { 3018 // return lhs = lhs / rhs; 3019 // } 3020 3021 // RValue<Short4> operator%=(const Short4 &lhs, RValue<Short4> rhs) 3022 // { 3023 // return lhs = lhs % rhs; 3024 // } 3025 3026 RValue<Short4> operator&=(const Short4 &lhs, RValue<Short4> rhs) 3027 { 3028 return lhs = lhs & rhs; 3029 } 3030 3031 RValue<Short4> operator|=(const Short4 &lhs, RValue<Short4> rhs) 3032 { 3033 return lhs = lhs | rhs; 3034 } 3035 3036 RValue<Short4> operator^=(const Short4 &lhs, RValue<Short4> rhs) 3037 { 3038 return lhs = lhs ^ rhs; 3039 } 3040 3041 RValue<Short4> operator<<=(const Short4 &lhs, unsigned char rhs) 3042 { 3043 return lhs = lhs << rhs; 3044 } 3045 3046 RValue<Short4> operator>>=(const Short4 &lhs, unsigned char rhs) 3047 { 3048 return lhs = lhs >> rhs; 3049 } 3050 3051 RValue<Short4> operator<<=(const Short4 &lhs, RValue<Long1> rhs) 3052 { 3053 return lhs = lhs << rhs; 3054 } 3055 3056 RValue<Short4> operator>>=(const Short4 &lhs, RValue<Long1> rhs) 3057 { 3058 return lhs = lhs >> rhs; 3059 } 3060 3061 // 
RValue<Short4> operator+(RValue<Short4> val) 3062 // { 3063 // return val; 3064 // } 3065 3066 RValue<Short4> operator-(RValue<Short4> val) 3067 { 3068 if(CPUID::supportsMMX2()) 3069 { 3070 return Short4(0, 0, 0, 0) - val; 3071 } 3072 else 3073 { 3074 return RValue<Short4>(Nucleus::createNeg(val.value)); 3075 } 3076 } 3077 3078 RValue<Short4> operator~(RValue<Short4> val) 3079 { 3080 if(CPUID::supportsMMX2()) 3081 { 3082 return val ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu); 3083 } 3084 else 3085 { 3086 return RValue<Short4>(Nucleus::createNot(val.value)); 3087 } 3088 } 3089 3090 RValue<Short4> RoundShort4(RValue<Float4> cast) 3091 { 3092 RValue<Int4> v4i32 = x86::cvtps2dq(cast); 3093 RValue<Short8> v8i16 = x86::packssdw(v4i32, v4i32); 3094 3095 return As<Short4>(Int2(As<Int4>(v8i16))); 3096 } 3097 3098 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y) 3099 { 3100 return x86::pmaxsw(x, y); 3101 } 3102 3103 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y) 3104 { 3105 return x86::pminsw(x, y); 3106 } 3107 3108 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y) 3109 { 3110 return x86::paddsw(x, y); 3111 } 3112 3113 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y) 3114 { 3115 return x86::psubsw(x, y); 3116 } 3117 3118 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y) 3119 { 3120 return x86::pmulhw(x, y); 3121 } 3122 3123 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y) 3124 { 3125 return x86::pmaddwd(x, y); 3126 } 3127 3128 RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y) 3129 { 3130 return x86::packsswb(x, y); 3131 } 3132 3133 RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y) 3134 { 3135 if(CPUID::supportsMMX2()) 3136 { 3137 return x86::punpcklwd(x, y); 3138 } 3139 else 3140 { 3141 Constant *shuffle[4]; 3142 shuffle[0] = Nucleus::createConstantInt(0); 3143 shuffle[1] = Nucleus::createConstantInt(4); 3144 shuffle[2] = Nucleus::createConstantInt(1); 3145 shuffle[3] = Nucleus::createConstantInt(5); 3146 3147 
Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4)); 3148 3149 return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType())); 3150 } 3151 } 3152 3153 RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y) 3154 { 3155 if(CPUID::supportsMMX2()) 3156 { 3157 return x86::punpckhwd(x, y); 3158 } 3159 else 3160 { 3161 Constant *shuffle[4]; 3162 shuffle[0] = Nucleus::createConstantInt(2); 3163 shuffle[1] = Nucleus::createConstantInt(6); 3164 shuffle[2] = Nucleus::createConstantInt(3); 3165 shuffle[3] = Nucleus::createConstantInt(7); 3166 3167 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4)); 3168 3169 return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType())); 3170 } 3171 } 3172 3173 RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select) 3174 { 3175 if(CPUID::supportsMMX2()) 3176 { 3177 return x86::pshufw(x, select); 3178 } 3179 else 3180 { 3181 return RValue<Short4>(Nucleus::createSwizzle(x.value, select)); 3182 } 3183 } 3184 3185 RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i) 3186 { 3187 if(CPUID::supportsMMX2()) 3188 { 3189 return x86::pinsrw(val, Int(element), i); 3190 } 3191 else 3192 { 3193 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i)); 3194 } 3195 } 3196 3197 RValue<Short> Extract(RValue<Short4> val, int i) 3198 { 3199 if(CPUID::supportsMMX2()) 3200 { 3201 return Short(x86::pextrw(val, i)); 3202 } 3203 else 3204 { 3205 return RValue<Short>(Nucleus::createExtractElement(val.value, i)); 3206 } 3207 } 3208 3209 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y) 3210 { 3211 return x86::pcmpgtw(x, y); 3212 } 3213 3214 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y) 3215 { 3216 return x86::pcmpeqw(x, y); 3217 } 3218 3219 Type *Short4::getType() 3220 { 3221 if(CPUID::supportsMMX2()) 3222 { 3223 return MMX::getType(); 3224 } 3225 else 3226 { 3227 return 
VectorType::get(Short::getType(), 4); 3228 } 3229 } 3230 3231 UShort4::UShort4(RValue<Int4> cast) 3232 { 3233 *this = Short4(cast); 3234 } 3235 3236 UShort4::UShort4(RValue<Float4> cast, bool saturate) 3237 { 3238 Float4 sat; 3239 3240 if(saturate) 3241 { 3242 if(CPUID::supportsSSE4_1()) 3243 { 3244 sat = Min(cast, Float4(0xFFFF)); // packusdw takes care of 0x0000 saturation 3245 } 3246 else 3247 { 3248 sat = Max(Min(cast, Float4(0xFFFF)), Float4(0x0000)); 3249 } 3250 } 3251 else 3252 { 3253 sat = cast; 3254 } 3255 3256 Int4 int4(sat); 3257 3258 if(!saturate || !CPUID::supportsSSE4_1()) 3259 { 3260 *this = Short4(Int4(int4)); 3261 } 3262 else 3263 { 3264 *this = As<Short4>(Int2(As<Int4>(x86::packusdw(As<UInt4>(int4), As<UInt4>(int4))))); 3265 } 3266 } 3267 3268 UShort4::UShort4() 3269 { 3270 // xyzw.parent = this; 3271 } 3272 3273 UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w) 3274 { 3275 // xyzw.parent = this; 3276 3277 Constant *constantVector[4]; 3278 constantVector[0] = Nucleus::createConstantShort(x); 3279 constantVector[1] = Nucleus::createConstantShort(y); 3280 constantVector[2] = Nucleus::createConstantShort(z); 3281 constantVector[3] = Nucleus::createConstantShort(w); 3282 Value *vector = Nucleus::createConstantVector(constantVector, 4); 3283 3284 storeValue(Nucleus::createBitCast(vector, getType())); 3285 } 3286 3287 UShort4::UShort4(RValue<UShort4> rhs) 3288 { 3289 // xyzw.parent = this; 3290 3291 storeValue(rhs.value); 3292 } 3293 3294 UShort4::UShort4(const UShort4 &rhs) 3295 { 3296 // xyzw.parent = this; 3297 3298 Value *value = rhs.loadValue(); 3299 storeValue(value); 3300 } 3301 3302 UShort4::UShort4(const Reference<UShort4> &rhs) 3303 { 3304 // xyzw.parent = this; 3305 3306 Value *value = rhs.loadValue(); 3307 storeValue(value); 3308 } 3309 3310 UShort4::UShort4(RValue<Short4> rhs) 3311 { 3312 // xyzw.parent = this; 3313 3314 storeValue(rhs.value); 3315 } 3316 3317 UShort4::UShort4(const Short4 &rhs) 3318 { 
3319 // xyzw.parent = this; 3320 3321 Value *value = rhs.loadValue(); 3322 storeValue(value); 3323 } 3324 3325 UShort4::UShort4(const Reference<Short4> &rhs) 3326 { 3327 // xyzw.parent = this; 3328 3329 Value *value = rhs.loadValue(); 3330 storeValue(value); 3331 } 3332 3333 RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs) const 3334 { 3335 storeValue(rhs.value); 3336 3337 return rhs; 3338 } 3339 3340 RValue<UShort4> UShort4::operator=(const UShort4 &rhs) const 3341 { 3342 Value *value = rhs.loadValue(); 3343 storeValue(value); 3344 3345 return RValue<UShort4>(value); 3346 } 3347 3348 RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs) const 3349 { 3350 Value *value = rhs.loadValue(); 3351 storeValue(value); 3352 3353 return RValue<UShort4>(value); 3354 } 3355 3356 RValue<UShort4> UShort4::operator=(RValue<Short4> rhs) const 3357 { 3358 storeValue(rhs.value); 3359 3360 return RValue<UShort4>(rhs); 3361 } 3362 3363 RValue<UShort4> UShort4::operator=(const Short4 &rhs) const 3364 { 3365 Value *value = rhs.loadValue(); 3366 storeValue(value); 3367 3368 return RValue<UShort4>(value); 3369 } 3370 3371 RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs) const 3372 { 3373 Value *value = rhs.loadValue(); 3374 storeValue(value); 3375 3376 return RValue<UShort4>(value); 3377 } 3378 3379 RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs) 3380 { 3381 if(CPUID::supportsMMX2()) 3382 { 3383 return As<UShort4>(x86::paddw(As<Short4>(lhs), As<Short4>(rhs))); 3384 } 3385 else 3386 { 3387 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value)); 3388 } 3389 } 3390 3391 RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs) 3392 { 3393 if(CPUID::supportsMMX2()) 3394 { 3395 return As<UShort4>(x86::psubw(As<Short4>(lhs), As<Short4>(rhs))); 3396 } 3397 else 3398 { 3399 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value)); 3400 } 3401 } 3402 3403 3404 RValue<UShort4> operator*(RValue<UShort4> lhs, 
RValue<UShort4> rhs) 3405 { 3406 if(CPUID::supportsMMX2()) 3407 { 3408 return As<UShort4>(x86::pmullw(As<Short4>(lhs), As<Short4>(rhs))); 3409 } 3410 else 3411 { 3412 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value)); 3413 } 3414 } 3415 3416 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs) 3417 { 3418 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value)); 3419 3420 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs)); 3421 } 3422 3423 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs) 3424 { 3425 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value)); 3426 3427 return x86::psrlw(lhs, rhs); 3428 } 3429 3430 RValue<UShort4> operator<<(RValue<UShort4> lhs, RValue<Long1> rhs) 3431 { 3432 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value)); 3433 3434 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs)); 3435 } 3436 3437 RValue<UShort4> operator>>(RValue<UShort4> lhs, RValue<Long1> rhs) 3438 { 3439 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value)); 3440 3441 return x86::psrlw(lhs, rhs); 3442 } 3443 3444 RValue<UShort4> operator<<=(const UShort4 &lhs, unsigned char rhs) 3445 { 3446 return lhs = lhs << rhs; 3447 } 3448 3449 RValue<UShort4> operator>>=(const UShort4 &lhs, unsigned char rhs) 3450 { 3451 return lhs = lhs >> rhs; 3452 } 3453 3454 RValue<UShort4> operator<<=(const UShort4 &lhs, RValue<Long1> rhs) 3455 { 3456 return lhs = lhs << rhs; 3457 } 3458 3459 RValue<UShort4> operator>>=(const UShort4 &lhs, RValue<Long1> rhs) 3460 { 3461 return lhs = lhs >> rhs; 3462 } 3463 3464 RValue<UShort4> operator~(RValue<UShort4> val) 3465 { 3466 if(CPUID::supportsMMX2()) 3467 { 3468 return As<UShort4>(As<Short4>(val) ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu)); 3469 } 3470 else 3471 { 3472 return RValue<UShort4>(Nucleus::createNot(val.value)); 3473 } 3474 } 3475 3476 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y) 3477 { 3478 return 
RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)); 3479 } 3480 3481 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y) 3482 { 3483 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)); 3484 } 3485 3486 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y) 3487 { 3488 return x86::paddusw(x, y); 3489 } 3490 3491 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y) 3492 { 3493 return x86::psubusw(x, y); 3494 } 3495 3496 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y) 3497 { 3498 return x86::pmulhuw(x, y); 3499 } 3500 3501 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y) 3502 { 3503 return x86::pavgw(x, y); 3504 } 3505 3506 RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y) 3507 { 3508 return x86::packuswb(x, y); 3509 } 3510 3511 Type *UShort4::getType() 3512 { 3513 if(CPUID::supportsMMX2()) 3514 { 3515 return MMX::getType(); 3516 } 3517 else 3518 { 3519 return VectorType::get(UShort::getType(), 4); 3520 } 3521 } 3522 3523 Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7) 3524 { 3525 // xyzw.parent = this; 3526 3527 Constant *constantVector[8]; 3528 constantVector[0] = Nucleus::createConstantShort(c0); 3529 constantVector[1] = Nucleus::createConstantShort(c1); 3530 constantVector[2] = Nucleus::createConstantShort(c2); 3531 constantVector[3] = Nucleus::createConstantShort(c3); 3532 constantVector[4] = Nucleus::createConstantShort(c4); 3533 constantVector[5] = Nucleus::createConstantShort(c5); 3534 constantVector[6] = Nucleus::createConstantShort(c6); 3535 constantVector[7] = Nucleus::createConstantShort(c7); 3536 3537 storeValue(Nucleus::createConstantVector(constantVector, 8)); 3538 } 3539 3540 
Short8::Short8(RValue<Short8> rhs) 3541 { 3542 // xyzw.parent = this; 3543 3544 storeValue(rhs.value); 3545 } 3546 3547 Short8::Short8(RValue<Short4> lo, RValue<Short4> hi) 3548 { 3549 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType()); 3550 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType()); 3551 3552 Value *long2 = UndefValue::get(Long2::getType()); 3553 long2 = Nucleus::createInsertElement(long2, loLong, 0); 3554 long2 = Nucleus::createInsertElement(long2, hiLong, 1); 3555 Value *short8 = Nucleus::createBitCast(long2, Short8::getType()); 3556 3557 storeValue(short8); 3558 } 3559 3560 RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs) 3561 { 3562 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value)); 3563 } 3564 3565 RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs) 3566 { 3567 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value)); 3568 } 3569 3570 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs) 3571 { 3572 return x86::psllw(lhs, rhs); // FIXME: Fallback required 3573 } 3574 3575 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs) 3576 { 3577 return x86::psraw(lhs, rhs); // FIXME: Fallback required 3578 } 3579 3580 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y) 3581 { 3582 return x86::pmaddwd(x, y); // FIXME: Fallback required 3583 } 3584 3585 RValue<Int4> Abs(RValue<Int4> x) 3586 { 3587 if(CPUID::supportsSSSE3()) 3588 { 3589 return x86::pabsd(x); 3590 } 3591 else 3592 { 3593 Int4 mask = (x >> 31); 3594 return (mask ^ x) - mask; 3595 } 3596 } 3597 3598 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y) 3599 { 3600 return x86::pmulhw(x, y); // FIXME: Fallback required 3601 } 3602 3603 Type *Short8::getType() 3604 { 3605 return VectorType::get(Short::getType(), 8); 3606 } 3607 3608 UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned 
short c7) 3609 { 3610 // xyzw.parent = this; 3611 3612 Constant *constantVector[8]; 3613 constantVector[0] = Nucleus::createConstantShort(c0); 3614 constantVector[1] = Nucleus::createConstantShort(c1); 3615 constantVector[2] = Nucleus::createConstantShort(c2); 3616 constantVector[3] = Nucleus::createConstantShort(c3); 3617 constantVector[4] = Nucleus::createConstantShort(c4); 3618 constantVector[5] = Nucleus::createConstantShort(c5); 3619 constantVector[6] = Nucleus::createConstantShort(c6); 3620 constantVector[7] = Nucleus::createConstantShort(c7); 3621 3622 storeValue(Nucleus::createConstantVector(constantVector, 8)); 3623 } 3624 3625 UShort8::UShort8(RValue<UShort8> rhs) 3626 { 3627 // xyzw.parent = this; 3628 3629 storeValue(rhs.value); 3630 } 3631 3632 UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi) 3633 { 3634 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType()); 3635 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType()); 3636 3637 Value *long2 = UndefValue::get(Long2::getType()); 3638 long2 = Nucleus::createInsertElement(long2, loLong, 0); 3639 long2 = Nucleus::createInsertElement(long2, hiLong, 1); 3640 Value *short8 = Nucleus::createBitCast(long2, Short8::getType()); 3641 3642 storeValue(short8); 3643 } 3644 3645 RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs) const 3646 { 3647 storeValue(rhs.value); 3648 3649 return rhs; 3650 } 3651 3652 RValue<UShort8> UShort8::operator=(const UShort8 &rhs) const 3653 { 3654 Value *value = rhs.loadValue(); 3655 storeValue(value); 3656 3657 return RValue<UShort8>(value); 3658 } 3659 3660 RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs) const 3661 { 3662 Value *value = rhs.loadValue(); 3663 storeValue(value); 3664 3665 return RValue<UShort8>(value); 3666 } 3667 3668 RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs) 3669 { 3670 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value)); 3671 } 3672 3673 RValue<UShort8> 
operator<<(RValue<UShort8> lhs, unsigned char rhs) 3674 { 3675 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs)); // FIXME: Fallback required 3676 } 3677 3678 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs) 3679 { 3680 return x86::psrlw(lhs, rhs); // FIXME: Fallback required 3681 } 3682 3683 RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs) 3684 { 3685 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value)); 3686 } 3687 3688 RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs) 3689 { 3690 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value)); 3691 } 3692 3693 RValue<UShort8> operator+=(const UShort8 &lhs, RValue<UShort8> rhs) 3694 { 3695 return lhs = lhs + rhs; 3696 } 3697 3698 RValue<UShort8> operator~(RValue<UShort8> val) 3699 { 3700 return RValue<UShort8>(Nucleus::createNot(val.value)); 3701 } 3702 3703 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7) 3704 { 3705 Constant *pshufb[16]; 3706 pshufb[0] = Nucleus::createConstantInt(select0 + 0); 3707 pshufb[1] = Nucleus::createConstantInt(select0 + 1); 3708 pshufb[2] = Nucleus::createConstantInt(select1 + 0); 3709 pshufb[3] = Nucleus::createConstantInt(select1 + 1); 3710 pshufb[4] = Nucleus::createConstantInt(select2 + 0); 3711 pshufb[5] = Nucleus::createConstantInt(select2 + 1); 3712 pshufb[6] = Nucleus::createConstantInt(select3 + 0); 3713 pshufb[7] = Nucleus::createConstantInt(select3 + 1); 3714 pshufb[8] = Nucleus::createConstantInt(select4 + 0); 3715 pshufb[9] = Nucleus::createConstantInt(select4 + 1); 3716 pshufb[10] = Nucleus::createConstantInt(select5 + 0); 3717 pshufb[11] = Nucleus::createConstantInt(select5 + 1); 3718 pshufb[12] = Nucleus::createConstantInt(select6 + 0); 3719 pshufb[13] = Nucleus::createConstantInt(select6 + 1); 3720 pshufb[14] = Nucleus::createConstantInt(select7 + 0); 3721 pshufb[15] = 
Nucleus::createConstantInt(select7 + 1); 3722 3723 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType()); 3724 Value *shuffle = Nucleus::createShuffleVector(byte16, UndefValue::get(Byte16::getType()), Nucleus::createConstantVector(pshufb, 16)); 3725 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType()); 3726 3727 return RValue<UShort8>(short8); 3728 } 3729 3730 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y) 3731 { 3732 return x86::pmulhuw(x, y); // FIXME: Fallback required 3733 } 3734 3735 // FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element)) 3736 // RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element) 3737 // { 3738 // Constant *pshufb[16]; 3739 // pshufb[0] = Nucleus::createConstantInt(element + 0); 3740 // pshufb[1] = Nucleus::createConstantInt(element + 0); 3741 // pshufb[2] = Nucleus::createConstantInt(element + 4); 3742 // pshufb[3] = Nucleus::createConstantInt(element + 4); 3743 // pshufb[4] = Nucleus::createConstantInt(element + 8); 3744 // pshufb[5] = Nucleus::createConstantInt(element + 8); 3745 // pshufb[6] = Nucleus::createConstantInt(element + 12); 3746 // pshufb[7] = Nucleus::createConstantInt(element + 12); 3747 // pshufb[8] = Nucleus::createConstantInt(element + 16); 3748 // pshufb[9] = Nucleus::createConstantInt(element + 16); 3749 // pshufb[10] = Nucleus::createConstantInt(element + 20); 3750 // pshufb[11] = Nucleus::createConstantInt(element + 20); 3751 // pshufb[12] = Nucleus::createConstantInt(element + 24); 3752 // pshufb[13] = Nucleus::createConstantInt(element + 24); 3753 // pshufb[14] = Nucleus::createConstantInt(element + 28); 3754 // pshufb[15] = Nucleus::createConstantInt(element + 28); 3755 // 3756 // Value *shuffle = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(pshufb, 16)); 3757 // Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType()); 3758 // 3759 // return 
RValue<UShort8>(short8); 3760 // } 3761 3762 Type *UShort8::getType() 3763 { 3764 return VectorType::get(UShort::getType(), 8); 3765 } 3766 3767 Int::Int(Argument<Int> argument) 3768 { 3769 storeValue(argument.value); 3770 } 3771 3772 Int::Int(RValue<Byte> cast) 3773 { 3774 Value *integer = Nucleus::createZExt(cast.value, Int::getType()); 3775 3776 storeValue(integer); 3777 } 3778 3779 Int::Int(RValue<SByte> cast) 3780 { 3781 Value *integer = Nucleus::createSExt(cast.value, Int::getType()); 3782 3783 storeValue(integer); 3784 } 3785 3786 Int::Int(RValue<Short> cast) 3787 { 3788 Value *integer = Nucleus::createSExt(cast.value, Int::getType()); 3789 3790 storeValue(integer); 3791 } 3792 3793 Int::Int(RValue<UShort> cast) 3794 { 3795 Value *integer = Nucleus::createZExt(cast.value, Int::getType()); 3796 3797 storeValue(integer); 3798 } 3799 3800 Int::Int(RValue<Int2> cast) 3801 { 3802 *this = Extract(cast, 0); 3803 } 3804 3805 Int::Int(RValue<Long> cast) 3806 { 3807 Value *integer = Nucleus::createTrunc(cast.value, Int::getType()); 3808 3809 storeValue(integer); 3810 } 3811 3812 Int::Int(RValue<Float> cast) 3813 { 3814 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType()); 3815 3816 storeValue(integer); 3817 } 3818 3819 Int::Int() 3820 { 3821 } 3822 3823 Int::Int(int x) 3824 { 3825 storeValue(Nucleus::createConstantInt(x)); 3826 } 3827 3828 Int::Int(RValue<Int> rhs) 3829 { 3830 storeValue(rhs.value); 3831 } 3832 3833 Int::Int(RValue<UInt> rhs) 3834 { 3835 storeValue(rhs.value); 3836 } 3837 3838 Int::Int(const Int &rhs) 3839 { 3840 Value *value = rhs.loadValue(); 3841 storeValue(value); 3842 } 3843 3844 Int::Int(const Reference<Int> &rhs) 3845 { 3846 Value *value = rhs.loadValue(); 3847 storeValue(value); 3848 } 3849 3850 Int::Int(const UInt &rhs) 3851 { 3852 Value *value = rhs.loadValue(); 3853 storeValue(value); 3854 } 3855 3856 Int::Int(const Reference<UInt> &rhs) 3857 { 3858 Value *value = rhs.loadValue(); 3859 storeValue(value); 3860 } 3861 3862 
// Assigns a constant to the variable and yields the stored value.
RValue<Int> Int::operator=(int rhs) const
{
	Constant *constant = Nucleus::createConstantInt(rhs);

	return RValue<Int>(storeValue(constant));
}

// Assigns an Int value; the right-hand side is returned unchanged.
RValue<Int> Int::operator=(RValue<Int> rhs) const
{
	storeValue(rhs.value);

	return rhs;
}

// Assigns a UInt value without emitting a conversion instruction.
RValue<Int> Int::operator=(RValue<UInt> rhs) const
{
	storeValue(rhs.value);

	return RValue<Int>(rhs);
}

// Assignment from variables and references: load once, store, and return the loaded value.
RValue<Int> Int::operator=(const Int &rhs) const
{
	Value *loaded = rhs.loadValue();
	storeValue(loaded);

	return RValue<Int>(loaded);
}

RValue<Int> Int::operator=(const Reference<Int> &rhs) const
{
	Value *loaded = rhs.loadValue();
	storeValue(loaded);

	return RValue<Int>(loaded);
}

RValue<Int> Int::operator=(const UInt &rhs) const
{
	Value *loaded = rhs.loadValue();
	storeValue(loaded);

	return RValue<Int>(loaded);
}

RValue<Int> Int::operator=(const Reference<UInt> &rhs) const
{
	Value *loaded = rhs.loadValue();
	storeValue(loaded);

	return RValue<Int>(loaded);
}

// Binary operators: each forwards both operands to the Nucleus::create* call
// named in its body and wraps the resulting value.
RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
{
	Value *sum = Nucleus::createAdd(lhs.value, rhs.value);

	return RValue<Int>(sum);
}

RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
{
	Value *difference = Nucleus::createSub(lhs.value, rhs.value);

	return RValue<Int>(difference);
}

RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
{
	Value *product = Nucleus::createMul(lhs.value, rhs.value);

	return RValue<Int>(product);
}

// Int division uses the signed variant (createSDiv).
RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
{
	Value *quotient = Nucleus::createSDiv(lhs.value, rhs.value);

	return RValue<Int>(quotient);
}

// Int remainder uses the signed variant (createSRem).
RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
{
	Value *remainder = Nucleus::createSRem(lhs.value, rhs.value);

	return RValue<Int>(remainder);
}

RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
{
	Value *bits = Nucleus::createAnd(lhs.value, rhs.value);

	return RValue<Int>(bits);
}

RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
{
	Value *bits = Nucleus::createOr(lhs.value, rhs.value);

	return RValue<Int>(bits);
}

RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
{
	Value *bits = Nucleus::createXor(lhs.value, rhs.value);

	return RValue<Int>(bits);
}

RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
{
	Value *shifted = Nucleus::createShl(lhs.value, rhs.value);

	return RValue<Int>(shifted);
}

// Int right shift uses createAShr (UInt uses createLShr).
RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
{
	Value *shifted = Nucleus::createAShr(lhs.value, rhs.value);

	return RValue<Int>(shifted);
}

// Compound assignments: evaluate the matching binary operator on the current
// value of lhs, assign the result back, and return what the assignment yields.
RValue<Int> operator+=(const Int &lhs, RValue<Int> rhs)
{
	RValue<Int> result = lhs + rhs;

	return lhs = result;
}

RValue<Int> operator-=(const Int &lhs, RValue<Int> rhs)
{
	RValue<Int> result = lhs - rhs;

	return lhs = result;
}

RValue<Int> operator*=(const Int &lhs, RValue<Int> rhs)
{
	RValue<Int> result = lhs * rhs;

	return lhs = result;
}

RValue<Int> operator/=(const Int &lhs, RValue<Int> rhs)
{
	RValue<Int> result = lhs / rhs;

	return lhs = result;
}

RValue<Int> operator%=(const Int &lhs, RValue<Int> rhs)
{
	RValue<Int> result = lhs % rhs;

	return lhs = result;
}

RValue<Int> operator&=(const Int &lhs, RValue<Int> rhs)
{
	RValue<Int> result = lhs & rhs;

	return lhs = result;
}

RValue<Int> operator|=(const Int &lhs, RValue<Int> rhs)
{
	RValue<Int> result = lhs | rhs;

	return lhs = result;
}

RValue<Int> operator^=(const Int &lhs, RValue<Int> rhs)
{
	RValue<Int> result = lhs ^ rhs;

	return lhs = result;
}

RValue<Int> operator<<=(const Int &lhs, RValue<Int> rhs)
{
	RValue<Int> result = lhs << rhs;

	return lhs = result;
}

RValue<Int> operator>>=(const Int &lhs, RValue<Int> rhs)
{
	RValue<Int> result = lhs >> rhs;

	return lhs = result;
}

// Unary plus returns its operand untouched.
RValue<Int> operator+(RValue<Int> val)
{
	return val;
}

RValue<Int> operator-(RValue<Int> val)
{
	Value *negated = Nucleus::createNeg(val.value);

	return RValue<Int>(negated);
}

RValue<Int> operator~(RValue<Int> val)
{
	Value *inverted = Nucleus::createNot(val.value);

	return RValue<Int>(inverted);
}
4028 RValue<Int> operator++(const Int &val, int) // Post-increment 4029 { 4030 RValue<Int> res = val; 4031 4032 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1)); 4033 val.storeValue(inc); 4034 4035 return res; 4036 } 4037 4038 const Int &operator++(const Int &val) // Pre-increment 4039 { 4040 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1)); 4041 val.storeValue(inc); 4042 4043 return val; 4044 } 4045 4046 RValue<Int> operator--(const Int &val, int) // Post-decrement 4047 { 4048 RValue<Int> res = val; 4049 4050 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1)); 4051 val.storeValue(inc); 4052 4053 return res; 4054 } 4055 4056 const Int &operator--(const Int &val) // Pre-decrement 4057 { 4058 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1)); 4059 val.storeValue(inc); 4060 4061 return val; 4062 } 4063 4064 RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs) 4065 { 4066 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value)); 4067 } 4068 4069 RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs) 4070 { 4071 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value)); 4072 } 4073 4074 RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs) 4075 { 4076 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value)); 4077 } 4078 4079 RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs) 4080 { 4081 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value)); 4082 } 4083 4084 RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs) 4085 { 4086 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 4087 } 4088 4089 RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs) 4090 { 4091 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 4092 } 4093 4094 RValue<Int> Max(RValue<Int> x, RValue<Int> y) 4095 { 4096 return IfThenElse(x > y, x, y); 4097 } 4098 4099 RValue<Int> Min(RValue<Int> x, RValue<Int> y) 
4100 { 4101 return IfThenElse(x < y, x, y); 4102 } 4103 4104 RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max) 4105 { 4106 return Min(Max(x, min), max); 4107 } 4108 4109 RValue<Int> RoundInt(RValue<Float> cast) 4110 { 4111 return x86::cvtss2si(cast); 4112 4113 // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f)); 4114 } 4115 4116 Type *Int::getType() 4117 { 4118 return Type::getInt32Ty(*Nucleus::getContext()); 4119 } 4120 4121 Long::Long(RValue<Int> cast) 4122 { 4123 4124 4125 Value *integer = Nucleus::createSExt(cast.value, Long::getType()); 4126 4127 storeValue(integer); 4128 } 4129 4130 Long::Long(RValue<UInt> cast) 4131 { 4132 Value *integer = Nucleus::createZExt(cast.value, Long::getType()); 4133 4134 storeValue(integer); 4135 } 4136 4137 Long::Long() 4138 { 4139 } 4140 4141 Long::Long(RValue<Long> rhs) 4142 { 4143 storeValue(rhs.value); 4144 } 4145 4146 RValue<Long> Long::operator=(int64_t rhs) const 4147 { 4148 return RValue<Long>(storeValue(Nucleus::createConstantInt(rhs))); 4149 } 4150 4151 RValue<Long> Long::operator=(RValue<Long> rhs) const 4152 { 4153 storeValue(rhs.value); 4154 4155 return rhs; 4156 } 4157 4158 RValue<Long> Long::operator=(const Long &rhs) const 4159 { 4160 Value *value = rhs.loadValue(); 4161 storeValue(value); 4162 4163 return RValue<Long>(value); 4164 } 4165 4166 RValue<Long> Long::operator=(const Reference<Long> &rhs) const 4167 { 4168 Value *value = rhs.loadValue(); 4169 storeValue(value); 4170 4171 return RValue<Long>(value); 4172 } 4173 4174 RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs) 4175 { 4176 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value)); 4177 } 4178 4179 RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs) 4180 { 4181 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value)); 4182 } 4183 4184 RValue<Long> operator+=(const Long &lhs, RValue<Long> rhs) 4185 { 4186 return lhs = lhs + rhs; 4187 } 4188 4189 RValue<Long> operator-=(const Long &lhs, 
RValue<Long> rhs) 4190 { 4191 return lhs = lhs - rhs; 4192 } 4193 4194 RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y) 4195 { 4196 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value)); 4197 } 4198 4199 Type *Long::getType() 4200 { 4201 return Type::getInt64Ty(*Nucleus::getContext()); 4202 } 4203 4204 Long1::Long1(const RValue<UInt> cast) 4205 { 4206 Value *undefCast = Nucleus::createInsertElement(UndefValue::get(VectorType::get(Int::getType(), 2)), cast.value, 0); 4207 Value *zeroCast = Nucleus::createInsertElement(undefCast, Nucleus::createConstantInt(0), 1); 4208 4209 storeValue(Nucleus::createBitCast(zeroCast, Long1::getType())); 4210 } 4211 4212 Long1::Long1(RValue<Long1> rhs) 4213 { 4214 storeValue(rhs.value); 4215 } 4216 4217 Type *Long1::getType() 4218 { 4219 if(CPUID::supportsMMX2()) 4220 { 4221 return MMX::getType(); 4222 } 4223 else 4224 { 4225 return VectorType::get(Long::getType(), 1); 4226 } 4227 } 4228 4229 RValue<Long2> UnpackHigh(RValue<Long2> x, RValue<Long2> y) 4230 { 4231 Constant *shuffle[2]; 4232 shuffle[0] = Nucleus::createConstantInt(1); 4233 shuffle[1] = Nucleus::createConstantInt(3); 4234 4235 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 2)); 4236 4237 return RValue<Long2>(packed); 4238 } 4239 4240 Type *Long2::getType() 4241 { 4242 return VectorType::get(Long::getType(), 2); 4243 } 4244 4245 UInt::UInt(Argument<UInt> argument) 4246 { 4247 storeValue(argument.value); 4248 } 4249 4250 UInt::UInt(RValue<UShort> cast) 4251 { 4252 Value *integer = Nucleus::createZExt(cast.value, UInt::getType()); 4253 4254 storeValue(integer); 4255 } 4256 4257 UInt::UInt(RValue<Long> cast) 4258 { 4259 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType()); 4260 4261 storeValue(integer); 4262 } 4263 4264 UInt::UInt(RValue<Float> cast) 4265 { 4266 Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType()); 4267 4268 storeValue(integer); 4269 } 4270 4271 
UInt::UInt() 4272 { 4273 } 4274 4275 UInt::UInt(int x) 4276 { 4277 storeValue(Nucleus::createConstantInt(x)); 4278 } 4279 4280 UInt::UInt(unsigned int x) 4281 { 4282 storeValue(Nucleus::createConstantInt(x)); 4283 } 4284 4285 UInt::UInt(RValue<UInt> rhs) 4286 { 4287 storeValue(rhs.value); 4288 } 4289 4290 UInt::UInt(RValue<Int> rhs) 4291 { 4292 storeValue(rhs.value); 4293 } 4294 4295 UInt::UInt(const UInt &rhs) 4296 { 4297 Value *value = rhs.loadValue(); 4298 storeValue(value); 4299 } 4300 4301 UInt::UInt(const Reference<UInt> &rhs) 4302 { 4303 Value *value = rhs.loadValue(); 4304 storeValue(value); 4305 } 4306 4307 UInt::UInt(const Int &rhs) 4308 { 4309 Value *value = rhs.loadValue(); 4310 storeValue(value); 4311 } 4312 4313 UInt::UInt(const Reference<Int> &rhs) 4314 { 4315 Value *value = rhs.loadValue(); 4316 storeValue(value); 4317 } 4318 4319 RValue<UInt> UInt::operator=(unsigned int rhs) const 4320 { 4321 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs))); 4322 } 4323 4324 RValue<UInt> UInt::operator=(RValue<UInt> rhs) const 4325 { 4326 storeValue(rhs.value); 4327 4328 return rhs; 4329 } 4330 4331 RValue<UInt> UInt::operator=(RValue<Int> rhs) const 4332 { 4333 storeValue(rhs.value); 4334 4335 return RValue<UInt>(rhs); 4336 } 4337 4338 RValue<UInt> UInt::operator=(const UInt &rhs) const 4339 { 4340 Value *value = rhs.loadValue(); 4341 storeValue(value); 4342 4343 return RValue<UInt>(value); 4344 } 4345 4346 RValue<UInt> UInt::operator=(const Reference<UInt> &rhs) const 4347 { 4348 Value *value = rhs.loadValue(); 4349 storeValue(value); 4350 4351 return RValue<UInt>(value); 4352 } 4353 4354 RValue<UInt> UInt::operator=(const Int &rhs) const 4355 { 4356 Value *value = rhs.loadValue(); 4357 storeValue(value); 4358 4359 return RValue<UInt>(value); 4360 } 4361 4362 RValue<UInt> UInt::operator=(const Reference<Int> &rhs) const 4363 { 4364 Value *value = rhs.loadValue(); 4365 storeValue(value); 4366 4367 return RValue<UInt>(value); 4368 } 4369 4370 
RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs) 4371 { 4372 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value)); 4373 } 4374 4375 RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs) 4376 { 4377 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value)); 4378 } 4379 4380 RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs) 4381 { 4382 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value)); 4383 } 4384 4385 RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs) 4386 { 4387 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value)); 4388 } 4389 4390 RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs) 4391 { 4392 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value)); 4393 } 4394 4395 RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs) 4396 { 4397 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value)); 4398 } 4399 4400 RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs) 4401 { 4402 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value)); 4403 } 4404 4405 RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs) 4406 { 4407 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value)); 4408 } 4409 4410 RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs) 4411 { 4412 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value)); 4413 } 4414 4415 RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs) 4416 { 4417 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value)); 4418 } 4419 4420 RValue<UInt> operator+=(const UInt &lhs, RValue<UInt> rhs) 4421 { 4422 return lhs = lhs + rhs; 4423 } 4424 4425 RValue<UInt> operator-=(const UInt &lhs, RValue<UInt> rhs) 4426 { 4427 return lhs = lhs - rhs; 4428 } 4429 4430 RValue<UInt> operator*=(const UInt &lhs, RValue<UInt> rhs) 4431 { 4432 return lhs = lhs * rhs; 4433 } 4434 4435 RValue<UInt> operator/=(const UInt &lhs, RValue<UInt> rhs) 4436 { 4437 return lhs = lhs / rhs; 4438 } 4439 4440 RValue<UInt> operator%=(const UInt 
&lhs, RValue<UInt> rhs) 4441 { 4442 return lhs = lhs % rhs; 4443 } 4444 4445 RValue<UInt> operator&=(const UInt &lhs, RValue<UInt> rhs) 4446 { 4447 return lhs = lhs & rhs; 4448 } 4449 4450 RValue<UInt> operator|=(const UInt &lhs, RValue<UInt> rhs) 4451 { 4452 return lhs = lhs | rhs; 4453 } 4454 4455 RValue<UInt> operator^=(const UInt &lhs, RValue<UInt> rhs) 4456 { 4457 return lhs = lhs ^ rhs; 4458 } 4459 4460 RValue<UInt> operator<<=(const UInt &lhs, RValue<UInt> rhs) 4461 { 4462 return lhs = lhs << rhs; 4463 } 4464 4465 RValue<UInt> operator>>=(const UInt &lhs, RValue<UInt> rhs) 4466 { 4467 return lhs = lhs >> rhs; 4468 } 4469 4470 RValue<UInt> operator+(RValue<UInt> val) 4471 { 4472 return val; 4473 } 4474 4475 RValue<UInt> operator-(RValue<UInt> val) 4476 { 4477 return RValue<UInt>(Nucleus::createNeg(val.value)); 4478 } 4479 4480 RValue<UInt> operator~(RValue<UInt> val) 4481 { 4482 return RValue<UInt>(Nucleus::createNot(val.value)); 4483 } 4484 4485 RValue<UInt> operator++(const UInt &val, int) // Post-increment 4486 { 4487 RValue<UInt> res = val; 4488 4489 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1)); 4490 val.storeValue(inc); 4491 4492 return res; 4493 } 4494 4495 const UInt &operator++(const UInt &val) // Pre-increment 4496 { 4497 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1)); 4498 val.storeValue(inc); 4499 4500 return val; 4501 } 4502 4503 RValue<UInt> operator--(const UInt &val, int) // Post-decrement 4504 { 4505 RValue<UInt> res = val; 4506 4507 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1)); 4508 val.storeValue(inc); 4509 4510 return res; 4511 } 4512 4513 const UInt &operator--(const UInt &val) // Pre-decrement 4514 { 4515 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1)); 4516 val.storeValue(inc); 4517 4518 return val; 4519 } 4520 4521 RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y) 4522 { 4523 return IfThenElse(x > y, x, y); 4524 } 
4525 4526 RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y) 4527 { 4528 return IfThenElse(x < y, x, y); 4529 } 4530 4531 RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max) 4532 { 4533 return Min(Max(x, min), max); 4534 } 4535 4536 RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs) 4537 { 4538 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value)); 4539 } 4540 4541 RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs) 4542 { 4543 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value)); 4544 } 4545 4546 RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs) 4547 { 4548 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value)); 4549 } 4550 4551 RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs) 4552 { 4553 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value)); 4554 } 4555 4556 RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs) 4557 { 4558 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 4559 } 4560 4561 RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs) 4562 { 4563 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 4564 } 4565 4566 // RValue<UInt> RoundUInt(RValue<Float> cast) 4567 // { 4568 // return x86::cvtss2si(val); // FIXME: Unsigned 4569 // 4570 // // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f)); 4571 // } 4572 4573 Type *UInt::getType() 4574 { 4575 return Type::getInt32Ty(*Nucleus::getContext()); 4576 } 4577 4578 // Int2::Int2(RValue<Int> cast) 4579 // { 4580 // Value *extend = Nucleus::createZExt(cast.value, Long::getType()); 4581 // Value *vector = Nucleus::createBitCast(extend, Int2::getType()); 4582 // 4583 // Constant *shuffle[2]; 4584 // shuffle[0] = Nucleus::createConstantInt(0); 4585 // shuffle[1] = Nucleus::createConstantInt(0); 4586 // 4587 // Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2)); 4588 // 4589 // 
storeValue(replicate); 4590 // } 4591 4592 Int2::Int2(RValue<Int4> cast) 4593 { 4594 Value *long2 = Nucleus::createBitCast(cast.value, Long2::getType()); 4595 Value *element = Nucleus::createExtractElement(long2, 0); 4596 Value *int2 = Nucleus::createBitCast(element, Int2::getType()); 4597 4598 storeValue(int2); 4599 } 4600 4601 Int2::Int2() 4602 { 4603 // xy.parent = this; 4604 } 4605 4606 Int2::Int2(int x, int y) 4607 { 4608 // xy.parent = this; 4609 4610 Constant *constantVector[2]; 4611 constantVector[0] = Nucleus::createConstantInt(x); 4612 constantVector[1] = Nucleus::createConstantInt(y); 4613 Value *vector = Nucleus::createConstantVector(constantVector, 2); 4614 4615 storeValue(Nucleus::createBitCast(vector, getType())); 4616 } 4617 4618 Int2::Int2(RValue<Int2> rhs) 4619 { 4620 // xy.parent = this; 4621 4622 storeValue(rhs.value); 4623 } 4624 4625 Int2::Int2(const Int2 &rhs) 4626 { 4627 // xy.parent = this; 4628 4629 Value *value = rhs.loadValue(); 4630 storeValue(value); 4631 } 4632 4633 Int2::Int2(const Reference<Int2> &rhs) 4634 { 4635 // xy.parent = this; 4636 4637 Value *value = rhs.loadValue(); 4638 storeValue(value); 4639 } 4640 4641 Int2::Int2(RValue<Int> lo, RValue<Int> hi) 4642 { 4643 if(CPUID::supportsMMX2()) 4644 { 4645 // movd mm0, lo 4646 // movd mm1, hi 4647 // punpckldq mm0, mm1 4648 storeValue(As<Int2>(UnpackLow(As<Int2>(Long1(RValue<UInt>(lo))), As<Int2>(Long1(RValue<UInt>(hi))))).value); 4649 } 4650 else 4651 { 4652 Constant *shuffle[2]; 4653 shuffle[0] = Nucleus::createConstantInt(0); 4654 shuffle[1] = Nucleus::createConstantInt(1); 4655 4656 Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, VectorType::get(Int::getType(), 1)), Nucleus::createBitCast(hi.value, VectorType::get(Int::getType(), 1)), Nucleus::createConstantVector(shuffle, 2)); 4657 4658 storeValue(Nucleus::createBitCast(packed, Int2::getType())); 4659 } 4660 } 4661 4662 RValue<Int2> Int2::operator=(RValue<Int2> rhs) const 4663 { 4664 
storeValue(rhs.value); 4665 4666 return rhs; 4667 } 4668 4669 RValue<Int2> Int2::operator=(const Int2 &rhs) const 4670 { 4671 Value *value = rhs.loadValue(); 4672 storeValue(value); 4673 4674 return RValue<Int2>(value); 4675 } 4676 4677 RValue<Int2> Int2::operator=(const Reference<Int2> &rhs) const 4678 { 4679 Value *value = rhs.loadValue(); 4680 storeValue(value); 4681 4682 return RValue<Int2>(value); 4683 } 4684 4685 RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs) 4686 { 4687 if(CPUID::supportsMMX2()) 4688 { 4689 return x86::paddd(lhs, rhs); 4690 } 4691 else 4692 { 4693 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value)); 4694 } 4695 } 4696 4697 RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs) 4698 { 4699 if(CPUID::supportsMMX2()) 4700 { 4701 return x86::psubd(lhs, rhs); 4702 } 4703 else 4704 { 4705 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value)); 4706 } 4707 } 4708 4709 // RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs) 4710 // { 4711 // return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value)); 4712 // } 4713 4714 // RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs) 4715 // { 4716 // return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value)); 4717 // } 4718 4719 // RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs) 4720 // { 4721 // return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value)); 4722 // } 4723 4724 RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs) 4725 { 4726 if(CPUID::supportsMMX2()) 4727 { 4728 return As<Int2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs))); 4729 } 4730 else 4731 { 4732 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value)); 4733 } 4734 } 4735 4736 RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs) 4737 { 4738 if(CPUID::supportsMMX2()) 4739 { 4740 return As<Int2>(x86::por(As<Short4>(lhs), As<Short4>(rhs))); 4741 } 4742 else 4743 { 4744 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value)); 4745 } 4746 } 4747 4748 
// Uses x86::pxor when CPUID reports MMX2, the generic xor otherwise.
RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
{
	if(!CPUID::supportsMMX2())
	{
		return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
	}

	// The x86 helper is called with Short4 operands, so cast in and out.
	return As<Int2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
}

// The shifts below always go through the x86 helpers; the commented-out line
// in each body is the generic form that is not in use.
RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
{
	//	return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));

	RValue<Int2> shifted = x86::pslld(lhs, rhs);

	return shifted;
}

RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
{
	//	return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));

	RValue<Int2> shifted = x86::psrad(lhs, rhs);

	return shifted;
}

RValue<Int2> operator<<(RValue<Int2> lhs, RValue<Long1> rhs)
{
	//	return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));

	RValue<Int2> shifted = x86::pslld(lhs, rhs);

	return shifted;
}

RValue<Int2> operator>>(RValue<Int2> lhs, RValue<Long1> rhs)
{
	//	return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));

	RValue<Int2> shifted = x86::psrad(lhs, rhs);

	return shifted;
}

// Compound assignments: evaluate the matching binary operator on the current
// value of lhs, assign the result back, and return what the assignment yields.
RValue<Int2> operator+=(const Int2 &lhs, RValue<Int2> rhs)
{
	RValue<Int2> result = lhs + rhs;

	return lhs = result;
}

RValue<Int2> operator-=(const Int2 &lhs, RValue<Int2> rhs)
{
	RValue<Int2> result = lhs - rhs;

	return lhs = result;
}

//	RValue<Int2> operator*=(const Int2 &lhs, RValue<Int2> rhs)
//	{
//		return lhs = lhs * rhs;
//	}

//	RValue<Int2> operator/=(const Int2 &lhs, RValue<Int2> rhs)
//	{
//		return lhs = lhs / rhs;
//	}

//	RValue<Int2> operator%=(const Int2 &lhs, RValue<Int2> rhs)
//	{
//		return lhs = lhs % rhs;
//	}

RValue<Int2> operator&=(const Int2 &lhs, RValue<Int2> rhs)
{
	RValue<Int2> result = lhs & rhs;

	return lhs = result;
}

RValue<Int2> operator|=(const Int2 &lhs, RValue<Int2> rhs)
{
	RValue<Int2> result = lhs | rhs;

	return lhs = result;
}

RValue<Int2> operator^=(const Int2 &lhs, RValue<Int2> rhs)
{
	RValue<Int2> result = lhs ^ rhs;

	return lhs = result;
}
4828 RValue<Int2> operator<<=(const Int2 &lhs, unsigned char rhs) 4829 { 4830 return lhs = lhs << rhs; 4831 } 4832 4833 RValue<Int2> operator>>=(const Int2 &lhs, unsigned char rhs) 4834 { 4835 return lhs = lhs >> rhs; 4836 } 4837 4838 RValue<Int2> operator<<=(const Int2 &lhs, RValue<Long1> rhs) 4839 { 4840 return lhs = lhs << rhs; 4841 } 4842 4843 RValue<Int2> operator>>=(const Int2 &lhs, RValue<Long1> rhs) 4844 { 4845 return lhs = lhs >> rhs; 4846 } 4847 4848 // RValue<Int2> operator+(RValue<Int2> val) 4849 // { 4850 // return val; 4851 // } 4852 4853 // RValue<Int2> operator-(RValue<Int2> val) 4854 // { 4855 // return RValue<Int2>(Nucleus::createNeg(val.value)); 4856 // } 4857 4858 RValue<Int2> operator~(RValue<Int2> val) 4859 { 4860 if(CPUID::supportsMMX2()) 4861 { 4862 return val ^ Int2(0xFFFFFFFF, 0xFFFFFFFF); 4863 } 4864 else 4865 { 4866 return RValue<Int2>(Nucleus::createNot(val.value)); 4867 } 4868 } 4869 4870 RValue<Long1> UnpackLow(RValue<Int2> x, RValue<Int2> y) 4871 { 4872 if(CPUID::supportsMMX2()) 4873 { 4874 return x86::punpckldq(x, y); 4875 } 4876 else 4877 { 4878 Constant *shuffle[2]; 4879 shuffle[0] = Nucleus::createConstantInt(0); 4880 shuffle[1] = Nucleus::createConstantInt(2); 4881 4882 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 2)); 4883 4884 return RValue<Long1>(Nucleus::createBitCast(packed, Long1::getType())); 4885 } 4886 } 4887 4888 RValue<Long1> UnpackHigh(RValue<Int2> x, RValue<Int2> y) 4889 { 4890 if(CPUID::supportsMMX2()) 4891 { 4892 return x86::punpckhdq(x, y); 4893 } 4894 else 4895 { 4896 Constant *shuffle[2]; 4897 shuffle[0] = Nucleus::createConstantInt(1); 4898 shuffle[1] = Nucleus::createConstantInt(3); 4899 4900 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 2)); 4901 4902 return RValue<Long1>(Nucleus::createBitCast(packed, Long1::getType())); 4903 } 4904 } 4905 4906 RValue<Int> Extract(RValue<Int2> val, int i) 4907 { 
4908 if(false) // FIXME: LLVM does not generate optimal code 4909 { 4910 return RValue<Int>(Nucleus::createExtractElement(val.value, i)); 4911 } 4912 else 4913 { 4914 if(i == 0) 4915 { 4916 return RValue<Int>(Nucleus::createExtractElement(Nucleus::createBitCast(val.value, VectorType::get(Int::getType(), 2)), 0)); 4917 } 4918 else 4919 { 4920 Int2 val2 = As<Int2>(UnpackHigh(val, val)); 4921 4922 return Extract(val2, 0); 4923 } 4924 } 4925 } 4926 4927 RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i) 4928 { 4929 return RValue<Int2>(Nucleus::createBitCast(Nucleus::createInsertElement(Nucleus::createBitCast(val.value, VectorType::get(Int::getType(), 2)), element.value, i), Int2::getType())); 4930 } 4931 4932 Type *Int2::getType() 4933 { 4934 if(CPUID::supportsMMX2()) 4935 { 4936 return MMX::getType(); 4937 } 4938 else 4939 { 4940 return VectorType::get(Int::getType(), 2); 4941 } 4942 } 4943 4944 UInt2::UInt2() 4945 { 4946 // xy.parent = this; 4947 } 4948 4949 UInt2::UInt2(unsigned int x, unsigned int y) 4950 { 4951 // xy.parent = this; 4952 4953 Constant *constantVector[2]; 4954 constantVector[0] = Nucleus::createConstantInt(x); 4955 constantVector[1] = Nucleus::createConstantInt(y); 4956 Value *vector = Nucleus::createConstantVector(constantVector, 2); 4957 4958 storeValue(Nucleus::createBitCast(vector, getType())); 4959 } 4960 4961 UInt2::UInt2(RValue<UInt2> rhs) 4962 { 4963 // xy.parent = this; 4964 4965 storeValue(rhs.value); 4966 } 4967 4968 UInt2::UInt2(const UInt2 &rhs) 4969 { 4970 // xy.parent = this; 4971 4972 Value *value = rhs.loadValue(); 4973 storeValue(value); 4974 } 4975 4976 UInt2::UInt2(const Reference<UInt2> &rhs) 4977 { 4978 // xy.parent = this; 4979 4980 Value *value = rhs.loadValue(); 4981 storeValue(value); 4982 } 4983 4984 RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs) const 4985 { 4986 storeValue(rhs.value); 4987 4988 return rhs; 4989 } 4990 4991 RValue<UInt2> UInt2::operator=(const UInt2 &rhs) const 4992 { 4993 Value *value = 
rhs.loadValue(); 4994 storeValue(value); 4995 4996 return RValue<UInt2>(value); 4997 } 4998 4999 RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs) const 5000 { 5001 Value *value = rhs.loadValue(); 5002 storeValue(value); 5003 5004 return RValue<UInt2>(value); 5005 } 5006 5007 RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs) 5008 { 5009 if(CPUID::supportsMMX2()) 5010 { 5011 return As<UInt2>(x86::paddd(As<Int2>(lhs), As<Int2>(rhs))); 5012 } 5013 else 5014 { 5015 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value)); 5016 } 5017 } 5018 5019 RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs) 5020 { 5021 if(CPUID::supportsMMX2()) 5022 { 5023 return As<UInt2>(x86::psubd(As<Int2>(lhs), As<Int2>(rhs))); 5024 } 5025 else 5026 { 5027 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value)); 5028 } 5029 } 5030 5031 // RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs) 5032 // { 5033 // return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value)); 5034 // } 5035 5036 // RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs) 5037 // { 5038 // return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value)); 5039 // } 5040 5041 // RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs) 5042 // { 5043 // return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value)); 5044 // } 5045 5046 RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs) 5047 { 5048 if(CPUID::supportsMMX2()) 5049 { 5050 return As<UInt2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs))); 5051 } 5052 else 5053 { 5054 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value)); 5055 } 5056 } 5057 5058 RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs) 5059 { 5060 if(CPUID::supportsMMX2()) 5061 { 5062 return As<UInt2>(x86::por(As<Short4>(lhs), As<Short4>(rhs))); 5063 } 5064 else 5065 { 5066 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value)); 5067 } 5068 } 5069 5070 RValue<UInt2> operator^(RValue<UInt2> lhs, 
RValue<UInt2> rhs) 5071 { 5072 if(CPUID::supportsMMX2()) 5073 { 5074 return As<UInt2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs))); 5075 } 5076 else 5077 { 5078 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value)); 5079 } 5080 } 5081 5082 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs) 5083 { 5084 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value)); 5085 5086 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs)); 5087 } 5088 5089 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs) 5090 { 5091 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value)); 5092 5093 return x86::psrld(lhs, rhs); 5094 } 5095 5096 RValue<UInt2> operator<<(RValue<UInt2> lhs, RValue<Long1> rhs) 5097 { 5098 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value)); 5099 5100 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs)); 5101 } 5102 5103 RValue<UInt2> operator>>(RValue<UInt2> lhs, RValue<Long1> rhs) 5104 { 5105 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value)); 5106 5107 return x86::psrld(lhs, rhs); 5108 } 5109 5110 RValue<UInt2> operator+=(const UInt2 &lhs, RValue<UInt2> rhs) 5111 { 5112 return lhs = lhs + rhs; 5113 } 5114 5115 RValue<UInt2> operator-=(const UInt2 &lhs, RValue<UInt2> rhs) 5116 { 5117 return lhs = lhs - rhs; 5118 } 5119 5120 // RValue<UInt2> operator*=(const UInt2 &lhs, RValue<UInt2> rhs) 5121 // { 5122 // return lhs = lhs * rhs; 5123 // } 5124 5125 // RValue<UInt2> operator/=(const UInt2 &lhs, RValue<UInt2> rhs) 5126 // { 5127 // return lhs = lhs / rhs; 5128 // } 5129 5130 // RValue<UInt2> operator%=(const UInt2 &lhs, RValue<UInt2> rhs) 5131 // { 5132 // return lhs = lhs % rhs; 5133 // } 5134 5135 RValue<UInt2> operator&=(const UInt2 &lhs, RValue<UInt2> rhs) 5136 { 5137 return lhs = lhs & rhs; 5138 } 5139 5140 RValue<UInt2> operator|=(const UInt2 &lhs, RValue<UInt2> rhs) 5141 { 5142 return lhs = lhs | rhs; 5143 } 5144 5145 RValue<UInt2> operator^=(const UInt2 &lhs, RValue<UInt2> rhs) 5146 { 5147 
return lhs = lhs ^ rhs; 5148 } 5149 5150 RValue<UInt2> operator<<=(const UInt2 &lhs, unsigned char rhs) 5151 { 5152 return lhs = lhs << rhs; 5153 } 5154 5155 RValue<UInt2> operator>>=(const UInt2 &lhs, unsigned char rhs) 5156 { 5157 return lhs = lhs >> rhs; 5158 } 5159 5160 RValue<UInt2> operator<<=(const UInt2 &lhs, RValue<Long1> rhs) 5161 { 5162 return lhs = lhs << rhs; 5163 } 5164 5165 RValue<UInt2> operator>>=(const UInt2 &lhs, RValue<Long1> rhs) 5166 { 5167 return lhs = lhs >> rhs; 5168 } 5169 5170 // RValue<UInt2> operator+(RValue<UInt2> val) 5171 // { 5172 // return val; 5173 // } 5174 5175 // RValue<UInt2> operator-(RValue<UInt2> val) 5176 // { 5177 // return RValue<UInt2>(Nucleus::createNeg(val.value)); 5178 // } 5179 5180 RValue<UInt2> operator~(RValue<UInt2> val) 5181 { 5182 if(CPUID::supportsMMX2()) 5183 { 5184 return val ^ UInt2(0xFFFFFFFF, 0xFFFFFFFF); 5185 } 5186 else 5187 { 5188 return RValue<UInt2>(Nucleus::createNot(val.value)); 5189 } 5190 } 5191 5192 Type *UInt2::getType() 5193 { 5194 if(CPUID::supportsMMX2()) 5195 { 5196 return MMX::getType(); 5197 } 5198 else 5199 { 5200 return VectorType::get(UInt::getType(), 2); 5201 } 5202 } 5203 5204 Int4::Int4(RValue<Float4> cast) 5205 { 5206 // xyzw.parent = this; 5207 5208 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType()); 5209 5210 storeValue(xyzw); 5211 } 5212 5213 Int4::Int4(RValue<Short4> cast) 5214 { 5215 Value *long2 = UndefValue::get(Long2::getType()); 5216 Value *element = Nucleus::createBitCast(cast.value, Long::getType()); 5217 long2 = Nucleus::createInsertElement(long2, element, 0); 5218 RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType())); 5219 5220 if(CPUID::supportsSSE4_1()) 5221 { 5222 storeValue(x86::pmovsxwd(vector).value); 5223 } 5224 else 5225 { 5226 Value *b = Nucleus::createBitCast(vector.value, Short8::getType()); 5227 5228 Constant *swizzle[8]; 5229 swizzle[0] = Nucleus::createConstantInt(0); 5230 swizzle[1] = 
Nucleus::createConstantInt(0); 5231 swizzle[2] = Nucleus::createConstantInt(1); 5232 swizzle[3] = Nucleus::createConstantInt(1); 5233 swizzle[4] = Nucleus::createConstantInt(2); 5234 swizzle[5] = Nucleus::createConstantInt(2); 5235 swizzle[6] = Nucleus::createConstantInt(3); 5236 swizzle[7] = Nucleus::createConstantInt(3); 5237 5238 Value *c = Nucleus::createShuffleVector(b, b, Nucleus::createConstantVector(swizzle, 8)); 5239 Value *d = Nucleus::createBitCast(c, Int4::getType()); 5240 storeValue(d); 5241 5242 // Each Short is packed into each Int in the (Short | Short) format. 5243 // Shifting by 16 will retrieve the original Short value. 5244 // Shitfing an Int will propagate the sign bit, which will work 5245 // for both positive and negative values of a Short. 5246 *this >>= 16; 5247 } 5248 } 5249 5250 Int4::Int4(RValue<UShort4> cast) 5251 { 5252 Value *long2 = UndefValue::get(Long2::getType()); 5253 Value *element = Nucleus::createBitCast(cast.value, Long::getType()); 5254 long2 = Nucleus::createInsertElement(long2, element, 0); 5255 RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType())); 5256 5257 if(CPUID::supportsSSE4_1()) 5258 { 5259 storeValue(x86::pmovzxwd(RValue<Int4>(vector)).value); 5260 } 5261 else 5262 { 5263 Value *b = Nucleus::createBitCast(vector.value, Short8::getType()); 5264 5265 Constant *swizzle[8]; 5266 swizzle[0] = Nucleus::createConstantInt(0); 5267 swizzle[1] = Nucleus::createConstantInt(8); 5268 swizzle[2] = Nucleus::createConstantInt(1); 5269 swizzle[3] = Nucleus::createConstantInt(9); 5270 swizzle[4] = Nucleus::createConstantInt(2); 5271 swizzle[5] = Nucleus::createConstantInt(10); 5272 swizzle[6] = Nucleus::createConstantInt(3); 5273 swizzle[7] = Nucleus::createConstantInt(11); 5274 5275 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Short8::getType()), Nucleus::createConstantVector(swizzle, 8)); 5276 Value *d = Nucleus::createBitCast(c, Int4::getType()); 5277 storeValue(d); 5278 } 5279 
} 5280 5281 Int4::Int4() 5282 { 5283 // xyzw.parent = this; 5284 } 5285 5286 Int4::Int4(int xyzw) 5287 { 5288 constant(xyzw, xyzw, xyzw, xyzw); 5289 } 5290 5291 Int4::Int4(int x, int yzw) 5292 { 5293 constant(x, yzw, yzw, yzw); 5294 } 5295 5296 Int4::Int4(int x, int y, int zw) 5297 { 5298 constant(x, y, zw, zw); 5299 } 5300 5301 Int4::Int4(int x, int y, int z, int w) 5302 { 5303 constant(x, y, z, w); 5304 } 5305 5306 void Int4::constant(int x, int y, int z, int w) 5307 { 5308 // xyzw.parent = this; 5309 5310 Constant *constantVector[4]; 5311 constantVector[0] = Nucleus::createConstantInt(x); 5312 constantVector[1] = Nucleus::createConstantInt(y); 5313 constantVector[2] = Nucleus::createConstantInt(z); 5314 constantVector[3] = Nucleus::createConstantInt(w); 5315 5316 storeValue(Nucleus::createConstantVector(constantVector, 4)); 5317 } 5318 5319 Int4::Int4(RValue<Int4> rhs) 5320 { 5321 // xyzw.parent = this; 5322 5323 storeValue(rhs.value); 5324 } 5325 5326 Int4::Int4(const Int4 &rhs) 5327 { 5328 // xyzw.parent = this; 5329 5330 Value *value = rhs.loadValue(); 5331 storeValue(value); 5332 } 5333 5334 Int4::Int4(const Reference<Int4> &rhs) 5335 { 5336 // xyzw.parent = this; 5337 5338 Value *value = rhs.loadValue(); 5339 storeValue(value); 5340 } 5341 5342 Int4::Int4(RValue<UInt4> rhs) 5343 { 5344 // xyzw.parent = this; 5345 5346 storeValue(rhs.value); 5347 } 5348 5349 Int4::Int4(const UInt4 &rhs) 5350 { 5351 // xyzw.parent = this; 5352 5353 Value *value = rhs.loadValue(); 5354 storeValue(value); 5355 } 5356 5357 Int4::Int4(const Reference<UInt4> &rhs) 5358 { 5359 // xyzw.parent = this; 5360 5361 Value *value = rhs.loadValue(); 5362 storeValue(value); 5363 } 5364 5365 Int4::Int4(RValue<Int2> lo, RValue<Int2> hi) 5366 { 5367 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType()); 5368 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType()); 5369 5370 Value *long2 = UndefValue::get(Long2::getType()); 5371 long2 = Nucleus::createInsertElement(long2, 
loLong, 0); 5372 long2 = Nucleus::createInsertElement(long2, hiLong, 1); 5373 Value *int4 = Nucleus::createBitCast(long2, Int4::getType()); 5374 5375 storeValue(int4); 5376 } 5377 5378 RValue<Int4> Int4::operator=(RValue<Int4> rhs) const 5379 { 5380 storeValue(rhs.value); 5381 5382 return rhs; 5383 } 5384 5385 RValue<Int4> Int4::operator=(const Int4 &rhs) const 5386 { 5387 Value *value = rhs.loadValue(); 5388 storeValue(value); 5389 5390 return RValue<Int4>(value); 5391 } 5392 5393 RValue<Int4> Int4::operator=(const Reference<Int4> &rhs) const 5394 { 5395 Value *value = rhs.loadValue(); 5396 storeValue(value); 5397 5398 return RValue<Int4>(value); 5399 } 5400 5401 RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs) 5402 { 5403 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value)); 5404 } 5405 5406 RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs) 5407 { 5408 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value)); 5409 } 5410 5411 RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs) 5412 { 5413 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value)); 5414 } 5415 5416 RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs) 5417 { 5418 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value)); 5419 } 5420 5421 RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs) 5422 { 5423 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value)); 5424 } 5425 5426 RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs) 5427 { 5428 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value)); 5429 } 5430 5431 RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs) 5432 { 5433 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value)); 5434 } 5435 5436 RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs) 5437 { 5438 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value)); 5439 } 5440 5441 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs) 5442 { 5443 return x86::pslld(lhs, rhs); 5444 } 5445 
5446 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs) 5447 { 5448 return x86::psrad(lhs, rhs); 5449 } 5450 5451 RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs) 5452 { 5453 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value)); 5454 } 5455 5456 RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs) 5457 { 5458 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value)); 5459 } 5460 5461 RValue<Int4> operator+=(const Int4 &lhs, RValue<Int4> rhs) 5462 { 5463 return lhs = lhs + rhs; 5464 } 5465 5466 RValue<Int4> operator-=(const Int4 &lhs, RValue<Int4> rhs) 5467 { 5468 return lhs = lhs - rhs; 5469 } 5470 5471 RValue<Int4> operator*=(const Int4 &lhs, RValue<Int4> rhs) 5472 { 5473 return lhs = lhs * rhs; 5474 } 5475 5476 // RValue<Int4> operator/=(const Int4 &lhs, RValue<Int4> rhs) 5477 // { 5478 // return lhs = lhs / rhs; 5479 // } 5480 5481 // RValue<Int4> operator%=(const Int4 &lhs, RValue<Int4> rhs) 5482 // { 5483 // return lhs = lhs % rhs; 5484 // } 5485 5486 RValue<Int4> operator&=(const Int4 &lhs, RValue<Int4> rhs) 5487 { 5488 return lhs = lhs & rhs; 5489 } 5490 5491 RValue<Int4> operator|=(const Int4 &lhs, RValue<Int4> rhs) 5492 { 5493 return lhs = lhs | rhs; 5494 } 5495 5496 RValue<Int4> operator^=(const Int4 &lhs, RValue<Int4> rhs) 5497 { 5498 return lhs = lhs ^ rhs; 5499 } 5500 5501 RValue<Int4> operator<<=(const Int4 &lhs, unsigned char rhs) 5502 { 5503 return lhs = lhs << rhs; 5504 } 5505 5506 RValue<Int4> operator>>=(const Int4 &lhs, unsigned char rhs) 5507 { 5508 return lhs = lhs >> rhs; 5509 } 5510 5511 RValue<Int4> operator+(RValue<Int4> val) 5512 { 5513 return val; 5514 } 5515 5516 RValue<Int4> operator-(RValue<Int4> val) 5517 { 5518 return RValue<Int4>(Nucleus::createNeg(val.value)); 5519 } 5520 5521 RValue<Int4> operator~(RValue<Int4> val) 5522 { 5523 return RValue<Int4>(Nucleus::createNot(val.value)); 5524 } 5525 5526 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y) 5527 { 5528 // FIXME: An LLVM bug causes 
SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5529 // Restore the following line when LLVM is updated to a version where this issue is fixed. 5530 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())); 5531 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 5532 } 5533 5534 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y) 5535 { 5536 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())); 5537 } 5538 5539 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y) 5540 { 5541 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5542 // Restore the following line when LLVM is updated to a version where this issue is fixed. 5543 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())); 5544 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 5545 } 5546 5547 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y) 5548 { 5549 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())); 5550 } 5551 5552 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y) 5553 { 5554 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5555 // Restore the following line when LLVM is updated to a version where this issue is fixed. 
5556 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())); 5557 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 5558 } 5559 5560 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y) 5561 { 5562 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())); 5563 } 5564 5565 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y) 5566 { 5567 if(CPUID::supportsSSE4_1()) 5568 { 5569 return x86::pmaxsd(x, y); 5570 } 5571 else 5572 { 5573 RValue<Int4> greater = CmpNLE(x, y); 5574 return x & greater | y & ~greater; 5575 } 5576 } 5577 5578 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y) 5579 { 5580 if(CPUID::supportsSSE4_1()) 5581 { 5582 return x86::pminsd(x, y); 5583 } 5584 else 5585 { 5586 RValue<Int4> less = CmpLT(x, y); 5587 return x & less | y & ~less; 5588 } 5589 } 5590 5591 RValue<Int4> RoundInt(RValue<Float4> cast) 5592 { 5593 return x86::cvtps2dq(cast); 5594 } 5595 5596 RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y) 5597 { 5598 return x86::packssdw(x, y); 5599 } 5600 5601 RValue<Int> Extract(RValue<Int4> x, int i) 5602 { 5603 return RValue<Int>(Nucleus::createExtractElement(x.value, i)); 5604 } 5605 5606 RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i) 5607 { 5608 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i)); 5609 } 5610 5611 RValue<Int> SignMask(RValue<Int4> x) 5612 { 5613 return x86::movmskps(As<Float4>(x)); 5614 } 5615 5616 RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select) 5617 { 5618 return RValue<Int4>(Nucleus::createSwizzle(x.value, select)); 5619 } 5620 5621 Type *Int4::getType() 5622 { 5623 return VectorType::get(Int::getType(), 4); 5624 } 5625 5626 UInt4::UInt4(RValue<Float4> cast) 5627 { 5628 // xyzw.parent = this; 5629 5630 Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType()); 5631 5632 storeValue(xyzw); 5633 } 5634 5635 
UInt4::UInt4() 5636 { 5637 // xyzw.parent = this; 5638 } 5639 5640 UInt4::UInt4(int xyzw) 5641 { 5642 constant(xyzw, xyzw, xyzw, xyzw); 5643 } 5644 5645 UInt4::UInt4(int x, int yzw) 5646 { 5647 constant(x, yzw, yzw, yzw); 5648 } 5649 5650 UInt4::UInt4(int x, int y, int zw) 5651 { 5652 constant(x, y, zw, zw); 5653 } 5654 5655 UInt4::UInt4(int x, int y, int z, int w) 5656 { 5657 constant(x, y, z, w); 5658 } 5659 5660 void UInt4::constant(int x, int y, int z, int w) 5661 { 5662 // xyzw.parent = this; 5663 5664 Constant *constantVector[4]; 5665 constantVector[0] = Nucleus::createConstantInt(x); 5666 constantVector[1] = Nucleus::createConstantInt(y); 5667 constantVector[2] = Nucleus::createConstantInt(z); 5668 constantVector[3] = Nucleus::createConstantInt(w); 5669 5670 storeValue(Nucleus::createConstantVector(constantVector, 4)); 5671 } 5672 5673 UInt4::UInt4(RValue<UInt4> rhs) 5674 { 5675 // xyzw.parent = this; 5676 5677 storeValue(rhs.value); 5678 } 5679 5680 UInt4::UInt4(const UInt4 &rhs) 5681 { 5682 // xyzw.parent = this; 5683 5684 Value *value = rhs.loadValue(); 5685 storeValue(value); 5686 } 5687 5688 UInt4::UInt4(const Reference<UInt4> &rhs) 5689 { 5690 // xyzw.parent = this; 5691 5692 Value *value = rhs.loadValue(); 5693 storeValue(value); 5694 } 5695 5696 UInt4::UInt4(RValue<Int4> rhs) 5697 { 5698 // xyzw.parent = this; 5699 5700 storeValue(rhs.value); 5701 } 5702 5703 UInt4::UInt4(const Int4 &rhs) 5704 { 5705 // xyzw.parent = this; 5706 5707 Value *value = rhs.loadValue(); 5708 storeValue(value); 5709 } 5710 5711 UInt4::UInt4(const Reference<Int4> &rhs) 5712 { 5713 // xyzw.parent = this; 5714 5715 Value *value = rhs.loadValue(); 5716 storeValue(value); 5717 } 5718 5719 UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi) 5720 { 5721 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType()); 5722 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType()); 5723 5724 Value *long2 = UndefValue::get(Long2::getType()); 5725 long2 = 
Nucleus::createInsertElement(long2, loLong, 0); 5726 long2 = Nucleus::createInsertElement(long2, hiLong, 1); 5727 Value *uint4 = Nucleus::createBitCast(long2, Int4::getType()); 5728 5729 storeValue(uint4); 5730 } 5731 5732 RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs) const 5733 { 5734 storeValue(rhs.value); 5735 5736 return rhs; 5737 } 5738 5739 RValue<UInt4> UInt4::operator=(const UInt4 &rhs) const 5740 { 5741 Value *value = rhs.loadValue(); 5742 storeValue(value); 5743 5744 return RValue<UInt4>(value); 5745 } 5746 5747 RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs) const 5748 { 5749 Value *value = rhs.loadValue(); 5750 storeValue(value); 5751 5752 return RValue<UInt4>(value); 5753 } 5754 5755 RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs) 5756 { 5757 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value)); 5758 } 5759 5760 RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs) 5761 { 5762 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value)); 5763 } 5764 5765 RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs) 5766 { 5767 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value)); 5768 } 5769 5770 RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs) 5771 { 5772 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value)); 5773 } 5774 5775 RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs) 5776 { 5777 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value)); 5778 } 5779 5780 RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs) 5781 { 5782 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value)); 5783 } 5784 5785 RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs) 5786 { 5787 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value)); 5788 } 5789 5790 RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs) 5791 { 5792 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value)); 5793 } 5794 5795 RValue<UInt4> 
operator<<(RValue<UInt4> lhs, unsigned char rhs) 5796 { 5797 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs)); 5798 } 5799 5800 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs) 5801 { 5802 return x86::psrld(lhs, rhs); 5803 } 5804 5805 RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs) 5806 { 5807 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value)); 5808 } 5809 5810 RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs) 5811 { 5812 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value)); 5813 } 5814 5815 RValue<UInt4> operator+=(const UInt4 &lhs, RValue<UInt4> rhs) 5816 { 5817 return lhs = lhs + rhs; 5818 } 5819 5820 RValue<UInt4> operator-=(const UInt4 &lhs, RValue<UInt4> rhs) 5821 { 5822 return lhs = lhs - rhs; 5823 } 5824 5825 RValue<UInt4> operator*=(const UInt4 &lhs, RValue<UInt4> rhs) 5826 { 5827 return lhs = lhs * rhs; 5828 } 5829 5830 // RValue<UInt4> operator/=(const UInt4 &lhs, RValue<UInt4> rhs) 5831 // { 5832 // return lhs = lhs / rhs; 5833 // } 5834 5835 // RValue<UInt4> operator%=(const UInt4 &lhs, RValue<UInt4> rhs) 5836 // { 5837 // return lhs = lhs % rhs; 5838 // } 5839 5840 RValue<UInt4> operator&=(const UInt4 &lhs, RValue<UInt4> rhs) 5841 { 5842 return lhs = lhs & rhs; 5843 } 5844 5845 RValue<UInt4> operator|=(const UInt4 &lhs, RValue<UInt4> rhs) 5846 { 5847 return lhs = lhs | rhs; 5848 } 5849 5850 RValue<UInt4> operator^=(const UInt4 &lhs, RValue<UInt4> rhs) 5851 { 5852 return lhs = lhs ^ rhs; 5853 } 5854 5855 RValue<UInt4> operator<<=(const UInt4 &lhs, unsigned char rhs) 5856 { 5857 return lhs = lhs << rhs; 5858 } 5859 5860 RValue<UInt4> operator>>=(const UInt4 &lhs, unsigned char rhs) 5861 { 5862 return lhs = lhs >> rhs; 5863 } 5864 5865 RValue<UInt4> operator+(RValue<UInt4> val) 5866 { 5867 return val; 5868 } 5869 5870 RValue<UInt4> operator-(RValue<UInt4> val) 5871 { 5872 return RValue<UInt4>(Nucleus::createNeg(val.value)); 5873 } 5874 5875 RValue<UInt4> operator~(RValue<UInt4> val) 5876 { 
5877 return RValue<UInt4>(Nucleus::createNot(val.value)); 5878 } 5879 5880 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y) 5881 { 5882 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5883 // Restore the following line when LLVM is updated to a version where this issue is fixed. 5884 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())); 5885 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF); 5886 } 5887 5888 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y) 5889 { 5890 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())); 5891 } 5892 5893 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y) 5894 { 5895 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5896 // Restore the following line when LLVM is updated to a version where this issue is fixed. 5897 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType())); 5898 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF); 5899 } 5900 5901 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y) 5902 { 5903 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())); 5904 } 5905 5906 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y) 5907 { 5908 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5909 // Restore the following line when LLVM is updated to a version where this issue is fixed. 
5910 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType())); 5911 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF); 5912 } 5913 5914 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y) 5915 { 5916 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())); 5917 } 5918 5919 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y) 5920 { 5921 if(CPUID::supportsSSE4_1()) 5922 { 5923 return x86::pmaxud(x, y); 5924 } 5925 else 5926 { 5927 RValue<UInt4> greater = CmpNLE(x, y); 5928 return x & greater | y & ~greater; 5929 } 5930 } 5931 5932 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y) 5933 { 5934 if(CPUID::supportsSSE4_1()) 5935 { 5936 return x86::pminud(x, y); 5937 } 5938 else 5939 { 5940 RValue<UInt4> less = CmpLT(x, y); 5941 return x & less | y & ~less; 5942 } 5943 } 5944 5945 RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y) 5946 { 5947 return x86::packusdw(x, y); // FIXME: Fallback required 5948 } 5949 5950 Type *UInt4::getType() 5951 { 5952 return VectorType::get(UInt::getType(), 4); 5953 } 5954 5955 Float::Float(RValue<Int> cast) 5956 { 5957 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType()); 5958 5959 storeValue(integer); 5960 } 5961 5962 Float::Float() 5963 { 5964 5965 } 5966 5967 Float::Float(float x) 5968 { 5969 storeValue(Nucleus::createConstantFloat(x)); 5970 } 5971 5972 Float::Float(RValue<Float> rhs) 5973 { 5974 storeValue(rhs.value); 5975 } 5976 5977 Float::Float(const Float &rhs) 5978 { 5979 Value *value = rhs.loadValue(); 5980 storeValue(value); 5981 } 5982 5983 Float::Float(const Reference<Float> &rhs) 5984 { 5985 Value *value = rhs.loadValue(); 5986 storeValue(value); 5987 } 5988 5989 RValue<Float> Float::operator=(RValue<Float> rhs) const 5990 { 5991 storeValue(rhs.value); 5992 5993 return rhs; 5994 } 5995 5996 RValue<Float> Float::operator=(const Float &rhs) 
const 5997 { 5998 Value *value = rhs.loadValue(); 5999 storeValue(value); 6000 6001 return RValue<Float>(value); 6002 } 6003 6004 RValue<Float> Float::operator=(const Reference<Float> &rhs) const 6005 { 6006 Value *value = rhs.loadValue(); 6007 storeValue(value); 6008 6009 return RValue<Float>(value); 6010 } 6011 6012 RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs) 6013 { 6014 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value)); 6015 } 6016 6017 RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs) 6018 { 6019 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value)); 6020 } 6021 6022 RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs) 6023 { 6024 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value)); 6025 } 6026 6027 RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs) 6028 { 6029 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value)); 6030 } 6031 6032 RValue<Float> operator+=(const Float &lhs, RValue<Float> rhs) 6033 { 6034 return lhs = lhs + rhs; 6035 } 6036 6037 RValue<Float> operator-=(const Float &lhs, RValue<Float> rhs) 6038 { 6039 return lhs = lhs - rhs; 6040 } 6041 6042 RValue<Float> operator*=(const Float &lhs, RValue<Float> rhs) 6043 { 6044 return lhs = lhs * rhs; 6045 } 6046 6047 RValue<Float> operator/=(const Float &lhs, RValue<Float> rhs) 6048 { 6049 return lhs = lhs / rhs; 6050 } 6051 6052 RValue<Float> operator+(RValue<Float> val) 6053 { 6054 return val; 6055 } 6056 6057 RValue<Float> operator-(RValue<Float> val) 6058 { 6059 return RValue<Float>(Nucleus::createFNeg(val.value)); 6060 } 6061 6062 RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs) 6063 { 6064 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value)); 6065 } 6066 6067 RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs) 6068 { 6069 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value)); 6070 } 6071 6072 RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs) 6073 { 
6074 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value)); 6075 } 6076 6077 RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs) 6078 { 6079 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value)); 6080 } 6081 6082 RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs) 6083 { 6084 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value)); 6085 } 6086 6087 RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs) 6088 { 6089 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value)); 6090 } 6091 6092 RValue<Float> Abs(RValue<Float> x) 6093 { 6094 return IfThenElse(x > 0.0f, x, -x); 6095 } 6096 6097 RValue<Float> Max(RValue<Float> x, RValue<Float> y) 6098 { 6099 return IfThenElse(x > y, x, y); 6100 } 6101 6102 RValue<Float> Min(RValue<Float> x, RValue<Float> y) 6103 { 6104 return IfThenElse(x < y, x, y); 6105 } 6106 6107 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2) 6108 { 6109 if(exactAtPow2) 6110 { 6111 // rcpss uses a piecewise-linear approximation which minimizes the relative error 6112 // but is not exact at power-of-two values. Rectify by multiplying by the inverse. 
6113 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f)))); 6114 } 6115 else 6116 { 6117 return x86::rcpss(x); 6118 } 6119 } 6120 6121 RValue<Float> RcpSqrt_pp(RValue<Float> x) 6122 { 6123 return x86::rsqrtss(x); 6124 } 6125 6126 RValue<Float> Sqrt(RValue<Float> x) 6127 { 6128 return x86::sqrtss(x); 6129 } 6130 6131 RValue<Float> Round(RValue<Float> x) 6132 { 6133 if(CPUID::supportsSSE4_1()) 6134 { 6135 return x86::roundss(x, 0); 6136 } 6137 else 6138 { 6139 return Float4(Round(Float4(x))).x; 6140 } 6141 } 6142 6143 RValue<Float> Trunc(RValue<Float> x) 6144 { 6145 if(CPUID::supportsSSE4_1()) 6146 { 6147 return x86::roundss(x, 3); 6148 } 6149 else 6150 { 6151 return Float(Int(x)); // Rounded toward zero 6152 } 6153 } 6154 6155 RValue<Float> Frac(RValue<Float> x) 6156 { 6157 if(CPUID::supportsSSE4_1()) 6158 { 6159 return x - x86::floorss(x); 6160 } 6161 else 6162 { 6163 return Float4(Frac(Float4(x))).x; 6164 } 6165 } 6166 6167 RValue<Float> Floor(RValue<Float> x) 6168 { 6169 if(CPUID::supportsSSE4_1()) 6170 { 6171 return x86::floorss(x); 6172 } 6173 else 6174 { 6175 return Float4(Floor(Float4(x))).x; 6176 } 6177 } 6178 6179 RValue<Float> Ceil(RValue<Float> x) 6180 { 6181 if(CPUID::supportsSSE4_1()) 6182 { 6183 return x86::ceilss(x); 6184 } 6185 else 6186 { 6187 return Float4(Ceil(Float4(x))).x; 6188 } 6189 } 6190 6191 Type *Float::getType() 6192 { 6193 return Type::getFloatTy(*Nucleus::getContext()); 6194 } 6195 6196 Float2::Float2(RValue<Float4> cast) 6197 { 6198 // xyzw.parent = this; 6199 6200 Value *int64x2 = Nucleus::createBitCast(cast.value, Long2::getType()); 6201 Value *int64 = Nucleus::createExtractElement(int64x2, 0); 6202 Value *float2 = Nucleus::createBitCast(int64, Float2::getType()); 6203 6204 storeValue(float2); 6205 } 6206 6207 Type *Float2::getType() 6208 { 6209 return VectorType::get(Float::getType(), 2); 6210 } 6211 6212 Float4::Float4(RValue<Byte4> cast) 6213 { 6214 xyzw.parent = this; 6215 6216 #if 0 6217 Value *xyzw = 
Nucleus::createUIToFP(cast.value, Float4::getType()); // FIXME: Crashes 6218 #elif 0 6219 Value *vector = loadValue(); 6220 6221 Value *i8x = Nucleus::createExtractElement(cast.value, 0); 6222 Value *f32x = Nucleus::createUIToFP(i8x, Float::getType()); 6223 Value *x = Nucleus::createInsertElement(vector, f32x, 0); 6224 6225 Value *i8y = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(1)); 6226 Value *f32y = Nucleus::createUIToFP(i8y, Float::getType()); 6227 Value *xy = Nucleus::createInsertElement(x, f32y, Nucleus::createConstantInt(1)); 6228 6229 Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2)); 6230 Value *f32z = Nucleus::createUIToFP(i8z, Float::getType()); 6231 Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2)); 6232 6233 Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3)); 6234 Value *f32w = Nucleus::createUIToFP(i8w, Float::getType()); 6235 Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3)); 6236 #else 6237 Value *x = Nucleus::createBitCast(cast.value, Int::getType()); 6238 Value *a = Nucleus::createInsertElement(UndefValue::get(Int4::getType()), x, 0); 6239 6240 Value *e; 6241 6242 if(CPUID::supportsSSE4_1()) 6243 { 6244 e = x86::pmovzxbd(RValue<Int4>(a)).value; 6245 } 6246 else 6247 { 6248 Constant *swizzle[16]; 6249 swizzle[0] = Nucleus::createConstantInt(0); 6250 swizzle[1] = Nucleus::createConstantInt(16); 6251 swizzle[2] = Nucleus::createConstantInt(1); 6252 swizzle[3] = Nucleus::createConstantInt(17); 6253 swizzle[4] = Nucleus::createConstantInt(2); 6254 swizzle[5] = Nucleus::createConstantInt(18); 6255 swizzle[6] = Nucleus::createConstantInt(3); 6256 swizzle[7] = Nucleus::createConstantInt(19); 6257 swizzle[8] = Nucleus::createConstantInt(4); 6258 swizzle[9] = Nucleus::createConstantInt(20); 6259 swizzle[10] = Nucleus::createConstantInt(5); 6260 swizzle[11] = Nucleus::createConstantInt(21); 6261 
swizzle[12] = Nucleus::createConstantInt(6); 6262 swizzle[13] = Nucleus::createConstantInt(22); 6263 swizzle[14] = Nucleus::createConstantInt(7); 6264 swizzle[15] = Nucleus::createConstantInt(23); 6265 6266 Value *b = Nucleus::createBitCast(a, Byte16::getType()); 6267 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::getType()), Nucleus::createConstantVector(swizzle, 16)); 6268 6269 Constant *swizzle2[8]; 6270 swizzle2[0] = Nucleus::createConstantInt(0); 6271 swizzle2[1] = Nucleus::createConstantInt(8); 6272 swizzle2[2] = Nucleus::createConstantInt(1); 6273 swizzle2[3] = Nucleus::createConstantInt(9); 6274 swizzle2[4] = Nucleus::createConstantInt(2); 6275 swizzle2[5] = Nucleus::createConstantInt(10); 6276 swizzle2[6] = Nucleus::createConstantInt(3); 6277 swizzle2[7] = Nucleus::createConstantInt(11); 6278 6279 Value *d = Nucleus::createBitCast(c, Short8::getType()); 6280 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::getType()), Nucleus::createConstantVector(swizzle2, 8)); 6281 } 6282 6283 Value *f = Nucleus::createBitCast(e, Int4::getType()); 6284 Value *g = Nucleus::createSIToFP(f, Float4::getType()); 6285 Value *xyzw = g; 6286 #endif 6287 6288 storeValue(xyzw); 6289 } 6290 6291 Float4::Float4(RValue<SByte4> cast) 6292 { 6293 xyzw.parent = this; 6294 6295 #if 0 6296 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType()); // FIXME: Crashes 6297 #elif 0 6298 Value *vector = loadValue(); 6299 6300 Value *i8x = Nucleus::createExtractElement(cast.value, 0); 6301 Value *f32x = Nucleus::createSIToFP(i8x, Float::getType()); 6302 Value *x = Nucleus::createInsertElement(vector, f32x, 0); 6303 6304 Value *i8y = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(1)); 6305 Value *f32y = Nucleus::createSIToFP(i8y, Float::getType()); 6306 Value *xy = Nucleus::createInsertElement(x, f32y, Nucleus::createConstantInt(1)); 6307 6308 Value *i8z = Nucleus::createExtractElement(cast.value, 
Nucleus::createConstantInt(2)); 6309 Value *f32z = Nucleus::createSIToFP(i8z, Float::getType()); 6310 Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2)); 6311 6312 Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3)); 6313 Value *f32w = Nucleus::createSIToFP(i8w, Float::getType()); 6314 Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3)); 6315 #else 6316 Value *x = Nucleus::createBitCast(cast.value, Int::getType()); 6317 Value *a = Nucleus::createInsertElement(UndefValue::get(Int4::getType()), x, 0); 6318 6319 Value *g; 6320 6321 if(CPUID::supportsSSE4_1()) 6322 { 6323 g = x86::pmovsxbd(RValue<Int4>(a)).value; 6324 } 6325 else 6326 { 6327 Constant *swizzle[16]; 6328 swizzle[0] = Nucleus::createConstantInt(0); 6329 swizzle[1] = Nucleus::createConstantInt(0); 6330 swizzle[2] = Nucleus::createConstantInt(1); 6331 swizzle[3] = Nucleus::createConstantInt(1); 6332 swizzle[4] = Nucleus::createConstantInt(2); 6333 swizzle[5] = Nucleus::createConstantInt(2); 6334 swizzle[6] = Nucleus::createConstantInt(3); 6335 swizzle[7] = Nucleus::createConstantInt(3); 6336 swizzle[8] = Nucleus::createConstantInt(4); 6337 swizzle[9] = Nucleus::createConstantInt(4); 6338 swizzle[10] = Nucleus::createConstantInt(5); 6339 swizzle[11] = Nucleus::createConstantInt(5); 6340 swizzle[12] = Nucleus::createConstantInt(6); 6341 swizzle[13] = Nucleus::createConstantInt(6); 6342 swizzle[14] = Nucleus::createConstantInt(7); 6343 swizzle[15] = Nucleus::createConstantInt(7); 6344 6345 Value *b = Nucleus::createBitCast(a, Byte16::getType()); 6346 Value *c = Nucleus::createShuffleVector(b, b, Nucleus::createConstantVector(swizzle, 16)); 6347 6348 Constant *swizzle2[8]; 6349 swizzle2[0] = Nucleus::createConstantInt(0); 6350 swizzle2[1] = Nucleus::createConstantInt(0); 6351 swizzle2[2] = Nucleus::createConstantInt(1); 6352 swizzle2[3] = Nucleus::createConstantInt(1); 6353 swizzle2[4] = 
Nucleus::createConstantInt(2); 6354 swizzle2[5] = Nucleus::createConstantInt(2); 6355 swizzle2[6] = Nucleus::createConstantInt(3); 6356 swizzle2[7] = Nucleus::createConstantInt(3); 6357 6358 Value *d = Nucleus::createBitCast(c, Short8::getType()); 6359 Value *e = Nucleus::createShuffleVector(d, d, Nucleus::createConstantVector(swizzle2, 8)); 6360 6361 Value *f = Nucleus::createBitCast(e, Int4::getType()); 6362 // g = Nucleus::createAShr(f, Nucleus::createConstantInt(24)); 6363 g = x86::psrad(RValue<Int4>(f), 24).value; 6364 } 6365 6366 Value *xyzw = Nucleus::createSIToFP(g, Float4::getType()); 6367 #endif 6368 6369 storeValue(xyzw); 6370 } 6371 6372 Float4::Float4(RValue<Short4> cast) 6373 { 6374 xyzw.parent = this; 6375 6376 Int4 c(cast); 6377 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType())); 6378 } 6379 6380 Float4::Float4(RValue<UShort4> cast) 6381 { 6382 xyzw.parent = this; 6383 6384 Int4 c(cast); 6385 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType())); 6386 } 6387 6388 Float4::Float4(RValue<Int4> cast) 6389 { 6390 xyzw.parent = this; 6391 6392 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType()); 6393 6394 storeValue(xyzw); 6395 } 6396 6397 Float4::Float4(RValue<UInt4> cast) 6398 { 6399 xyzw.parent = this; 6400 6401 Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType()); 6402 6403 storeValue(xyzw); 6404 } 6405 6406 Float4::Float4() 6407 { 6408 xyzw.parent = this; 6409 } 6410 6411 Float4::Float4(float xyzw) 6412 { 6413 constant(xyzw, xyzw, xyzw, xyzw); 6414 } 6415 6416 Float4::Float4(float x, float yzw) 6417 { 6418 constant(x, yzw, yzw, yzw); 6419 } 6420 6421 Float4::Float4(float x, float y, float zw) 6422 { 6423 constant(x, y, zw, zw); 6424 } 6425 6426 Float4::Float4(float x, float y, float z, float w) 6427 { 6428 constant(x, y, z, w); 6429 } 6430 6431 void Float4::constant(float x, float y, float z, float w) 6432 { 6433 xyzw.parent = this; 6434 6435 Constant *constantVector[4]; 6436 
constantVector[0] = Nucleus::createConstantFloat(x); 6437 constantVector[1] = Nucleus::createConstantFloat(y); 6438 constantVector[2] = Nucleus::createConstantFloat(z); 6439 constantVector[3] = Nucleus::createConstantFloat(w); 6440 6441 storeValue(Nucleus::createConstantVector(constantVector, 4)); 6442 } 6443 6444 Float4::Float4(RValue<Float4> rhs) 6445 { 6446 xyzw.parent = this; 6447 6448 storeValue(rhs.value); 6449 } 6450 6451 Float4::Float4(const Float4 &rhs) 6452 { 6453 xyzw.parent = this; 6454 6455 Value *value = rhs.loadValue(); 6456 storeValue(value); 6457 } 6458 6459 Float4::Float4(const Reference<Float4> &rhs) 6460 { 6461 xyzw.parent = this; 6462 6463 Value *value = rhs.loadValue(); 6464 storeValue(value); 6465 } 6466 6467 Float4::Float4(RValue<Float> rhs) 6468 { 6469 xyzw.parent = this; 6470 6471 Value *vector = loadValue(); 6472 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0); 6473 6474 Constant *swizzle[4]; 6475 swizzle[0] = Nucleus::createConstantInt(0); 6476 swizzle[1] = Nucleus::createConstantInt(0); 6477 swizzle[2] = Nucleus::createConstantInt(0); 6478 swizzle[3] = Nucleus::createConstantInt(0); 6479 6480 Value *replicate = Nucleus::createShuffleVector(insert, UndefValue::get(Float4::getType()), Nucleus::createConstantVector(swizzle, 4)); 6481 6482 storeValue(replicate); 6483 } 6484 6485 Float4::Float4(const Float &rhs) 6486 { 6487 xyzw.parent = this; 6488 6489 *this = RValue<Float>(rhs.loadValue()); 6490 } 6491 6492 Float4::Float4(const Reference<Float> &rhs) 6493 { 6494 xyzw.parent = this; 6495 6496 *this = RValue<Float>(rhs.loadValue()); 6497 } 6498 6499 RValue<Float4> Float4::operator=(float x) const 6500 { 6501 return *this = Float4(x, x, x, x); 6502 } 6503 6504 RValue<Float4> Float4::operator=(RValue<Float4> rhs) const 6505 { 6506 storeValue(rhs.value); 6507 6508 return rhs; 6509 } 6510 6511 RValue<Float4> Float4::operator=(const Float4 &rhs) const 6512 { 6513 Value *value = rhs.loadValue(); 6514 storeValue(value); 6515 6516 
return RValue<Float4>(value); 6517 } 6518 6519 RValue<Float4> Float4::operator=(const Reference<Float4> &rhs) const 6520 { 6521 Value *value = rhs.loadValue(); 6522 storeValue(value); 6523 6524 return RValue<Float4>(value); 6525 } 6526 6527 RValue<Float4> Float4::operator=(RValue<Float> rhs) const 6528 { 6529 return *this = Float4(rhs); 6530 } 6531 6532 RValue<Float4> Float4::operator=(const Float &rhs) const 6533 { 6534 return *this = Float4(rhs); 6535 } 6536 6537 RValue<Float4> Float4::operator=(const Reference<Float> &rhs) const 6538 { 6539 return *this = Float4(rhs); 6540 } 6541 6542 RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs) 6543 { 6544 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value)); 6545 } 6546 6547 RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs) 6548 { 6549 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value)); 6550 } 6551 6552 RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs) 6553 { 6554 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value)); 6555 } 6556 6557 RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs) 6558 { 6559 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value)); 6560 } 6561 6562 RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs) 6563 { 6564 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value)); 6565 } 6566 6567 RValue<Float4> operator+=(const Float4 &lhs, RValue<Float4> rhs) 6568 { 6569 return lhs = lhs + rhs; 6570 } 6571 6572 RValue<Float4> operator-=(const Float4 &lhs, RValue<Float4> rhs) 6573 { 6574 return lhs = lhs - rhs; 6575 } 6576 6577 RValue<Float4> operator*=(const Float4 &lhs, RValue<Float4> rhs) 6578 { 6579 return lhs = lhs * rhs; 6580 } 6581 6582 RValue<Float4> operator/=(const Float4 &lhs, RValue<Float4> rhs) 6583 { 6584 return lhs = lhs / rhs; 6585 } 6586 6587 RValue<Float4> operator%=(const Float4 &lhs, RValue<Float4> rhs) 6588 { 6589 return lhs = lhs % rhs; 6590 } 6591 6592 RValue<Float4> 
operator+(RValue<Float4> val) 6593 { 6594 return val; 6595 } 6596 6597 RValue<Float4> operator-(RValue<Float4> val) 6598 { 6599 return RValue<Float4>(Nucleus::createFNeg(val.value)); 6600 } 6601 6602 RValue<Float4> Abs(RValue<Float4> x) 6603 { 6604 Value *vector = Nucleus::createBitCast(x.value, Int4::getType()); 6605 6606 Constant *constantVector[4]; 6607 constantVector[0] = Nucleus::createConstantInt(0x7FFFFFFF); 6608 constantVector[1] = Nucleus::createConstantInt(0x7FFFFFFF); 6609 constantVector[2] = Nucleus::createConstantInt(0x7FFFFFFF); 6610 constantVector[3] = Nucleus::createConstantInt(0x7FFFFFFF); 6611 6612 Value *result = Nucleus::createAnd(vector, Nucleus::createConstantVector(constantVector, 4)); 6613 6614 return RValue<Float4>(Nucleus::createBitCast(result, Float4::getType())); 6615 } 6616 6617 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y) 6618 { 6619 return x86::maxps(x, y); 6620 } 6621 6622 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y) 6623 { 6624 return x86::minps(x, y); 6625 } 6626 6627 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2) 6628 { 6629 if(exactAtPow2) 6630 { 6631 // rcpps uses a piecewise-linear approximation which minimizes the relative error 6632 // but is not exact at power-of-two values. Rectify by multiplying by the inverse. 
6633 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f)))); 6634 } 6635 else 6636 { 6637 return x86::rcpps(x); 6638 } 6639 } 6640 6641 RValue<Float4> RcpSqrt_pp(RValue<Float4> x) 6642 { 6643 return x86::rsqrtps(x); 6644 } 6645 6646 RValue<Float4> Sqrt(RValue<Float4> x) 6647 { 6648 return x86::sqrtps(x); 6649 } 6650 6651 RValue<Float4> Insert(const Float4 &val, RValue<Float> element, int i) 6652 { 6653 llvm::Value *value = val.loadValue(); 6654 llvm::Value *insert = Nucleus::createInsertElement(value, element.value, i); 6655 6656 val = RValue<Float4>(insert); 6657 6658 return val; 6659 } 6660 6661 RValue<Float> Extract(RValue<Float4> x, int i) 6662 { 6663 return RValue<Float>(Nucleus::createExtractElement(x.value, i)); 6664 } 6665 6666 RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select) 6667 { 6668 return RValue<Float4>(Nucleus::createSwizzle(x.value, select)); 6669 } 6670 6671 RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm) 6672 { 6673 Constant *shuffle[4]; 6674 shuffle[0] = Nucleus::createConstantInt(((imm >> 0) & 0x03) + 0); 6675 shuffle[1] = Nucleus::createConstantInt(((imm >> 2) & 0x03) + 0); 6676 shuffle[2] = Nucleus::createConstantInt(((imm >> 4) & 0x03) + 4); 6677 shuffle[3] = Nucleus::createConstantInt(((imm >> 6) & 0x03) + 4); 6678 6679 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4))); 6680 } 6681 6682 RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y) 6683 { 6684 Constant *shuffle[4]; 6685 shuffle[0] = Nucleus::createConstantInt(0); 6686 shuffle[1] = Nucleus::createConstantInt(4); 6687 shuffle[2] = Nucleus::createConstantInt(1); 6688 shuffle[3] = Nucleus::createConstantInt(5); 6689 6690 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4))); 6691 } 6692 6693 RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y) 6694 { 6695 Constant 
*shuffle[4]; 6696 shuffle[0] = Nucleus::createConstantInt(2); 6697 shuffle[1] = Nucleus::createConstantInt(6); 6698 shuffle[2] = Nucleus::createConstantInt(3); 6699 shuffle[3] = Nucleus::createConstantInt(7); 6700 6701 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4))); 6702 } 6703 6704 RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select) 6705 { 6706 Value *vector = lhs.loadValue(); 6707 Value *shuffle = Nucleus::createMask(vector, rhs.value, select); 6708 lhs.storeValue(shuffle); 6709 6710 return RValue<Float4>(shuffle); 6711 } 6712 6713 RValue<Int> SignMask(RValue<Float4> x) 6714 { 6715 return x86::movmskps(x); 6716 } 6717 6718 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y) 6719 { 6720 // return As<Int4>(x86::cmpeqps(x, y)); 6721 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType())); 6722 } 6723 6724 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y) 6725 { 6726 // return As<Int4>(x86::cmpltps(x, y)); 6727 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType())); 6728 } 6729 6730 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y) 6731 { 6732 // return As<Int4>(x86::cmpleps(x, y)); 6733 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType())); 6734 } 6735 6736 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y) 6737 { 6738 // return As<Int4>(x86::cmpneqps(x, y)); 6739 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType())); 6740 } 6741 6742 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y) 6743 { 6744 // return As<Int4>(x86::cmpnltps(x, y)); 6745 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType())); 6746 } 6747 6748 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y) 6749 { 6750 // return As<Int4>(x86::cmpnleps(x, y)); 6751 return 
RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType())); 6752 } 6753 6754 RValue<Float4> Round(RValue<Float4> x) 6755 { 6756 if(CPUID::supportsSSE4_1()) 6757 { 6758 return x86::roundps(x, 0); 6759 } 6760 else 6761 { 6762 return Float4(RoundInt(x)); 6763 } 6764 } 6765 6766 RValue<Float4> Trunc(RValue<Float4> x) 6767 { 6768 if(CPUID::supportsSSE4_1()) 6769 { 6770 return x86::roundps(x, 3); 6771 } 6772 else 6773 { 6774 return Float4(Int4(x)); // Rounded toward zero 6775 } 6776 } 6777 6778 RValue<Float4> Frac(RValue<Float4> x) 6779 { 6780 if(CPUID::supportsSSE4_1()) 6781 { 6782 return x - x86::floorps(x); 6783 } 6784 else 6785 { 6786 Float4 frc = x - Float4(Int4(x)); // Signed fractional part 6787 6788 return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); 6789 } 6790 } 6791 6792 RValue<Float4> Floor(RValue<Float4> x) 6793 { 6794 if(CPUID::supportsSSE4_1()) 6795 { 6796 return x86::floorps(x); 6797 } 6798 else 6799 { 6800 return x - Frac(x); 6801 } 6802 } 6803 6804 RValue<Float4> Ceil(RValue<Float4> x) 6805 { 6806 if(CPUID::supportsSSE4_1()) 6807 { 6808 return x86::ceilps(x); 6809 } 6810 else 6811 { 6812 return -Floor(-x); 6813 } 6814 } 6815 6816 Type *Float4::getType() 6817 { 6818 return VectorType::get(Float::getType(), 4); 6819 } 6820 6821 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset) 6822 { 6823 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Nucleus::createConstantInt(offset))); 6824 } 6825 6826 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset) 6827 { 6828 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, offset.value)); 6829 } 6830 6831 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset) 6832 { 6833 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, offset.value)); 6834 } 6835 6836 RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, int offset) 6837 { 6838 return lhs = lhs + 
offset; 6839 } 6840 6841 RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, RValue<Int> offset) 6842 { 6843 return lhs = lhs + offset; 6844 } 6845 6846 RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, RValue<UInt> offset) 6847 { 6848 return lhs = lhs + offset; 6849 } 6850 6851 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset) 6852 { 6853 return lhs + -offset; 6854 } 6855 6856 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset) 6857 { 6858 return lhs + -offset; 6859 } 6860 6861 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset) 6862 { 6863 return lhs + -offset; 6864 } 6865 6866 RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, int offset) 6867 { 6868 return lhs = lhs - offset; 6869 } 6870 6871 RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, RValue<Int> offset) 6872 { 6873 return lhs = lhs - offset; 6874 } 6875 6876 RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, RValue<UInt> offset) 6877 { 6878 return lhs = lhs - offset; 6879 } 6880 6881 void Return() 6882 { 6883 Nucleus::createRetVoid(); 6884 Nucleus::setInsertBlock(Nucleus::createBasicBlock()); 6885 Nucleus::createUnreachable(); 6886 } 6887 6888 void Return(bool ret) 6889 { 6890 Nucleus::createRet(Nucleus::createConstantBool(ret)); 6891 Nucleus::setInsertBlock(Nucleus::createBasicBlock()); 6892 Nucleus::createUnreachable(); 6893 } 6894 6895 void Return(const Int &ret) 6896 { 6897 Nucleus::createRet(ret.loadValue()); 6898 Nucleus::setInsertBlock(Nucleus::createBasicBlock()); 6899 Nucleus::createUnreachable(); 6900 } 6901 6902 BasicBlock *beginLoop() 6903 { 6904 BasicBlock *loopBB = Nucleus::createBasicBlock(); 6905 6906 Nucleus::createBr(loopBB); 6907 Nucleus::setInsertBlock(loopBB); 6908 6909 return loopBB; 6910 } 6911 6912 bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB) 6913 { 6914 Nucleus::createCondBr(cmp.value, bodyBB, endBB); 6915 Nucleus::setInsertBlock(bodyBB); 
6916 6917 return true; 6918 } 6919 6920 bool elseBlock(BasicBlock *falseBB) 6921 { 6922 falseBB->back().eraseFromParent(); 6923 Nucleus::setInsertBlock(falseBB); 6924 6925 return true; 6926 } 6927 6928 RValue<Long> Ticks() 6929 { 6930 Module *module = Nucleus::getModule(); 6931 llvm::Function *rdtsc = Intrinsic::getDeclaration(module, Intrinsic::readcyclecounter); 6932 6933 return RValue<Long>(Nucleus::createCall(rdtsc)); 6934 } 6935 } 6936 6937 namespace sw 6938 { 6939 namespace x86 6940 { 6941 RValue<Int> cvtss2si(RValue<Float> val) 6942 { 6943 Module *module = Nucleus::getModule(); 6944 llvm::Function *cvtss2si = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cvtss2si); 6945 6946 Float4 vector; 6947 vector.x = val; 6948 6949 return RValue<Int>(Nucleus::createCall(cvtss2si, RValue<Float4>(vector).value)); 6950 } 6951 6952 RValue<Int2> cvtps2pi(RValue<Float4> val) 6953 { 6954 Module *module = Nucleus::getModule(); 6955 llvm::Function *cvtps2pi = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cvtps2pi); 6956 6957 return RValue<Int2>(Nucleus::createCall(cvtps2pi, val.value)); 6958 } 6959 6960 RValue<Int2> cvttps2pi(RValue<Float4> val) 6961 { 6962 Module *module = Nucleus::getModule(); 6963 llvm::Function *cvttps2pi = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cvttps2pi); 6964 6965 return RValue<Int2>(Nucleus::createCall(cvttps2pi, val.value)); 6966 } 6967 6968 RValue<Int4> cvtps2dq(RValue<Float4> val) 6969 { 6970 if(CPUID::supportsSSE2()) 6971 { 6972 Module *module = Nucleus::getModule(); 6973 llvm::Function *cvtps2dq = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_cvtps2dq); 6974 6975 return RValue<Int4>(Nucleus::createCall(cvtps2dq, val.value)); 6976 } 6977 else 6978 { 6979 Int2 lo = x86::cvtps2pi(val); 6980 Int2 hi = x86::cvtps2pi(Swizzle(val, 0xEE)); 6981 6982 return Int4(lo, hi); 6983 } 6984 } 6985 6986 RValue<Float> rcpss(RValue<Float> val) 6987 { 6988 Module *module = Nucleus::getModule(); 6989 llvm::Function *rcpss = 
Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rcp_ss); 6990 6991 Value *vector = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), val.value, 0); 6992 6993 return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(rcpss, vector), 0)); 6994 } 6995 6996 RValue<Float> sqrtss(RValue<Float> val) 6997 { 6998 Module *module = Nucleus::getModule(); 6999 llvm::Function *sqrtss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_sqrt_ss); 7000 7001 Value *vector = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), val.value, 0); 7002 7003 return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(sqrtss, vector), 0)); 7004 } 7005 7006 RValue<Float> rsqrtss(RValue<Float> val) 7007 { 7008 Module *module = Nucleus::getModule(); 7009 llvm::Function *rsqrtss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rsqrt_ss); 7010 7011 Value *vector = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), val.value, 0); 7012 7013 return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(rsqrtss, vector), 0)); 7014 } 7015 7016 RValue<Float4> rcpps(RValue<Float4> val) 7017 { 7018 Module *module = Nucleus::getModule(); 7019 llvm::Function *rcpps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rcp_ps); 7020 7021 return RValue<Float4>(Nucleus::createCall(rcpps, val.value)); 7022 } 7023 7024 RValue<Float4> sqrtps(RValue<Float4> val) 7025 { 7026 Module *module = Nucleus::getModule(); 7027 llvm::Function *sqrtps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_sqrt_ps); 7028 7029 return RValue<Float4>(Nucleus::createCall(sqrtps, val.value)); 7030 } 7031 7032 RValue<Float4> rsqrtps(RValue<Float4> val) 7033 { 7034 Module *module = Nucleus::getModule(); 7035 llvm::Function *rsqrtps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rsqrt_ps); 7036 7037 return RValue<Float4>(Nucleus::createCall(rsqrtps, val.value)); 7038 } 7039 7040 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y) 
7041 { 7042 Module *module = Nucleus::getModule(); 7043 llvm::Function *maxps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_max_ps); 7044 7045 return RValue<Float4>(Nucleus::createCall(maxps, x.value, y.value)); 7046 } 7047 7048 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y) 7049 { 7050 Module *module = Nucleus::getModule(); 7051 llvm::Function *minps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_min_ps); 7052 7053 return RValue<Float4>(Nucleus::createCall(minps, x.value, y.value)); 7054 } 7055 7056 RValue<Float> roundss(RValue<Float> val, unsigned char imm) 7057 { 7058 Module *module = Nucleus::getModule(); 7059 llvm::Function *roundss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_round_ss); 7060 7061 Value *undef = UndefValue::get(Float4::getType()); 7062 Value *vector = Nucleus::createInsertElement(undef, val.value, 0); 7063 7064 return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(roundss, undef, vector, Nucleus::createConstantInt(imm)), 0)); 7065 } 7066 7067 RValue<Float> floorss(RValue<Float> val) 7068 { 7069 return roundss(val, 1); 7070 } 7071 7072 RValue<Float> ceilss(RValue<Float> val) 7073 { 7074 return roundss(val, 2); 7075 } 7076 7077 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm) 7078 { 7079 Module *module = Nucleus::getModule(); 7080 llvm::Function *roundps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_round_ps); 7081 7082 return RValue<Float4>(Nucleus::createCall(roundps, val.value, Nucleus::createConstantInt(imm))); 7083 } 7084 7085 RValue<Float4> floorps(RValue<Float4> val) 7086 { 7087 return roundps(val, 1); 7088 } 7089 7090 RValue<Float4> ceilps(RValue<Float4> val) 7091 { 7092 return roundps(val, 2); 7093 } 7094 7095 RValue<Float4> cmpps(RValue<Float4> x, RValue<Float4> y, unsigned char imm) 7096 { 7097 Module *module = Nucleus::getModule(); 7098 llvm::Function *cmpps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cmp_ps); 7099 7100 return 
RValue<Float4>(Nucleus::createCall(cmpps, x.value, y.value, Nucleus::createConstantByte(imm)));
		}

		// Packed comparisons: each wrapper forwards to cmpps with a fixed
		// predicate immediate (0 through 7, in the order defined below).

		RValue<Float4> cmpeqps(RValue<Float4> x, RValue<Float4> y)
		{
			return cmpps(x, y, 0);   // predicate 0
		}

		RValue<Float4> cmpltps(RValue<Float4> x, RValue<Float4> y)
		{
			return cmpps(x, y, 1);   // predicate 1
		}

		RValue<Float4> cmpleps(RValue<Float4> x, RValue<Float4> y)
		{
			return cmpps(x, y, 2);   // predicate 2
		}

		RValue<Float4> cmpunordps(RValue<Float4> x, RValue<Float4> y)
		{
			return cmpps(x, y, 3);   // predicate 3
		}

		RValue<Float4> cmpneqps(RValue<Float4> x, RValue<Float4> y)
		{
			return cmpps(x, y, 4);   // predicate 4
		}

		RValue<Float4> cmpnltps(RValue<Float4> x, RValue<Float4> y)
		{
			return cmpps(x, y, 5);   // predicate 5
		}

		RValue<Float4> cmpnleps(RValue<Float4> x, RValue<Float4> y)
		{
			return cmpps(x, y, 6);   // predicate 6
		}

		RValue<Float4> cmpordps(RValue<Float4> x, RValue<Float4> y)
		{
			return cmpps(x, y, 7);   // predicate 7
		}

		// Scalar comparison via x86_sse_cmp_ss: x and y are each placed in lane 0
		// of an undefined Float4, compared with predicate imm, and lane 0 of the
		// result is returned.
		RValue<Float> cmpss(RValue<Float> x, RValue<Float> y, unsigned char imm)
		{
			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_cmp_ss);

			Value *vector1 = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), x.value, 0);
			Value *vector2 = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), y.value, 0);
			Value *compared = Nucleus::createCall(intrinsic, vector1, vector2, Nucleus::createConstantByte(imm));

			return RValue<Float>(Nucleus::createExtractElement(compared, 0));
		}

		// Scalar comparisons: same predicate immediates as the packed wrappers.

		RValue<Float> cmpeqss(RValue<Float> x, RValue<Float> y)
		{
			return cmpss(x, y, 0);   // predicate 0
		}

		RValue<Float> cmpltss(RValue<Float> x, RValue<Float> y)
		{
			return cmpss(x, y, 1);   // predicate 1
		}

		RValue<Float> cmpless(RValue<Float> x, RValue<Float> y)
		{
			return cmpss(x, y, 2);   // predicate 2
		}

		RValue<Float> cmpunordss(RValue<Float> x, RValue<Float> y)
		{
			return
cmpss(x, y, 3); 7172 } 7173 7174 RValue<Float> cmpneqss(RValue<Float> x, RValue<Float> y) 7175 { 7176 return cmpss(x, y, 4); 7177 } 7178 7179 RValue<Float> cmpnltss(RValue<Float> x, RValue<Float> y) 7180 { 7181 return cmpss(x, y, 5); 7182 } 7183 7184 RValue<Float> cmpnless(RValue<Float> x, RValue<Float> y) 7185 { 7186 return cmpss(x, y, 6); 7187 } 7188 7189 RValue<Float> cmpordss(RValue<Float> x, RValue<Float> y) 7190 { 7191 return cmpss(x, y, 7); 7192 } 7193 7194 RValue<Int4> pabsd(RValue<Int4> x) 7195 { 7196 Module *module = Nucleus::getModule(); 7197 llvm::Function *pabsd = Intrinsic::getDeclaration(module, Intrinsic::x86_ssse3_pabs_d_128); 7198 7199 return RValue<Int4>(Nucleus::createCall(pabsd, x.value)); 7200 } 7201 7202 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y) 7203 { 7204 Module *module = Nucleus::getModule(); 7205 llvm::Function *paddsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padds_w); 7206 7207 return As<Short4>(RValue<MMX>(Nucleus::createCall(paddsw, As<MMX>(x).value, As<MMX>(y).value))); 7208 } 7209 7210 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y) 7211 { 7212 Module *module = Nucleus::getModule(); 7213 llvm::Function *psubsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubs_w); 7214 7215 return As<Short4>(RValue<MMX>(Nucleus::createCall(psubsw, As<MMX>(x).value, As<MMX>(y).value))); 7216 } 7217 7218 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y) 7219 { 7220 Module *module = Nucleus::getModule(); 7221 llvm::Function *paddusw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_paddus_w); 7222 7223 return As<UShort4>(RValue<MMX>(Nucleus::createCall(paddusw, As<MMX>(x).value, As<MMX>(y).value))); 7224 } 7225 7226 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y) 7227 { 7228 Module *module = Nucleus::getModule(); 7229 llvm::Function *psubusw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubus_w); 7230 7231 return 
As<UShort4>(RValue<MMX>(Nucleus::createCall(psubusw, As<MMX>(x).value, As<MMX>(y).value))); 7232 } 7233 7234 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y) 7235 { 7236 Module *module = Nucleus::getModule(); 7237 llvm::Function *paddsb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padds_b); 7238 7239 return As<SByte8>(RValue<MMX>(Nucleus::createCall(paddsb, As<MMX>(x).value, As<MMX>(y).value))); 7240 } 7241 7242 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y) 7243 { 7244 Module *module = Nucleus::getModule(); 7245 llvm::Function *psubsb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubs_b); 7246 7247 return As<SByte8>(RValue<MMX>(Nucleus::createCall(psubsb, As<MMX>(x).value, As<MMX>(y).value))); 7248 } 7249 7250 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y) 7251 { 7252 Module *module = Nucleus::getModule(); 7253 llvm::Function *paddusb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_paddus_b); 7254 7255 return As<Byte8>(RValue<MMX>(Nucleus::createCall(paddusb, As<MMX>(x).value, As<MMX>(y).value))); 7256 } 7257 7258 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y) 7259 { 7260 Module *module = Nucleus::getModule(); 7261 llvm::Function *psubusb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubus_b); 7262 7263 return As<Byte8>(RValue<MMX>(Nucleus::createCall(psubusb, As<MMX>(x).value, As<MMX>(y).value))); 7264 } 7265 7266 RValue<Short4> paddw(RValue<Short4> x, RValue<Short4> y) 7267 { 7268 Module *module = Nucleus::getModule(); 7269 llvm::Function *paddw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padd_w); 7270 7271 return As<Short4>(RValue<MMX>(Nucleus::createCall(paddw, As<MMX>(x).value, As<MMX>(y).value))); 7272 } 7273 7274 RValue<Short4> psubw(RValue<Short4> x, RValue<Short4> y) 7275 { 7276 Module *module = Nucleus::getModule(); 7277 llvm::Function *psubw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psub_w); 7278 7279 return 
As<Short4>(RValue<MMX>(Nucleus::createCall(psubw, As<MMX>(x).value, As<MMX>(y).value))); 7280 } 7281 7282 RValue<Short4> pmullw(RValue<Short4> x, RValue<Short4> y) 7283 { 7284 Module *module = Nucleus::getModule(); 7285 llvm::Function *pmullw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmull_w); 7286 7287 return As<Short4>(RValue<MMX>(Nucleus::createCall(pmullw, As<MMX>(x).value, As<MMX>(y).value))); 7288 } 7289 7290 RValue<Short4> pand(RValue<Short4> x, RValue<Short4> y) 7291 { 7292 Module *module = Nucleus::getModule(); 7293 llvm::Function *pand = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pand); 7294 7295 return As<Short4>(RValue<MMX>(Nucleus::createCall(pand, As<MMX>(x).value, As<MMX>(y).value))); 7296 } 7297 7298 RValue<Short4> por(RValue<Short4> x, RValue<Short4> y) 7299 { 7300 Module *module = Nucleus::getModule(); 7301 llvm::Function *por = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_por); 7302 7303 return As<Short4>(RValue<MMX>(Nucleus::createCall(por, As<MMX>(x).value, As<MMX>(y).value))); 7304 } 7305 7306 RValue<Short4> pxor(RValue<Short4> x, RValue<Short4> y) 7307 { 7308 Module *module = Nucleus::getModule(); 7309 llvm::Function *pxor = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pxor); 7310 7311 return As<Short4>(RValue<MMX>(Nucleus::createCall(pxor, As<MMX>(x).value, As<MMX>(y).value))); 7312 } 7313 7314 RValue<Short4> pshufw(RValue<Short4> x, unsigned char y) 7315 { 7316 Module *module = Nucleus::getModule(); 7317 llvm::Function *pshufw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_pshuf_w); 7318 7319 return As<Short4>(RValue<MMX>(Nucleus::createCall(pshufw, As<MMX>(x).value, Nucleus::createConstantByte(y)))); 7320 } 7321 7322 RValue<Int2> punpcklwd(RValue<Short4> x, RValue<Short4> y) 7323 { 7324 Module *module = Nucleus::getModule(); 7325 llvm::Function *punpcklwd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpcklwd); 7326 7327 return As<Int2>(RValue<MMX>(Nucleus::createCall(punpcklwd, 
As<MMX>(x).value, As<MMX>(y).value))); 7328 } 7329 7330 RValue<Int2> punpckhwd(RValue<Short4> x, RValue<Short4> y) 7331 { 7332 Module *module = Nucleus::getModule(); 7333 llvm::Function *punpckhwd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckhwd); 7334 7335 return As<Int2>(RValue<MMX>(Nucleus::createCall(punpckhwd, As<MMX>(x).value, As<MMX>(y).value))); 7336 } 7337 7338 RValue<Short4> pinsrw(RValue<Short4> x, RValue<Int> y, unsigned int i) 7339 { 7340 Module *module = Nucleus::getModule(); 7341 llvm::Function *pinsrw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pinsr_w); 7342 7343 return As<Short4>(RValue<MMX>(Nucleus::createCall(pinsrw, As<MMX>(x).value, y.value, Nucleus::createConstantInt(i)))); 7344 } 7345 7346 RValue<Int> pextrw(RValue<Short4> x, unsigned int i) 7347 { 7348 Module *module = Nucleus::getModule(); 7349 llvm::Function *pextrw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pextr_w); 7350 7351 return RValue<Int>(Nucleus::createCall(pextrw, As<MMX>(x).value, Nucleus::createConstantInt(i))); 7352 } 7353 7354 RValue<Long1> punpckldq(RValue<Int2> x, RValue<Int2> y) 7355 { 7356 Module *module = Nucleus::getModule(); 7357 llvm::Function *punpckldq = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckldq); 7358 7359 return As<Long1>(RValue<MMX>(Nucleus::createCall(punpckldq, As<MMX>(x).value, As<MMX>(y).value))); 7360 } 7361 7362 RValue<Long1> punpckhdq(RValue<Int2> x, RValue<Int2> y) 7363 { 7364 Module *module = Nucleus::getModule(); 7365 llvm::Function *punpckhdq = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckhdq); 7366 7367 return As<Long1>(RValue<MMX>(Nucleus::createCall(punpckhdq, As<MMX>(x).value, As<MMX>(y).value))); 7368 } 7369 7370 RValue<Short4> punpcklbw(RValue<Byte8> x, RValue<Byte8> y) 7371 { 7372 Module *module = Nucleus::getModule(); 7373 llvm::Function *punpcklbw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpcklbw); 7374 7375 return 
As<Short4>(RValue<MMX>(Nucleus::createCall(punpcklbw, As<MMX>(x).value, As<MMX>(y).value))); 7376 } 7377 7378 RValue<Short4> punpckhbw(RValue<Byte8> x, RValue<Byte8> y) 7379 { 7380 Module *module = Nucleus::getModule(); 7381 llvm::Function *punpckhbw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckhbw); 7382 7383 return As<Short4>(RValue<MMX>(Nucleus::createCall(punpckhbw, As<MMX>(x).value, As<MMX>(y).value))); 7384 } 7385 7386 RValue<Byte8> paddb(RValue<Byte8> x, RValue<Byte8> y) 7387 { 7388 Module *module = Nucleus::getModule(); 7389 llvm::Function *paddb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padd_b); 7390 7391 return As<Byte8>(RValue<MMX>(Nucleus::createCall(paddb, As<MMX>(x).value, As<MMX>(y).value))); 7392 } 7393 7394 RValue<Byte8> psubb(RValue<Byte8> x, RValue<Byte8> y) 7395 { 7396 Module *module = Nucleus::getModule(); 7397 llvm::Function *psubb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psub_b); 7398 7399 return As<Byte8>(RValue<MMX>(Nucleus::createCall(psubb, As<MMX>(x).value, As<MMX>(y).value))); 7400 } 7401 7402 RValue<Int2> paddd(RValue<Int2> x, RValue<Int2> y) 7403 { 7404 Module *module = Nucleus::getModule(); 7405 llvm::Function *paddd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padd_d); 7406 7407 return As<Int2>(RValue<MMX>(Nucleus::createCall(paddd, As<MMX>(x).value, As<MMX>(y).value))); 7408 } 7409 7410 RValue<Int2> psubd(RValue<Int2> x, RValue<Int2> y) 7411 { 7412 Module *module = Nucleus::getModule(); 7413 llvm::Function *psubd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psub_d); 7414 7415 return As<Int2>(RValue<MMX>(Nucleus::createCall(psubd, As<MMX>(x).value, As<MMX>(y).value))); 7416 } 7417 7418 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y) 7419 { 7420 Module *module = Nucleus::getModule(); 7421 llvm::Function *pavgw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pavg_w); 7422 7423 return As<UShort4>(RValue<MMX>(Nucleus::createCall(pavgw, As<MMX>(x).value, 
As<MMX>(y).value))); 7424 } 7425 7426 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y) 7427 { 7428 Module *module = Nucleus::getModule(); 7429 llvm::Function *pmaxsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmaxs_w); 7430 7431 return As<Short4>(RValue<MMX>(Nucleus::createCall(pmaxsw, As<MMX>(x).value, As<MMX>(y).value))); 7432 } 7433 7434 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y) 7435 { 7436 Module *module = Nucleus::getModule(); 7437 llvm::Function *pminsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmins_w); 7438 7439 return As<Short4>(RValue<MMX>(Nucleus::createCall(pminsw, As<MMX>(x).value, As<MMX>(y).value))); 7440 } 7441 7442 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y) 7443 { 7444 Module *module = Nucleus::getModule(); 7445 llvm::Function *pcmpgtw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpgt_w); 7446 7447 return As<Short4>(RValue<MMX>(Nucleus::createCall(pcmpgtw, As<MMX>(x).value, As<MMX>(y).value))); 7448 } 7449 7450 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y) 7451 { 7452 Module *module = Nucleus::getModule(); 7453 llvm::Function *pcmpeqw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpeq_w); 7454 7455 return As<Short4>(RValue<MMX>(Nucleus::createCall(pcmpeqw, As<MMX>(x).value, As<MMX>(y).value))); 7456 } 7457 7458 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y) 7459 { 7460 Module *module = Nucleus::getModule(); 7461 llvm::Function *pcmpgtb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpgt_b); 7462 7463 return As<Byte8>(RValue<MMX>(Nucleus::createCall(pcmpgtb, As<MMX>(x).value, As<MMX>(y).value))); 7464 } 7465 7466 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y) 7467 { 7468 Module *module = Nucleus::getModule(); 7469 llvm::Function *pcmpeqb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpeq_b); 7470 7471 return As<Byte8>(RValue<MMX>(Nucleus::createCall(pcmpeqb, As<MMX>(x).value, As<MMX>(y).value))); 7472 } 7473 
7474 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y) 7475 { 7476 Module *module = Nucleus::getModule(); 7477 llvm::Function *packssdw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_packssdw); 7478 7479 return As<Short4>(RValue<MMX>(Nucleus::createCall(packssdw, As<MMX>(x).value, As<MMX>(y).value))); 7480 } 7481 7482 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y) 7483 { 7484 if(CPUID::supportsSSE2()) 7485 { 7486 Module *module = Nucleus::getModule(); 7487 llvm::Function *packssdw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_packssdw_128); 7488 7489 return RValue<Short8>(Nucleus::createCall(packssdw, x.value, y.value)); 7490 } 7491 else 7492 { 7493 Int2 loX = Int2(x); 7494 Int2 hiX = Int2(Swizzle(x, 0xEE)); 7495 7496 Int2 loY = Int2(y); 7497 Int2 hiY = Int2(Swizzle(y, 0xEE)); 7498 7499 Short4 lo = x86::packssdw(loX, hiX); 7500 Short4 hi = x86::packssdw(loY, hiY); 7501 7502 return Short8(lo, hi); 7503 } 7504 } 7505 7506 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y) 7507 { 7508 Module *module = Nucleus::getModule(); 7509 llvm::Function *packsswb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_packsswb); 7510 7511 return As<SByte8>(RValue<MMX>(Nucleus::createCall(packsswb, As<MMX>(x).value, As<MMX>(y).value))); 7512 } 7513 7514 RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y) 7515 { 7516 Module *module = Nucleus::getModule(); 7517 llvm::Function *packuswb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_packuswb); 7518 7519 return As<Byte8>(RValue<MMX>(Nucleus::createCall(packuswb, As<MMX>(x).value, As<MMX>(y).value))); 7520 } 7521 7522 RValue<UShort8> packusdw(RValue<UInt4> x, RValue<UInt4> y) 7523 { 7524 if(CPUID::supportsSSE4_1()) 7525 { 7526 Module *module = Nucleus::getModule(); 7527 llvm::Function *packusdw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_packusdw); 7528 7529 return RValue<UShort8>(Nucleus::createCall(packusdw, x.value, y.value)); 7530 } 7531 else 7532 { 7533 // 
FIXME: Not an exact replacement! 7534 return As<UShort8>(packssdw(As<Int4>(x - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000)), As<Int4>(y - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000))) + Short8(0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u)); 7535 } 7536 } 7537 7538 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y) 7539 { 7540 Module *module = Nucleus::getModule(); 7541 llvm::Function *psrlw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrli_w); 7542 7543 return As<UShort4>(RValue<MMX>(Nucleus::createCall(psrlw, As<MMX>(x).value, Nucleus::createConstantInt(y)))); 7544 } 7545 7546 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y) 7547 { 7548 Module *module = Nucleus::getModule(); 7549 llvm::Function *psrlw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrli_w); 7550 7551 return RValue<UShort8>(Nucleus::createCall(psrlw, x.value, Nucleus::createConstantInt(y))); 7552 } 7553 7554 RValue<Short4> psraw(RValue<Short4> x, unsigned char y) 7555 { 7556 Module *module = Nucleus::getModule(); 7557 llvm::Function *psraw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrai_w); 7558 7559 return As<Short4>(RValue<MMX>(Nucleus::createCall(psraw, As<MMX>(x).value, Nucleus::createConstantInt(y)))); 7560 } 7561 7562 RValue<Short8> psraw(RValue<Short8> x, unsigned char y) 7563 { 7564 Module *module = Nucleus::getModule(); 7565 llvm::Function *psraw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrai_w); 7566 7567 return RValue<Short8>(Nucleus::createCall(psraw, x.value, Nucleus::createConstantInt(y))); 7568 } 7569 7570 RValue<Short4> psllw(RValue<Short4> x, unsigned char y) 7571 { 7572 Module *module = Nucleus::getModule(); 7573 llvm::Function *psllw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pslli_w); 7574 7575 return As<Short4>(RValue<MMX>(Nucleus::createCall(psllw, As<MMX>(x).value, Nucleus::createConstantInt(y)))); 7576 } 7577 7578 RValue<Short8> psllw(RValue<Short8> x, 
unsigned char y) 7579 { 7580 Module *module = Nucleus::getModule(); 7581 llvm::Function *psllw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pslli_w); 7582 7583 return RValue<Short8>(Nucleus::createCall(psllw, x.value, Nucleus::createConstantInt(y))); 7584 } 7585 7586 RValue<Int2> pslld(RValue<Int2> x, unsigned char y) 7587 { 7588 Module *module = Nucleus::getModule(); 7589 llvm::Function *pslld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pslli_d); 7590 7591 return As<Int2>(RValue<MMX>(Nucleus::createCall(pslld, As<MMX>(x).value, Nucleus::createConstantInt(y)))); 7592 } 7593 7594 RValue<Int4> pslld(RValue<Int4> x, unsigned char y) 7595 { 7596 if(CPUID::supportsSSE2()) 7597 { 7598 Module *module = Nucleus::getModule(); 7599 llvm::Function *pslld = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pslli_d); 7600 7601 return RValue<Int4>(Nucleus::createCall(pslld, x.value, Nucleus::createConstantInt(y))); 7602 } 7603 else 7604 { 7605 Int2 lo = Int2(x); 7606 Int2 hi = Int2(Swizzle(x, 0xEE)); 7607 7608 lo = x86::pslld(lo, y); 7609 hi = x86::pslld(hi, y); 7610 7611 return Int4(lo, hi); 7612 } 7613 } 7614 7615 RValue<Int2> psrad(RValue<Int2> x, unsigned char y) 7616 { 7617 Module *module = Nucleus::getModule(); 7618 llvm::Function *psrad = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrai_d); 7619 7620 return As<Int2>(RValue<MMX>(Nucleus::createCall(psrad, As<MMX>(x).value, Nucleus::createConstantInt(y)))); 7621 } 7622 7623 RValue<Int4> psrad(RValue<Int4> x, unsigned char y) 7624 { 7625 if(CPUID::supportsSSE2()) 7626 { 7627 Module *module = Nucleus::getModule(); 7628 llvm::Function *psrad = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrai_d); 7629 7630 return RValue<Int4>(Nucleus::createCall(psrad, x.value, Nucleus::createConstantInt(y))); 7631 } 7632 else 7633 { 7634 Int2 lo = Int2(x); 7635 Int2 hi = Int2(Swizzle(x, 0xEE)); 7636 7637 lo = x86::psrad(lo, y); 7638 hi = x86::psrad(hi, y); 7639 7640 return Int4(lo, hi); 7641 } 
7642 } 7643 7644 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y) 7645 { 7646 Module *module = Nucleus::getModule(); 7647 llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrli_d); 7648 7649 return As<UInt2>(RValue<MMX>(Nucleus::createCall(psrld, As<MMX>(x).value, Nucleus::createConstantInt(y)))); 7650 } 7651 7652 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y) 7653 { 7654 if(CPUID::supportsSSE2()) 7655 { 7656 Module *module = Nucleus::getModule(); 7657 llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrli_d); 7658 7659 return RValue<UInt4>(Nucleus::createCall(psrld, x.value, Nucleus::createConstantInt(y))); 7660 } 7661 else 7662 { 7663 UInt2 lo = As<UInt2>(Int2(As<Int4>(x))); 7664 UInt2 hi = As<UInt2>(Int2(Swizzle(As<Int4>(x), 0xEE))); 7665 7666 lo = x86::psrld(lo, y); 7667 hi = x86::psrld(hi, y); 7668 7669 return UInt4(lo, hi); 7670 } 7671 } 7672 7673 RValue<UShort4> psrlw(RValue<UShort4> x, RValue<Long1> y) 7674 { 7675 Module *module = Nucleus::getModule(); 7676 llvm::Function *psrlw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrl_w); 7677 7678 return As<UShort4>(RValue<MMX>(Nucleus::createCall(psrlw, As<MMX>(x).value, As<MMX>(y).value))); 7679 } 7680 7681 RValue<Short4> psraw(RValue<Short4> x, RValue<Long1> y) 7682 { 7683 Module *module = Nucleus::getModule(); 7684 llvm::Function *psraw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psra_w); 7685 7686 return As<Short4>(RValue<MMX>(Nucleus::createCall(psraw, As<MMX>(x).value, As<MMX>(y).value))); 7687 } 7688 7689 RValue<Short4> psllw(RValue<Short4> x, RValue<Long1> y) 7690 { 7691 Module *module = Nucleus::getModule(); 7692 llvm::Function *psllw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psll_w); 7693 7694 return As<Short4>(RValue<MMX>(Nucleus::createCall(psllw, As<MMX>(x).value, As<MMX>(y).value))); 7695 } 7696 7697 RValue<Int2> pslld(RValue<Int2> x, RValue<Long1> y) 7698 { 7699 Module *module = 
Nucleus::getModule(); 7700 llvm::Function *pslld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psll_d); 7701 7702 return As<Int2>(RValue<MMX>(Nucleus::createCall(pslld, As<MMX>(x).value, As<MMX>(y).value))); 7703 } 7704 7705 RValue<UInt2> psrld(RValue<UInt2> x, RValue<Long1> y) 7706 { 7707 Module *module = Nucleus::getModule(); 7708 llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrl_d); 7709 7710 return As<UInt2>(RValue<MMX>(Nucleus::createCall(psrld, As<MMX>(x).value, As<MMX>(y).value))); 7711 } 7712 7713 RValue<Int2> psrad(RValue<Int2> x, RValue<Long1> y) 7714 { 7715 Module *module = Nucleus::getModule(); 7716 llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psra_d); 7717 7718 return As<Int2>(RValue<MMX>(Nucleus::createCall(psrld, As<MMX>(x).value, As<MMX>(y).value))); 7719 } 7720 7721 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y) 7722 { 7723 Module *module = Nucleus::getModule(); 7724 llvm::Function *pmaxsd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmaxsd); 7725 7726 return RValue<Int4>(Nucleus::createCall(pmaxsd, x.value, y.value)); 7727 } 7728 7729 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y) 7730 { 7731 Module *module = Nucleus::getModule(); 7732 llvm::Function *pminsd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pminsd); 7733 7734 return RValue<Int4>(Nucleus::createCall(pminsd, x.value, y.value)); 7735 } 7736 7737 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y) 7738 { 7739 Module *module = Nucleus::getModule(); 7740 llvm::Function *pmaxud = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmaxud); 7741 7742 return RValue<UInt4>(Nucleus::createCall(pmaxud, x.value, y.value)); 7743 } 7744 7745 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y) 7746 { 7747 Module *module = Nucleus::getModule(); 7748 llvm::Function *pminud = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pminud); 7749 7750 return 
RValue<UInt4>(Nucleus::createCall(pminud, x.value, y.value)); 7751 } 7752 7753 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y) 7754 { 7755 Module *module = Nucleus::getModule(); 7756 llvm::Function *pmulhw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmulh_w); 7757 7758 return As<Short4>(RValue<MMX>(Nucleus::createCall(pmulhw, As<MMX>(x).value, As<MMX>(y).value))); 7759 } 7760 7761 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y) 7762 { 7763 Module *module = Nucleus::getModule(); 7764 llvm::Function *pmulhuw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmulhu_w); 7765 7766 return As<UShort4>(RValue<MMX>(Nucleus::createCall(pmulhuw, As<MMX>(x).value, As<MMX>(y).value))); 7767 } 7768 7769 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y) 7770 { 7771 Module *module = Nucleus::getModule(); 7772 llvm::Function *pmaddwd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmadd_wd); 7773 7774 return As<Int2>(RValue<MMX>(Nucleus::createCall(pmaddwd, As<MMX>(x).value, As<MMX>(y).value))); 7775 } 7776 7777 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y) 7778 { 7779 Module *module = Nucleus::getModule(); 7780 llvm::Function *pmulhw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pmulh_w); 7781 7782 return RValue<Short8>(Nucleus::createCall(pmulhw, x.value, y.value)); 7783 } 7784 7785 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y) 7786 { 7787 Module *module = Nucleus::getModule(); 7788 llvm::Function *pmulhuw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pmulhu_w); 7789 7790 return RValue<UShort8>(Nucleus::createCall(pmulhuw, x.value, y.value)); 7791 } 7792 7793 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y) 7794 { 7795 Module *module = Nucleus::getModule(); 7796 llvm::Function *pmaddwd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pmadd_wd); 7797 7798 return RValue<Int4>(Nucleus::createCall(pmaddwd, x.value, y.value)); 7799 } 7800 7801 RValue<Int> 
movmskps(RValue<Float4> x) 7802 { 7803 Module *module = Nucleus::getModule(); 7804 llvm::Function *movmskps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_movmsk_ps); 7805 7806 return RValue<Int>(Nucleus::createCall(movmskps, x.value)); 7807 } 7808 7809 RValue<Int> pmovmskb(RValue<Byte8> x) 7810 { 7811 Module *module = Nucleus::getModule(); 7812 llvm::Function *pmovmskb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmovmskb); 7813 7814 return RValue<Int>(Nucleus::createCall(pmovmskb, As<MMX>(x).value)); 7815 } 7816 7817 //RValue<Int2> movd(RValue<Pointer<Int>> x) 7818 //{ 7819 // Value *element = Nucleus::createLoad(x.value); 7820 7821 //// Value *int2 = UndefValue::get(Int2::getType()); 7822 //// int2 = Nucleus::createInsertElement(int2, element, ConstantInt::get(Int::getType(), 0)); 7823 7824 // Value *int2 = Nucleus::createBitCast(Nucleus::createZExt(element, Long::getType()), Int2::getType()); 7825 7826 // return RValue<Int2>(int2); 7827 //} 7828 7829 //RValue<Int2> movdq2q(RValue<Int4> x) 7830 //{ 7831 // Value *long2 = Nucleus::createBitCast(x.value, Long2::getType()); 7832 // Value *element = Nucleus::createExtractElement(long2, ConstantInt::get(Int::getType(), 0)); 7833 7834 // return RValue<Int2>(Nucleus::createBitCast(element, Int2::getType())); 7835 //} 7836 7837 RValue<Int4> pmovzxbd(RValue<Int4> x) 7838 { 7839 Module *module = Nucleus::getModule(); 7840 llvm::Function *pmovzxbd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovzxbd); 7841 7842 return RValue<Int4>(Nucleus::createCall(pmovzxbd, Nucleus::createBitCast(x.value, Byte16::getType()))); 7843 } 7844 7845 RValue<Int4> pmovsxbd(RValue<Int4> x) 7846 { 7847 Module *module = Nucleus::getModule(); 7848 llvm::Function *pmovsxbd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovsxbd); 7849 7850 return RValue<Int4>(Nucleus::createCall(pmovsxbd, Nucleus::createBitCast(x.value, SByte16::getType()))); 7851 } 7852 7853 RValue<Int4> pmovzxwd(RValue<Int4> x) 7854 { 7855 
Module *module = Nucleus::getModule(); 7856 llvm::Function *pmovzxwd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovzxwd); 7857 7858 return RValue<Int4>(Nucleus::createCall(pmovzxwd, Nucleus::createBitCast(x.value, UShort8::getType()))); 7859 } 7860 7861 RValue<Int4> pmovsxwd(RValue<Int4> x) 7862 { 7863 Module *module = Nucleus::getModule(); 7864 llvm::Function *pmovsxwd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovsxwd); 7865 7866 return RValue<Int4>(Nucleus::createCall(pmovsxwd, Nucleus::createBitCast(x.value, Short8::getType()))); 7867 } 7868 7869 void emms() 7870 { 7871 Module *module = Nucleus::getModule(); 7872 llvm::Function *emms = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_emms); 7873 7874 Nucleus::createCall(emms); 7875 } 7876 } 7877 } 7878