1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Reactor.hpp" 16 17 #include "x86.hpp" 18 #include "CPUID.hpp" 19 #include "Thread.hpp" 20 #include "ExecutableMemory.hpp" 21 #include "MutexLock.hpp" 22 23 #undef min 24 #undef max 25 26 #if REACTOR_LLVM_VERSION < 7 27 #include "llvm/Analysis/LoopPass.h" 28 #include "llvm/Constants.h" 29 #include "llvm/Function.h" 30 #include "llvm/GlobalVariable.h" 31 #include "llvm/Intrinsics.h" 32 #include "llvm/LLVMContext.h" 33 #include "llvm/Module.h" 34 #include "llvm/PassManager.h" 35 #include "llvm/Support/IRBuilder.h" 36 #include "llvm/Support/TargetSelect.h" 37 #include "llvm/Target/TargetData.h" 38 #include "llvm/Target/TargetOptions.h" 39 #include "llvm/Transforms/Scalar.h" 40 #include "../lib/ExecutionEngine/JIT/JIT.h" 41 42 #include "LLVMRoutine.hpp" 43 #include "LLVMRoutineManager.hpp" 44 45 #define ARGS(...) 
__VA_ARGS__ 46 #else 47 #include "llvm/Analysis/LoopPass.h" 48 #include "llvm/ExecutionEngine/ExecutionEngine.h" 49 #include "llvm/ExecutionEngine/JITSymbol.h" 50 #include "llvm/ExecutionEngine/Orc/CompileUtils.h" 51 #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" 52 #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" 53 #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" 54 #include "llvm/ExecutionEngine/RTDyldMemoryManager.h" 55 #include "llvm/ExecutionEngine/SectionMemoryManager.h" 56 #include "llvm/IR/Constants.h" 57 #include "llvm/IR/DataLayout.h" 58 #include "llvm/IR/Function.h" 59 #include "llvm/IR/GlobalVariable.h" 60 #include "llvm/IR/IRBuilder.h" 61 #include "llvm/IR/Intrinsics.h" 62 #include "llvm/IR/LLVMContext.h" 63 #include "llvm/IR/LegacyPassManager.h" 64 #include "llvm/IR/Mangler.h" 65 #include "llvm/IR/Module.h" 66 #include "llvm/Support/Error.h" 67 #include "llvm/Support/TargetSelect.h" 68 #include "llvm/Target/TargetOptions.h" 69 #include "llvm/Transforms/InstCombine/InstCombine.h" 70 #include "llvm/Transforms/Scalar.h" 71 #include "llvm/Transforms/Scalar/GVN.h" 72 73 #include "LLVMRoutine.hpp" 74 75 #define ARGS(...) 
{__VA_ARGS__} 76 #define CreateCall2 CreateCall 77 #define CreateCall3 CreateCall 78 79 #include <unordered_map> 80 #endif 81 82 #include <numeric> 83 #include <fstream> 84 85 #if defined(__i386__) || defined(__x86_64__) 86 #include <xmmintrin.h> 87 #endif 88 89 #include <math.h> 90 91 #if defined(__x86_64__) && defined(_WIN32) 92 extern "C" void X86CompilationCallback() 93 { 94 assert(false); // UNIMPLEMENTED 95 } 96 #endif 97 98 #if REACTOR_LLVM_VERSION < 7 99 namespace llvm 100 { 101 extern bool JITEmitDebugInfo; 102 } 103 #endif 104 105 namespace rr 106 { 107 class LLVMReactorJIT; 108 } 109 110 namespace 111 { 112 rr::LLVMReactorJIT *reactorJIT = nullptr; 113 llvm::IRBuilder<> *builder = nullptr; 114 llvm::LLVMContext *context = nullptr; 115 llvm::Module *module = nullptr; 116 llvm::Function *function = nullptr; 117 118 rr::MutexLock codegenMutex; 119 120 #if REACTOR_LLVM_VERSION >= 7 121 llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y) 122 { 123 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType()); 124 125 llvm::VectorType *extTy = 126 llvm::VectorType::getExtendedElementVectorType(ty); 127 x = ::builder->CreateZExt(x, extTy); 128 y = ::builder->CreateZExt(y, extTy); 129 130 // (x + y + 1) >> 1 131 llvm::Constant *one = llvm::ConstantInt::get(extTy, 1); 132 llvm::Value *res = ::builder->CreateAdd(x, y); 133 res = ::builder->CreateAdd(res, one); 134 res = ::builder->CreateLShr(res, one); 135 return ::builder->CreateTrunc(res, ty); 136 } 137 138 llvm::Value *lowerPMINMAX(llvm::Value *x, llvm::Value *y, 139 llvm::ICmpInst::Predicate pred) 140 { 141 return ::builder->CreateSelect(::builder->CreateICmp(pred, x, y), x, y); 142 } 143 144 llvm::Value *lowerPCMP(llvm::ICmpInst::Predicate pred, llvm::Value *x, 145 llvm::Value *y, llvm::Type *dstTy) 146 { 147 return ::builder->CreateSExt(::builder->CreateICmp(pred, x, y), dstTy, ""); 148 } 149 150 #if defined(__i386__) || defined(__x86_64__) 151 llvm::Value *lowerPMOV(llvm::Value *op, llvm::Type 
*dstType, bool sext) 152 { 153 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(op->getType()); 154 llvm::VectorType *dstTy = llvm::cast<llvm::VectorType>(dstType); 155 156 llvm::Value *undef = llvm::UndefValue::get(srcTy); 157 llvm::SmallVector<uint32_t, 16> mask(dstTy->getNumElements()); 158 std::iota(mask.begin(), mask.end(), 0); 159 llvm::Value *v = ::builder->CreateShuffleVector(op, undef, mask); 160 161 return sext ? ::builder->CreateSExt(v, dstTy) 162 : ::builder->CreateZExt(v, dstTy); 163 } 164 165 llvm::Value *lowerPABS(llvm::Value *v) 166 { 167 llvm::Value *zero = llvm::Constant::getNullValue(v->getType()); 168 llvm::Value *cmp = ::builder->CreateICmp(llvm::ICmpInst::ICMP_SGT, v, zero); 169 llvm::Value *neg = ::builder->CreateNeg(v); 170 return ::builder->CreateSelect(cmp, v, neg); 171 } 172 #endif // defined(__i386__) || defined(__x86_64__) 173 174 #if !defined(__i386__) && !defined(__x86_64__) 175 llvm::Value *lowerPFMINMAX(llvm::Value *x, llvm::Value *y, 176 llvm::FCmpInst::Predicate pred) 177 { 178 return ::builder->CreateSelect(::builder->CreateFCmp(pred, x, y), x, y); 179 } 180 181 llvm::Value *lowerRound(llvm::Value *x) 182 { 183 llvm::Function *nearbyint = llvm::Intrinsic::getDeclaration( 184 ::module, llvm::Intrinsic::nearbyint, {x->getType()}); 185 return ::builder->CreateCall(nearbyint, ARGS(x)); 186 } 187 188 llvm::Value *lowerRoundInt(llvm::Value *x, llvm::Type *ty) 189 { 190 return ::builder->CreateFPToSI(lowerRound(x), ty); 191 } 192 193 llvm::Value *lowerFloor(llvm::Value *x) 194 { 195 llvm::Function *floor = llvm::Intrinsic::getDeclaration( 196 ::module, llvm::Intrinsic::floor, {x->getType()}); 197 return ::builder->CreateCall(floor, ARGS(x)); 198 } 199 200 llvm::Value *lowerTrunc(llvm::Value *x) 201 { 202 llvm::Function *trunc = llvm::Intrinsic::getDeclaration( 203 ::module, llvm::Intrinsic::trunc, {x->getType()}); 204 return ::builder->CreateCall(trunc, ARGS(x)); 205 } 206 207 // Packed add/sub saturatation 208 llvm::Value 
*lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned) 209 { 210 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType()); 211 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty); 212 213 unsigned numBits = ty->getScalarSizeInBits(); 214 215 llvm::Value *max, *min, *extX, *extY; 216 if (isSigned) 217 { 218 max = llvm::ConstantInt::get(extTy, (1LL << (numBits - 1)) - 1, true); 219 min = llvm::ConstantInt::get(extTy, (-1LL << (numBits - 1)), true); 220 extX = ::builder->CreateSExt(x, extTy); 221 extY = ::builder->CreateSExt(y, extTy); 222 } 223 else 224 { 225 assert(numBits <= 64); 226 uint64_t maxVal = (numBits == 64) ? ~0ULL : (1ULL << numBits) - 1; 227 max = llvm::ConstantInt::get(extTy, maxVal, false); 228 min = llvm::ConstantInt::get(extTy, 0, false); 229 extX = ::builder->CreateZExt(x, extTy); 230 extY = ::builder->CreateZExt(y, extTy); 231 } 232 233 llvm::Value *res = isAdd ? ::builder->CreateAdd(extX, extY) 234 : ::builder->CreateSub(extX, extY); 235 236 res = lowerPMINMAX(res, min, llvm::ICmpInst::ICMP_SGT); 237 res = lowerPMINMAX(res, max, llvm::ICmpInst::ICMP_SLT); 238 239 return ::builder->CreateTrunc(res, ty); 240 } 241 242 llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y) 243 { 244 return lowerPSAT(x, y, true, false); 245 } 246 247 llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y) 248 { 249 return lowerPSAT(x, y, true, true); 250 } 251 252 llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y) 253 { 254 return lowerPSAT(x, y, false, false); 255 } 256 257 llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y) 258 { 259 return lowerPSAT(x, y, false, true); 260 } 261 262 llvm::Value *lowerSQRT(llvm::Value *x) 263 { 264 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration( 265 ::module, llvm::Intrinsic::sqrt, {x->getType()}); 266 return ::builder->CreateCall(sqrt, ARGS(x)); 267 } 268 269 llvm::Value *lowerRCP(llvm::Value *x) 270 { 271 llvm::Type *ty = x->getType(); 272 
llvm::Constant *one; 273 if (llvm::VectorType *vectorTy = llvm::dyn_cast<llvm::VectorType>(ty)) 274 { 275 one = llvm::ConstantVector::getSplat( 276 vectorTy->getNumElements(), 277 llvm::ConstantFP::get(vectorTy->getElementType(), 1)); 278 } 279 else 280 { 281 one = llvm::ConstantFP::get(ty, 1); 282 } 283 return ::builder->CreateFDiv(one, x); 284 } 285 286 llvm::Value *lowerRSQRT(llvm::Value *x) 287 { 288 return lowerRCP(lowerSQRT(x)); 289 } 290 291 llvm::Value *lowerVectorShl(llvm::Value *x, uint64_t scalarY) 292 { 293 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType()); 294 llvm::Value *y = llvm::ConstantVector::getSplat( 295 ty->getNumElements(), 296 llvm::ConstantInt::get(ty->getElementType(), scalarY)); 297 return ::builder->CreateShl(x, y); 298 } 299 300 llvm::Value *lowerVectorAShr(llvm::Value *x, uint64_t scalarY) 301 { 302 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType()); 303 llvm::Value *y = llvm::ConstantVector::getSplat( 304 ty->getNumElements(), 305 llvm::ConstantInt::get(ty->getElementType(), scalarY)); 306 return ::builder->CreateAShr(x, y); 307 } 308 309 llvm::Value *lowerVectorLShr(llvm::Value *x, uint64_t scalarY) 310 { 311 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType()); 312 llvm::Value *y = llvm::ConstantVector::getSplat( 313 ty->getNumElements(), 314 llvm::ConstantInt::get(ty->getElementType(), scalarY)); 315 return ::builder->CreateLShr(x, y); 316 } 317 318 llvm::Value *lowerMulAdd(llvm::Value *x, llvm::Value *y) 319 { 320 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType()); 321 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty); 322 323 llvm::Value *extX = ::builder->CreateSExt(x, extTy); 324 llvm::Value *extY = ::builder->CreateSExt(y, extTy); 325 llvm::Value *mult = ::builder->CreateMul(extX, extY); 326 327 llvm::Value *undef = llvm::UndefValue::get(extTy); 328 329 llvm::SmallVector<uint32_t, 16> evenIdx; 330 llvm::SmallVector<uint32_t, 16> oddIdx; 331 
for (uint64_t i = 0, n = ty->getNumElements(); i < n; i += 2) 332 { 333 evenIdx.push_back(i); 334 oddIdx.push_back(i + 1); 335 } 336 337 llvm::Value *lhs = ::builder->CreateShuffleVector(mult, undef, evenIdx); 338 llvm::Value *rhs = ::builder->CreateShuffleVector(mult, undef, oddIdx); 339 return ::builder->CreateAdd(lhs, rhs); 340 } 341 342 llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext) 343 { 344 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType()); 345 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty); 346 347 llvm::Value *extX, *extY; 348 if (sext) 349 { 350 extX = ::builder->CreateSExt(x, extTy); 351 extY = ::builder->CreateSExt(y, extTy); 352 } 353 else 354 { 355 extX = ::builder->CreateZExt(x, extTy); 356 extY = ::builder->CreateZExt(y, extTy); 357 } 358 359 llvm::Value *mult = ::builder->CreateMul(extX, extY); 360 361 llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType()); 362 llvm::Value *mulh = ::builder->CreateAShr(mult, intTy->getIntegerBitWidth()); 363 return ::builder->CreateTrunc(mulh, ty); 364 } 365 366 llvm::Value *lowerPack(llvm::Value *x, llvm::Value *y, bool isSigned) 367 { 368 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(x->getType()); 369 llvm::VectorType *dstTy = llvm::VectorType::getTruncatedElementVectorType(srcTy); 370 371 llvm::IntegerType *dstElemTy = 372 llvm::cast<llvm::IntegerType>(dstTy->getElementType()); 373 374 uint64_t truncNumBits = dstElemTy->getIntegerBitWidth(); 375 assert(truncNumBits < 64 && "shift 64 must be handled separately"); 376 llvm::Constant *max, *min; 377 if (isSigned) 378 { 379 max = llvm::ConstantInt::get(srcTy, (1LL << (truncNumBits - 1)) - 1, true); 380 min = llvm::ConstantInt::get(srcTy, (-1LL << (truncNumBits - 1)), true); 381 } 382 else 383 { 384 max = llvm::ConstantInt::get(srcTy, (1ULL << truncNumBits) - 1, false); 385 min = llvm::ConstantInt::get(srcTy, 0, false); 386 } 387 388 x = lowerPMINMAX(x, min, 
llvm::ICmpInst::ICMP_SGT); 389 x = lowerPMINMAX(x, max, llvm::ICmpInst::ICMP_SLT); 390 y = lowerPMINMAX(y, min, llvm::ICmpInst::ICMP_SGT); 391 y = lowerPMINMAX(y, max, llvm::ICmpInst::ICMP_SLT); 392 393 x = ::builder->CreateTrunc(x, dstTy); 394 y = ::builder->CreateTrunc(y, dstTy); 395 396 llvm::SmallVector<uint32_t, 16> index(srcTy->getNumElements() * 2); 397 std::iota(index.begin(), index.end(), 0); 398 399 return ::builder->CreateShuffleVector(x, y, index); 400 } 401 402 llvm::Value *lowerSignMask(llvm::Value *x, llvm::Type *retTy) 403 { 404 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType()); 405 llvm::Constant *zero = llvm::ConstantInt::get(ty, 0); 406 llvm::Value *cmp = ::builder->CreateICmpSLT(x, zero); 407 408 llvm::Value *ret = ::builder->CreateZExt( 409 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy); 410 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i) 411 { 412 llvm::Value *elem = ::builder->CreateZExt( 413 ::builder->CreateExtractElement(cmp, i), retTy); 414 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i)); 415 } 416 return ret; 417 } 418 419 llvm::Value *lowerFPSignMask(llvm::Value *x, llvm::Type *retTy) 420 { 421 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType()); 422 llvm::Constant *zero = llvm::ConstantFP::get(ty, 0); 423 llvm::Value *cmp = ::builder->CreateFCmpULT(x, zero); 424 425 llvm::Value *ret = ::builder->CreateZExt( 426 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy); 427 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i) 428 { 429 llvm::Value *elem = ::builder->CreateZExt( 430 ::builder->CreateExtractElement(cmp, i), retTy); 431 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i)); 432 } 433 return ret; 434 } 435 #endif // !defined(__i386__) && !defined(__x86_64__) 436 #endif // REACTOR_LLVM_VERSION >= 7 437 } 438 439 namespace rr 440 { 441 #if REACTOR_LLVM_VERSION < 7 442 class LLVMReactorJIT 443 { 444 private: 445 
std::string arch; 446 llvm::SmallVector<std::string, 16> mattrs; 447 llvm::ExecutionEngine *executionEngine; 448 LLVMRoutineManager *routineManager; 449 450 public: 451 LLVMReactorJIT(const std::string &arch_, 452 const llvm::SmallVectorImpl<std::string> &mattrs_) : 453 arch(arch_), 454 mattrs(mattrs_.begin(), mattrs_.end()), 455 executionEngine(nullptr), 456 routineManager(nullptr) 457 { 458 } 459 460 void startSession() 461 { 462 std::string error; 463 464 ::module = new llvm::Module("", *::context); 465 466 routineManager = new LLVMRoutineManager(); 467 468 llvm::TargetMachine *targetMachine = 469 llvm::EngineBuilder::selectTarget( 470 ::module, arch, "", mattrs, llvm::Reloc::Default, 471 llvm::CodeModel::JITDefault, &error); 472 473 executionEngine = llvm::JIT::createJIT( 474 ::module, &error, routineManager, llvm::CodeGenOpt::Aggressive, 475 true, targetMachine); 476 } 477 478 void endSession() 479 { 480 delete executionEngine; 481 executionEngine = nullptr; 482 routineManager = nullptr; 483 484 ::function = nullptr; 485 ::module = nullptr; 486 } 487 488 LLVMRoutine *acquireRoutine(llvm::Function *func) 489 { 490 void *entry = executionEngine->getPointerToFunction(::function); 491 return routineManager->acquireRoutine(entry); 492 } 493 494 void optimize(llvm::Module *module) 495 { 496 static llvm::PassManager *passManager = nullptr; 497 498 if(!passManager) 499 { 500 passManager = new llvm::PassManager(); 501 502 passManager->add(new llvm::TargetData(*executionEngine->getTargetData())); 503 passManager->add(llvm::createScalarReplAggregatesPass()); 504 505 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++) 506 { 507 switch(optimization[pass]) 508 { 509 case Disabled: break; 510 case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break; 511 case LICM: passManager->add(llvm::createLICMPass()); break; 512 case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break; 513 case GVN: 
passManager->add(llvm::createGVNPass()); break; 514 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break; 515 case Reassociate: passManager->add(llvm::createReassociatePass()); break; 516 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break; 517 case SCCP: passManager->add(llvm::createSCCPPass()); break; 518 case ScalarReplAggregates: passManager->add(llvm::createScalarReplAggregatesPass()); break; 519 default: 520 assert(false); 521 } 522 } 523 } 524 525 passManager->run(*::module); 526 } 527 }; 528 #else 529 class ExternalFunctionSymbolResolver 530 { 531 private: 532 using FunctionMap = std::unordered_map<std::string, void *>; 533 FunctionMap func_; 534 535 public: 536 ExternalFunctionSymbolResolver() 537 { 538 func_.emplace("floorf", reinterpret_cast<void*>(floorf)); 539 func_.emplace("nearbyintf", reinterpret_cast<void*>(nearbyintf)); 540 func_.emplace("truncf", reinterpret_cast<void*>(truncf)); 541 } 542 543 void *findSymbol(const std::string &name) const 544 { 545 FunctionMap::const_iterator it = func_.find(name); 546 return (it != func_.end()) ? 
it->second : nullptr; 547 } 548 }; 549 550 class LLVMReactorJIT 551 { 552 private: 553 using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer; 554 using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>; 555 556 llvm::orc::ExecutionSession session; 557 ExternalFunctionSymbolResolver externalSymbolResolver; 558 std::shared_ptr<llvm::orc::SymbolResolver> resolver; 559 std::unique_ptr<llvm::TargetMachine> targetMachine; 560 const llvm::DataLayout dataLayout; 561 ObjLayer objLayer; 562 CompileLayer compileLayer; 563 size_t emittedFunctionsNum; 564 565 public: 566 LLVMReactorJIT(const char *arch, const llvm::SmallVectorImpl<std::string>& mattrs, 567 const llvm::TargetOptions &targetOpts): 568 resolver(createLegacyLookupResolver( 569 session, 570 [this](const std::string &name) { 571 void *func = externalSymbolResolver.findSymbol(name); 572 if (func != nullptr) 573 { 574 return llvm::JITSymbol( 575 reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute); 576 } 577 578 return objLayer.findSymbol(name, true); 579 }, 580 [](llvm::Error err) { 581 if (err) 582 { 583 // TODO: Log the symbol resolution errors. 
584 return; 585 } 586 })), 587 targetMachine(llvm::EngineBuilder() 588 .setMArch(arch) 589 .setMAttrs(mattrs) 590 .setTargetOptions(targetOpts) 591 .selectTarget()), 592 dataLayout(targetMachine->createDataLayout()), 593 objLayer( 594 session, 595 [this](llvm::orc::VModuleKey) { 596 return ObjLayer::Resources{ 597 std::make_shared<llvm::SectionMemoryManager>(), 598 resolver}; 599 }), 600 compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine)), 601 emittedFunctionsNum(0) 602 { 603 } 604 605 void startSession() 606 { 607 ::module = new llvm::Module("", *::context); 608 } 609 610 void endSession() 611 { 612 ::function = nullptr; 613 ::module = nullptr; 614 } 615 616 LLVMRoutine *acquireRoutine(llvm::Function *func) 617 { 618 std::string name = "f" + llvm::Twine(emittedFunctionsNum++).str(); 619 func->setName(name); 620 func->setLinkage(llvm::GlobalValue::ExternalLinkage); 621 func->setDoesNotThrow(); 622 623 std::unique_ptr<llvm::Module> mod(::module); 624 ::module = nullptr; 625 mod->setDataLayout(dataLayout); 626 627 auto moduleKey = session.allocateVModule(); 628 llvm::cantFail(compileLayer.addModule(moduleKey, std::move(mod))); 629 630 std::string mangledName; 631 { 632 llvm::raw_string_ostream mangledNameStream(mangledName); 633 llvm::Mangler::getNameWithPrefix(mangledNameStream, name, dataLayout); 634 } 635 636 llvm::JITSymbol symbol = compileLayer.findSymbolIn(moduleKey, mangledName, false); 637 638 llvm::Expected<llvm::JITTargetAddress> expectAddr = symbol.getAddress(); 639 if(!expectAddr) 640 { 641 return nullptr; 642 } 643 644 void *addr = reinterpret_cast<void *>(static_cast<intptr_t>(expectAddr.get())); 645 return new LLVMRoutine(addr, releaseRoutineCallback, this, moduleKey); 646 } 647 648 void optimize(llvm::Module *module) 649 { 650 std::unique_ptr<llvm::legacy::PassManager> passManager( 651 new llvm::legacy::PassManager()); 652 653 passManager->add(llvm::createSROAPass()); 654 655 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; 
pass++) 656 { 657 switch(optimization[pass]) 658 { 659 case Disabled: break; 660 case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break; 661 case LICM: passManager->add(llvm::createLICMPass()); break; 662 case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break; 663 case GVN: passManager->add(llvm::createGVNPass()); break; 664 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break; 665 case Reassociate: passManager->add(llvm::createReassociatePass()); break; 666 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break; 667 case SCCP: passManager->add(llvm::createSCCPPass()); break; 668 case ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break; 669 default: 670 assert(false); 671 } 672 } 673 674 passManager->run(*::module); 675 } 676 677 private: 678 void releaseRoutineModule(llvm::orc::VModuleKey moduleKey) 679 { 680 llvm::cantFail(compileLayer.removeModule(moduleKey)); 681 } 682 683 static void releaseRoutineCallback(LLVMReactorJIT *jit, uint64_t moduleKey) 684 { 685 jit->releaseRoutineModule(moduleKey); 686 } 687 }; 688 #endif 689 690 Optimization optimization[10] = {InstructionCombining, Disabled}; 691 692 enum EmulatedType 693 { 694 Type_v2i32, 695 Type_v4i16, 696 Type_v2i16, 697 Type_v8i8, 698 Type_v4i8, 699 Type_v2f32, 700 EmulatedTypeCount 701 }; 702 703 llvm::Type *T(Type *t) 704 { 705 uintptr_t type = reinterpret_cast<uintptr_t>(t); 706 if(type < EmulatedTypeCount) 707 { 708 // Use 128-bit vectors to implement logically shorter ones. 
709 switch(type) 710 { 711 case Type_v2i32: return T(Int4::getType()); 712 case Type_v4i16: return T(Short8::getType()); 713 case Type_v2i16: return T(Short8::getType()); 714 case Type_v8i8: return T(Byte16::getType()); 715 case Type_v4i8: return T(Byte16::getType()); 716 case Type_v2f32: return T(Float4::getType()); 717 default: assert(false); 718 } 719 } 720 721 return reinterpret_cast<llvm::Type*>(t); 722 } 723 724 inline Type *T(llvm::Type *t) 725 { 726 return reinterpret_cast<Type*>(t); 727 } 728 729 Type *T(EmulatedType t) 730 { 731 return reinterpret_cast<Type*>(t); 732 } 733 734 inline llvm::Value *V(Value *t) 735 { 736 return reinterpret_cast<llvm::Value*>(t); 737 } 738 739 inline Value *V(llvm::Value *t) 740 { 741 return reinterpret_cast<Value*>(t); 742 } 743 744 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t) 745 { 746 return reinterpret_cast<std::vector<llvm::Type*>&>(t); 747 } 748 749 inline llvm::BasicBlock *B(BasicBlock *t) 750 { 751 return reinterpret_cast<llvm::BasicBlock*>(t); 752 } 753 754 inline BasicBlock *B(llvm::BasicBlock *t) 755 { 756 return reinterpret_cast<BasicBlock*>(t); 757 } 758 759 static size_t typeSize(Type *type) 760 { 761 uintptr_t t = reinterpret_cast<uintptr_t>(type); 762 if(t < EmulatedTypeCount) 763 { 764 switch(t) 765 { 766 case Type_v2i32: return 8; 767 case Type_v4i16: return 8; 768 case Type_v2i16: return 4; 769 case Type_v8i8: return 8; 770 case Type_v4i8: return 4; 771 case Type_v2f32: return 8; 772 default: assert(false); 773 } 774 } 775 776 return T(type)->getPrimitiveSizeInBits() / 8; 777 } 778 779 static unsigned int elementCount(Type *type) 780 { 781 uintptr_t t = reinterpret_cast<uintptr_t>(type); 782 if(t < EmulatedTypeCount) 783 { 784 switch(t) 785 { 786 case Type_v2i32: return 2; 787 case Type_v4i16: return 4; 788 case Type_v2i16: return 2; 789 case Type_v8i8: return 8; 790 case Type_v4i8: return 4; 791 case Type_v2f32: return 2; 792 default: assert(false); 793 } 794 } 795 796 return 
llvm::cast<llvm::VectorType>(T(type))->getNumElements(); 797 } 798 799 Nucleus::Nucleus() 800 { 801 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe 802 803 llvm::InitializeNativeTarget(); 804 805 #if REACTOR_LLVM_VERSION >= 7 806 llvm::InitializeNativeTargetAsmPrinter(); 807 llvm::InitializeNativeTargetAsmParser(); 808 #endif 809 810 if(!::context) 811 { 812 ::context = new llvm::LLVMContext(); 813 } 814 815 #if defined(__x86_64__) 816 static const char arch[] = "x86-64"; 817 #elif defined(__i386__) 818 static const char arch[] = "x86"; 819 #elif defined(__aarch64__) 820 static const char arch[] = "arm64"; 821 #elif defined(__arm__) 822 static const char arch[] = "arm"; 823 #elif defined(__mips__) 824 #if defined(__mips64) 825 static const char arch[] = "mips64el"; 826 #else 827 static const char arch[] = "mipsel"; 828 #endif 829 #else 830 #error "unknown architecture" 831 #endif 832 833 llvm::SmallVector<std::string, 1> mattrs; 834 #if defined(__i386__) || defined(__x86_64__) 835 mattrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx"); 836 mattrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov"); 837 mattrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse"); 838 mattrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2"); 839 mattrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3"); 840 mattrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3"); 841 #if REACTOR_LLVM_VERSION < 7 842 mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41"); 843 #else 844 mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse4.1" : "-sse4.1"); 845 #endif 846 #elif defined(__arm__) 847 #if __ARM_ARCH >= 8 848 mattrs.push_back("+armv8-a"); 849 #else 850 // armv7-a requires compiler-rt routines; otherwise, compiled kernel 851 // might fail to link. 
852 #endif 853 #endif 854 855 #if REACTOR_LLVM_VERSION < 7 856 llvm::JITEmitDebugInfo = false; 857 llvm::UnsafeFPMath = true; 858 // llvm::NoInfsFPMath = true; 859 // llvm::NoNaNsFPMath = true; 860 #else 861 llvm::TargetOptions targetOpts; 862 targetOpts.UnsafeFPMath = false; 863 // targetOpts.NoInfsFPMath = true; 864 // targetOpts.NoNaNsFPMath = true; 865 #endif 866 867 if(!::reactorJIT) 868 { 869 #if REACTOR_LLVM_VERSION < 7 870 ::reactorJIT = new LLVMReactorJIT(arch, mattrs); 871 #else 872 ::reactorJIT = new LLVMReactorJIT(arch, mattrs, targetOpts); 873 #endif 874 } 875 876 ::reactorJIT->startSession(); 877 878 if(!::builder) 879 { 880 ::builder = new llvm::IRBuilder<>(*::context); 881 } 882 } 883 884 Nucleus::~Nucleus() 885 { 886 ::reactorJIT->endSession(); 887 888 ::codegenMutex.unlock(); 889 } 890 891 Routine *Nucleus::acquireRoutine(const char *name, bool runOptimizations) 892 { 893 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator()) 894 { 895 llvm::Type *type = ::function->getReturnType(); 896 897 if(type->isVoidTy()) 898 { 899 createRetVoid(); 900 } 901 else 902 { 903 createRet(V(llvm::UndefValue::get(type))); 904 } 905 } 906 907 if(false) 908 { 909 #if REACTOR_LLVM_VERSION < 7 910 std::string error; 911 llvm::raw_fd_ostream file((std::string(name) + "-llvm-dump-unopt.txt").c_str(), error); 912 #else 913 std::error_code error; 914 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error); 915 #endif 916 917 ::module->print(file, 0); 918 } 919 920 if(runOptimizations) 921 { 922 optimize(); 923 } 924 925 if(false) 926 { 927 #if REACTOR_LLVM_VERSION < 7 928 std::string error; 929 llvm::raw_fd_ostream file((std::string(name) + "-llvm-dump-opt.txt").c_str(), error); 930 #else 931 std::error_code error; 932 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error); 933 #endif 934 935 ::module->print(file, 0); 936 } 937 938 LLVMRoutine *routine = 
::reactorJIT->acquireRoutine(::function); 939 940 return routine; 941 } 942 943 void Nucleus::optimize() 944 { 945 ::reactorJIT->optimize(::module); 946 } 947 948 Value *Nucleus::allocateStackVariable(Type *type, int arraySize) 949 { 950 // Need to allocate it in the entry block for mem2reg to work 951 llvm::BasicBlock &entryBlock = ::function->getEntryBlock(); 952 953 llvm::Instruction *declaration; 954 955 if(arraySize) 956 { 957 #if REACTOR_LLVM_VERSION < 7 958 declaration = new llvm::AllocaInst(T(type), V(Nucleus::createConstantInt(arraySize))); 959 #else 960 declaration = new llvm::AllocaInst(T(type), 0, V(Nucleus::createConstantInt(arraySize))); 961 #endif 962 } 963 else 964 { 965 #if REACTOR_LLVM_VERSION < 7 966 declaration = new llvm::AllocaInst(T(type), (llvm::Value*)nullptr); 967 #else 968 declaration = new llvm::AllocaInst(T(type), 0, (llvm::Value*)nullptr); 969 #endif 970 } 971 972 entryBlock.getInstList().push_front(declaration); 973 974 return V(declaration); 975 } 976 977 BasicBlock *Nucleus::createBasicBlock() 978 { 979 return B(llvm::BasicBlock::Create(*::context, "", ::function)); 980 } 981 982 BasicBlock *Nucleus::getInsertBlock() 983 { 984 return B(::builder->GetInsertBlock()); 985 } 986 987 void Nucleus::setInsertBlock(BasicBlock *basicBlock) 988 { 989 // assert(::builder->GetInsertBlock()->back().isTerminator()); 990 ::builder->SetInsertPoint(B(basicBlock)); 991 } 992 993 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params) 994 { 995 llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false); 996 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module); 997 ::function->setCallingConv(llvm::CallingConv::C); 998 999 #if defined(_WIN32) && REACTOR_LLVM_VERSION >= 7 1000 // FIXME(capn): 1001 // On Windows, stack memory is committed in increments of 4 kB pages, with the last page 1002 // having a trap which allows the OS to grow the stack. 
For functions with a stack frame 1003 // larger than 4 kB this can cause an issue when a variable is accessed beyond the guard 1004 // page. Therefore the compiler emits a call to __chkstk in the function prolog to probe 1005 // the stack and ensure all pages have been committed. This is currently broken in LLVM 1006 // JIT, but we can prevent emitting the stack probe call: 1007 ::function->addFnAttr("stack-probe-size", "1048576"); 1008 #endif 1009 1010 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::function)); 1011 } 1012 1013 Value *Nucleus::getArgument(unsigned int index) 1014 { 1015 llvm::Function::arg_iterator args = ::function->arg_begin(); 1016 1017 while(index) 1018 { 1019 args++; 1020 index--; 1021 } 1022 1023 return V(&*args); 1024 } 1025 1026 void Nucleus::createRetVoid() 1027 { 1028 ::builder->CreateRetVoid(); 1029 } 1030 1031 void Nucleus::createRet(Value *v) 1032 { 1033 ::builder->CreateRet(V(v)); 1034 } 1035 1036 void Nucleus::createBr(BasicBlock *dest) 1037 { 1038 ::builder->CreateBr(B(dest)); 1039 } 1040 1041 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse) 1042 { 1043 ::builder->CreateCondBr(V(cond), B(ifTrue), B(ifFalse)); 1044 } 1045 1046 Value *Nucleus::createAdd(Value *lhs, Value *rhs) 1047 { 1048 return V(::builder->CreateAdd(V(lhs), V(rhs))); 1049 } 1050 1051 Value *Nucleus::createSub(Value *lhs, Value *rhs) 1052 { 1053 return V(::builder->CreateSub(V(lhs), V(rhs))); 1054 } 1055 1056 Value *Nucleus::createMul(Value *lhs, Value *rhs) 1057 { 1058 return V(::builder->CreateMul(V(lhs), V(rhs))); 1059 } 1060 1061 Value *Nucleus::createUDiv(Value *lhs, Value *rhs) 1062 { 1063 return V(::builder->CreateUDiv(V(lhs), V(rhs))); 1064 } 1065 1066 Value *Nucleus::createSDiv(Value *lhs, Value *rhs) 1067 { 1068 return V(::builder->CreateSDiv(V(lhs), V(rhs))); 1069 } 1070 1071 Value *Nucleus::createFAdd(Value *lhs, Value *rhs) 1072 { 1073 return V(::builder->CreateFAdd(V(lhs), V(rhs))); 1074 } 1075 1076 
Value *Nucleus::createFSub(Value *lhs, Value *rhs) 1077 { 1078 return V(::builder->CreateFSub(V(lhs), V(rhs))); 1079 } 1080 1081 Value *Nucleus::createFMul(Value *lhs, Value *rhs) 1082 { 1083 return V(::builder->CreateFMul(V(lhs), V(rhs))); 1084 } 1085 1086 Value *Nucleus::createFDiv(Value *lhs, Value *rhs) 1087 { 1088 return V(::builder->CreateFDiv(V(lhs), V(rhs))); 1089 } 1090 1091 Value *Nucleus::createURem(Value *lhs, Value *rhs) 1092 { 1093 return V(::builder->CreateURem(V(lhs), V(rhs))); 1094 } 1095 1096 Value *Nucleus::createSRem(Value *lhs, Value *rhs) 1097 { 1098 return V(::builder->CreateSRem(V(lhs), V(rhs))); 1099 } 1100 1101 Value *Nucleus::createFRem(Value *lhs, Value *rhs) 1102 { 1103 return V(::builder->CreateFRem(V(lhs), V(rhs))); 1104 } 1105 1106 Value *Nucleus::createShl(Value *lhs, Value *rhs) 1107 { 1108 return V(::builder->CreateShl(V(lhs), V(rhs))); 1109 } 1110 1111 Value *Nucleus::createLShr(Value *lhs, Value *rhs) 1112 { 1113 return V(::builder->CreateLShr(V(lhs), V(rhs))); 1114 } 1115 1116 Value *Nucleus::createAShr(Value *lhs, Value *rhs) 1117 { 1118 return V(::builder->CreateAShr(V(lhs), V(rhs))); 1119 } 1120 1121 Value *Nucleus::createAnd(Value *lhs, Value *rhs) 1122 { 1123 return V(::builder->CreateAnd(V(lhs), V(rhs))); 1124 } 1125 1126 Value *Nucleus::createOr(Value *lhs, Value *rhs) 1127 { 1128 return V(::builder->CreateOr(V(lhs), V(rhs))); 1129 } 1130 1131 Value *Nucleus::createXor(Value *lhs, Value *rhs) 1132 { 1133 return V(::builder->CreateXor(V(lhs), V(rhs))); 1134 } 1135 1136 Value *Nucleus::createNeg(Value *v) 1137 { 1138 return V(::builder->CreateNeg(V(v))); 1139 } 1140 1141 Value *Nucleus::createFNeg(Value *v) 1142 { 1143 return V(::builder->CreateFNeg(V(v))); 1144 } 1145 1146 Value *Nucleus::createNot(Value *v) 1147 { 1148 return V(::builder->CreateNot(V(v))); 1149 } 1150 1151 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment) 1152 { 1153 uintptr_t t = 
reinterpret_cast<uintptr_t>(type); 1154 if(t < EmulatedTypeCount) 1155 { 1156 switch(t) 1157 { 1158 case Type_v2i32: 1159 case Type_v4i16: 1160 case Type_v8i8: 1161 case Type_v2f32: 1162 return createBitCast( 1163 createInsertElement( 1164 V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))), 1165 createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment), 1166 0), 1167 type); 1168 case Type_v2i16: 1169 case Type_v4i8: 1170 if(alignment != 0) // Not a local variable (all vectors are 128-bit). 1171 { 1172 Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))); 1173 Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment); 1174 i = createZExt(i, Long::getType()); 1175 Value *v = createInsertElement(u, i, 0); 1176 return createBitCast(v, type); 1177 } 1178 break; 1179 default: 1180 assert(false); 1181 } 1182 } 1183 1184 assert(V(ptr)->getType()->getContainedType(0) == T(type)); 1185 return V(::builder->Insert(new llvm::LoadInst(V(ptr), "", isVolatile, alignment))); 1186 } 1187 1188 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment) 1189 { 1190 uintptr_t t = reinterpret_cast<uintptr_t>(type); 1191 if(t < EmulatedTypeCount) 1192 { 1193 switch(t) 1194 { 1195 case Type_v2i32: 1196 case Type_v4i16: 1197 case Type_v8i8: 1198 case Type_v2f32: 1199 createStore( 1200 createExtractElement( 1201 createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0), 1202 createBitCast(ptr, Pointer<Long>::getType()), 1203 Long::getType(), isVolatile, alignment); 1204 return value; 1205 case Type_v2i16: 1206 case Type_v4i8: 1207 if(alignment != 0) // Not a local variable (all vectors are 128-bit). 
1208 { 1209 createStore( 1210 createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0), 1211 createBitCast(ptr, Pointer<Int>::getType()), 1212 Int::getType(), isVolatile, alignment); 1213 return value; 1214 } 1215 break; 1216 default: 1217 assert(false); 1218 } 1219 } 1220 1221 assert(V(ptr)->getType()->getContainedType(0) == T(type)); 1222 ::builder->Insert(new llvm::StoreInst(V(value), V(ptr), isVolatile, alignment)); 1223 return value; 1224 } 1225 1226 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex) 1227 { 1228 if(sizeof(void*) == 8) 1229 { 1230 if(unsignedIndex) 1231 { 1232 index = createZExt(index, Long::getType()); 1233 } 1234 else 1235 { 1236 index = createSExt(index, Long::getType()); 1237 } 1238 1239 index = createMul(index, createConstantLong((int64_t)typeSize(type))); 1240 } 1241 else 1242 { 1243 index = createMul(index, createConstantInt((int)typeSize(type))); 1244 } 1245 1246 assert(V(ptr)->getType()->getContainedType(0) == T(type)); 1247 return createBitCast( 1248 V(::builder->CreateGEP(V(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0)))), V(index))), 1249 T(llvm::PointerType::get(T(type), 0))); 1250 } 1251 1252 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value) 1253 { 1254 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), llvm::AtomicOrdering::SequentiallyConsistent)); 1255 } 1256 1257 Value *Nucleus::createTrunc(Value *v, Type *destType) 1258 { 1259 return V(::builder->CreateTrunc(V(v), T(destType))); 1260 } 1261 1262 Value *Nucleus::createZExt(Value *v, Type *destType) 1263 { 1264 return V(::builder->CreateZExt(V(v), T(destType))); 1265 } 1266 1267 Value *Nucleus::createSExt(Value *v, Type *destType) 1268 { 1269 return V(::builder->CreateSExt(V(v), T(destType))); 1270 } 1271 1272 Value *Nucleus::createFPToSI(Value *v, Type *destType) 1273 { 1274 return V(::builder->CreateFPToSI(V(v), T(destType))); 1275 } 1276 1277 Value 
*Nucleus::createSIToFP(Value *v, Type *destType) 1278 { 1279 return V(::builder->CreateSIToFP(V(v), T(destType))); 1280 } 1281 1282 Value *Nucleus::createFPTrunc(Value *v, Type *destType) 1283 { 1284 return V(::builder->CreateFPTrunc(V(v), T(destType))); 1285 } 1286 1287 Value *Nucleus::createFPExt(Value *v, Type *destType) 1288 { 1289 return V(::builder->CreateFPExt(V(v), T(destType))); 1290 } 1291 1292 Value *Nucleus::createBitCast(Value *v, Type *destType) 1293 { 1294 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need 1295 // support for casting between scalars and wide vectors. Emulate them by writing to the stack and 1296 // reading back as the destination type. 1297 if(!V(v)->getType()->isVectorTy() && T(destType)->isVectorTy()) 1298 { 1299 Value *readAddress = allocateStackVariable(destType); 1300 Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(V(v)->getType(), 0))); 1301 createStore(v, writeAddress, T(V(v)->getType())); 1302 return createLoad(readAddress, destType); 1303 } 1304 else if(V(v)->getType()->isVectorTy() && !T(destType)->isVectorTy()) 1305 { 1306 Value *writeAddress = allocateStackVariable(T(V(v)->getType())); 1307 createStore(v, writeAddress, T(V(v)->getType())); 1308 Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0))); 1309 return createLoad(readAddress, destType); 1310 } 1311 1312 return V(::builder->CreateBitCast(V(v), T(destType))); 1313 } 1314 1315 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs) 1316 { 1317 return V(::builder->CreateICmpEQ(V(lhs), V(rhs))); 1318 } 1319 1320 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs) 1321 { 1322 return V(::builder->CreateICmpNE(V(lhs), V(rhs))); 1323 } 1324 1325 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs) 1326 { 1327 return V(::builder->CreateICmpUGT(V(lhs), V(rhs))); 1328 } 1329 1330 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs) 1331 { 1332 return 
V(::builder->CreateICmpUGE(V(lhs), V(rhs))); 1333 } 1334 1335 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs) 1336 { 1337 return V(::builder->CreateICmpULT(V(lhs), V(rhs))); 1338 } 1339 1340 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs) 1341 { 1342 return V(::builder->CreateICmpULE(V(lhs), V(rhs))); 1343 } 1344 1345 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs) 1346 { 1347 return V(::builder->CreateICmpSGT(V(lhs), V(rhs))); 1348 } 1349 1350 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs) 1351 { 1352 return V(::builder->CreateICmpSGE(V(lhs), V(rhs))); 1353 } 1354 1355 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs) 1356 { 1357 return V(::builder->CreateICmpSLT(V(lhs), V(rhs))); 1358 } 1359 1360 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs) 1361 { 1362 return V(::builder->CreateICmpSLE(V(lhs), V(rhs))); 1363 } 1364 1365 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs) 1366 { 1367 return V(::builder->CreateFCmpOEQ(V(lhs), V(rhs))); 1368 } 1369 1370 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs) 1371 { 1372 return V(::builder->CreateFCmpOGT(V(lhs), V(rhs))); 1373 } 1374 1375 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs) 1376 { 1377 return V(::builder->CreateFCmpOGE(V(lhs), V(rhs))); 1378 } 1379 1380 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs) 1381 { 1382 return V(::builder->CreateFCmpOLT(V(lhs), V(rhs))); 1383 } 1384 1385 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs) 1386 { 1387 return V(::builder->CreateFCmpOLE(V(lhs), V(rhs))); 1388 } 1389 1390 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs) 1391 { 1392 return V(::builder->CreateFCmpONE(V(lhs), V(rhs))); 1393 } 1394 1395 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs) 1396 { 1397 return V(::builder->CreateFCmpORD(V(lhs), V(rhs))); 1398 } 1399 1400 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs) 1401 { 1402 return V(::builder->CreateFCmpUNO(V(lhs), V(rhs))); 1403 } 1404 1405 Value *Nucleus::createFCmpUEQ(Value *lhs, Value 
*rhs) 1406 { 1407 return V(::builder->CreateFCmpUEQ(V(lhs), V(rhs))); 1408 } 1409 1410 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs) 1411 { 1412 return V(::builder->CreateFCmpUGT(V(lhs), V(rhs))); 1413 } 1414 1415 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs) 1416 { 1417 return V(::builder->CreateFCmpUGE(V(lhs), V(rhs))); 1418 } 1419 1420 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs) 1421 { 1422 return V(::builder->CreateFCmpULT(V(lhs), V(rhs))); 1423 } 1424 1425 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs) 1426 { 1427 return V(::builder->CreateFCmpULE(V(lhs), V(rhs))); 1428 } 1429 1430 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs) 1431 { 1432 return V(::builder->CreateFCmpULE(V(lhs), V(rhs))); 1433 } 1434 1435 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index) 1436 { 1437 assert(V(vector)->getType()->getContainedType(0) == T(type)); 1438 return V(::builder->CreateExtractElement(V(vector), V(createConstantInt(index)))); 1439 } 1440 1441 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index) 1442 { 1443 return V(::builder->CreateInsertElement(V(vector), V(element), V(createConstantInt(index)))); 1444 } 1445 1446 Value *Nucleus::createShuffleVector(Value *v1, Value *v2, const int *select) 1447 { 1448 int size = llvm::cast<llvm::VectorType>(V(v1)->getType())->getNumElements(); 1449 const int maxSize = 16; 1450 llvm::Constant *swizzle[maxSize]; 1451 assert(size <= maxSize); 1452 1453 for(int i = 0; i < size; i++) 1454 { 1455 swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]); 1456 } 1457 1458 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size)); 1459 1460 return V(::builder->CreateShuffleVector(V(v1), V(v2), shuffle)); 1461 } 1462 1463 Value *Nucleus::createSelect(Value *c, Value *ifTrue, Value *ifFalse) 1464 { 1465 return V(::builder->CreateSelect(V(c), V(ifTrue), V(ifFalse))); 1466 } 1467 1468 SwitchCases 
*Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases) 1469 { 1470 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(V(control), B(defaultBranch), numCases)); 1471 } 1472 1473 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch) 1474 { 1475 llvm::SwitchInst *sw = reinterpret_cast<llvm::SwitchInst *>(switchCases); 1476 sw->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), B(branch)); 1477 } 1478 1479 void Nucleus::createUnreachable() 1480 { 1481 ::builder->CreateUnreachable(); 1482 } 1483 1484 static Value *createSwizzle4(Value *val, unsigned char select) 1485 { 1486 int swizzle[4] = 1487 { 1488 (select >> 0) & 0x03, 1489 (select >> 2) & 0x03, 1490 (select >> 4) & 0x03, 1491 (select >> 6) & 0x03, 1492 }; 1493 1494 return Nucleus::createShuffleVector(val, val, swizzle); 1495 } 1496 1497 static Value *createMask4(Value *lhs, Value *rhs, unsigned char select) 1498 { 1499 bool mask[4] = {false, false, false, false}; 1500 1501 mask[(select >> 0) & 0x03] = true; 1502 mask[(select >> 2) & 0x03] = true; 1503 mask[(select >> 4) & 0x03] = true; 1504 mask[(select >> 6) & 0x03] = true; 1505 1506 int swizzle[4] = 1507 { 1508 mask[0] ? 4 : 0, 1509 mask[1] ? 5 : 1, 1510 mask[2] ? 6 : 2, 1511 mask[3] ? 
7 : 3, 1512 }; 1513 1514 return Nucleus::createShuffleVector(lhs, rhs, swizzle); 1515 } 1516 1517 Type *Nucleus::getPointerType(Type *ElementType) 1518 { 1519 return T(llvm::PointerType::get(T(ElementType), 0)); 1520 } 1521 1522 Value *Nucleus::createNullValue(Type *Ty) 1523 { 1524 return V(llvm::Constant::getNullValue(T(Ty))); 1525 } 1526 1527 Value *Nucleus::createConstantLong(int64_t i) 1528 { 1529 return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true)); 1530 } 1531 1532 Value *Nucleus::createConstantInt(int i) 1533 { 1534 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true)); 1535 } 1536 1537 Value *Nucleus::createConstantInt(unsigned int i) 1538 { 1539 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false)); 1540 } 1541 1542 Value *Nucleus::createConstantBool(bool b) 1543 { 1544 return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b)); 1545 } 1546 1547 Value *Nucleus::createConstantByte(signed char i) 1548 { 1549 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true)); 1550 } 1551 1552 Value *Nucleus::createConstantByte(unsigned char i) 1553 { 1554 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false)); 1555 } 1556 1557 Value *Nucleus::createConstantShort(short i) 1558 { 1559 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true)); 1560 } 1561 1562 Value *Nucleus::createConstantShort(unsigned short i) 1563 { 1564 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false)); 1565 } 1566 1567 Value *Nucleus::createConstantFloat(float x) 1568 { 1569 return V(llvm::ConstantFP::get(T(Float::getType()), x)); 1570 } 1571 1572 Value *Nucleus::createNullPointer(Type *Ty) 1573 { 1574 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0))); 1575 } 1576 1577 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type) 1578 { 1579 assert(llvm::isa<llvm::VectorType>(T(type))); 
1580 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type. 1581 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type. 1582 assert(numElements <= 16 && numConstants <= numElements); 1583 llvm::Constant *constantVector[16]; 1584 1585 for(int i = 0; i < numElements; i++) 1586 { 1587 constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]); 1588 } 1589 1590 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements))); 1591 } 1592 1593 Value *Nucleus::createConstantVector(const double *constants, Type *type) 1594 { 1595 assert(llvm::isa<llvm::VectorType>(T(type))); 1596 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type. 1597 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type. 
1598 assert(numElements <= 8 && numConstants <= numElements); 1599 llvm::Constant *constantVector[8]; 1600 1601 for(int i = 0; i < numElements; i++) 1602 { 1603 constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]); 1604 } 1605 1606 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements))); 1607 } 1608 1609 Type *Void::getType() 1610 { 1611 return T(llvm::Type::getVoidTy(*::context)); 1612 } 1613 1614 Bool::Bool(Argument<Bool> argument) 1615 { 1616 storeValue(argument.value); 1617 } 1618 1619 Bool::Bool(bool x) 1620 { 1621 storeValue(Nucleus::createConstantBool(x)); 1622 } 1623 1624 Bool::Bool(RValue<Bool> rhs) 1625 { 1626 storeValue(rhs.value); 1627 } 1628 1629 Bool::Bool(const Bool &rhs) 1630 { 1631 Value *value = rhs.loadValue(); 1632 storeValue(value); 1633 } 1634 1635 Bool::Bool(const Reference<Bool> &rhs) 1636 { 1637 Value *value = rhs.loadValue(); 1638 storeValue(value); 1639 } 1640 1641 RValue<Bool> Bool::operator=(RValue<Bool> rhs) 1642 { 1643 storeValue(rhs.value); 1644 1645 return rhs; 1646 } 1647 1648 RValue<Bool> Bool::operator=(const Bool &rhs) 1649 { 1650 Value *value = rhs.loadValue(); 1651 storeValue(value); 1652 1653 return RValue<Bool>(value); 1654 } 1655 1656 RValue<Bool> Bool::operator=(const Reference<Bool> &rhs) 1657 { 1658 Value *value = rhs.loadValue(); 1659 storeValue(value); 1660 1661 return RValue<Bool>(value); 1662 } 1663 1664 RValue<Bool> operator!(RValue<Bool> val) 1665 { 1666 return RValue<Bool>(Nucleus::createNot(val.value)); 1667 } 1668 1669 RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs) 1670 { 1671 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value)); 1672 } 1673 1674 RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs) 1675 { 1676 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value)); 1677 } 1678 1679 Type *Bool::getType() 1680 { 1681 return T(llvm::Type::getInt1Ty(*::context)); 1682 } 1683 1684 
Byte::Byte(Argument<Byte> argument) 1685 { 1686 storeValue(argument.value); 1687 } 1688 1689 Byte::Byte(RValue<Int> cast) 1690 { 1691 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType()); 1692 1693 storeValue(integer); 1694 } 1695 1696 Byte::Byte(RValue<UInt> cast) 1697 { 1698 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType()); 1699 1700 storeValue(integer); 1701 } 1702 1703 Byte::Byte(RValue<UShort> cast) 1704 { 1705 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType()); 1706 1707 storeValue(integer); 1708 } 1709 1710 Byte::Byte(int x) 1711 { 1712 storeValue(Nucleus::createConstantByte((unsigned char)x)); 1713 } 1714 1715 Byte::Byte(unsigned char x) 1716 { 1717 storeValue(Nucleus::createConstantByte(x)); 1718 } 1719 1720 Byte::Byte(RValue<Byte> rhs) 1721 { 1722 storeValue(rhs.value); 1723 } 1724 1725 Byte::Byte(const Byte &rhs) 1726 { 1727 Value *value = rhs.loadValue(); 1728 storeValue(value); 1729 } 1730 1731 Byte::Byte(const Reference<Byte> &rhs) 1732 { 1733 Value *value = rhs.loadValue(); 1734 storeValue(value); 1735 } 1736 1737 RValue<Byte> Byte::operator=(RValue<Byte> rhs) 1738 { 1739 storeValue(rhs.value); 1740 1741 return rhs; 1742 } 1743 1744 RValue<Byte> Byte::operator=(const Byte &rhs) 1745 { 1746 Value *value = rhs.loadValue(); 1747 storeValue(value); 1748 1749 return RValue<Byte>(value); 1750 } 1751 1752 RValue<Byte> Byte::operator=(const Reference<Byte> &rhs) 1753 { 1754 Value *value = rhs.loadValue(); 1755 storeValue(value); 1756 1757 return RValue<Byte>(value); 1758 } 1759 1760 RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs) 1761 { 1762 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value)); 1763 } 1764 1765 RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs) 1766 { 1767 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value)); 1768 } 1769 1770 RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs) 1771 { 1772 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value)); 
1773 } 1774 1775 RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs) 1776 { 1777 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value)); 1778 } 1779 1780 RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs) 1781 { 1782 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value)); 1783 } 1784 1785 RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs) 1786 { 1787 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value)); 1788 } 1789 1790 RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs) 1791 { 1792 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value)); 1793 } 1794 1795 RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs) 1796 { 1797 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value)); 1798 } 1799 1800 RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs) 1801 { 1802 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value)); 1803 } 1804 1805 RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs) 1806 { 1807 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value)); 1808 } 1809 1810 RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs) 1811 { 1812 return lhs = lhs + rhs; 1813 } 1814 1815 RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs) 1816 { 1817 return lhs = lhs - rhs; 1818 } 1819 1820 RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs) 1821 { 1822 return lhs = lhs * rhs; 1823 } 1824 1825 RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs) 1826 { 1827 return lhs = lhs / rhs; 1828 } 1829 1830 RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs) 1831 { 1832 return lhs = lhs % rhs; 1833 } 1834 1835 RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs) 1836 { 1837 return lhs = lhs & rhs; 1838 } 1839 1840 RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs) 1841 { 1842 return lhs = lhs | rhs; 1843 } 1844 1845 RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs) 1846 { 1847 return lhs = lhs ^ rhs; 1848 } 1849 1850 RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs) 1851 { 1852 return 
lhs = lhs << rhs; 1853 } 1854 1855 RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs) 1856 { 1857 return lhs = lhs >> rhs; 1858 } 1859 1860 RValue<Byte> operator+(RValue<Byte> val) 1861 { 1862 return val; 1863 } 1864 1865 RValue<Byte> operator-(RValue<Byte> val) 1866 { 1867 return RValue<Byte>(Nucleus::createNeg(val.value)); 1868 } 1869 1870 RValue<Byte> operator~(RValue<Byte> val) 1871 { 1872 return RValue<Byte>(Nucleus::createNot(val.value)); 1873 } 1874 1875 RValue<Byte> operator++(Byte &val, int) // Post-increment 1876 { 1877 RValue<Byte> res = val; 1878 1879 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((unsigned char)1)); 1880 val.storeValue(inc); 1881 1882 return res; 1883 } 1884 1885 const Byte &operator++(Byte &val) // Pre-increment 1886 { 1887 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantByte((unsigned char)1)); 1888 val.storeValue(inc); 1889 1890 return val; 1891 } 1892 1893 RValue<Byte> operator--(Byte &val, int) // Post-decrement 1894 { 1895 RValue<Byte> res = val; 1896 1897 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((unsigned char)1)); 1898 val.storeValue(inc); 1899 1900 return res; 1901 } 1902 1903 const Byte &operator--(Byte &val) // Pre-decrement 1904 { 1905 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantByte((unsigned char)1)); 1906 val.storeValue(inc); 1907 1908 return val; 1909 } 1910 1911 RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs) 1912 { 1913 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value)); 1914 } 1915 1916 RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs) 1917 { 1918 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value)); 1919 } 1920 1921 RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs) 1922 { 1923 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value)); 1924 } 1925 1926 RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs) 1927 { 1928 return 
RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value)); 1929 } 1930 1931 RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs) 1932 { 1933 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 1934 } 1935 1936 RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs) 1937 { 1938 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 1939 } 1940 1941 Type *Byte::getType() 1942 { 1943 return T(llvm::Type::getInt8Ty(*::context)); 1944 } 1945 1946 SByte::SByte(Argument<SByte> argument) 1947 { 1948 storeValue(argument.value); 1949 } 1950 1951 SByte::SByte(RValue<Int> cast) 1952 { 1953 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType()); 1954 1955 storeValue(integer); 1956 } 1957 1958 SByte::SByte(RValue<Short> cast) 1959 { 1960 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType()); 1961 1962 storeValue(integer); 1963 } 1964 1965 SByte::SByte(signed char x) 1966 { 1967 storeValue(Nucleus::createConstantByte(x)); 1968 } 1969 1970 SByte::SByte(RValue<SByte> rhs) 1971 { 1972 storeValue(rhs.value); 1973 } 1974 1975 SByte::SByte(const SByte &rhs) 1976 { 1977 Value *value = rhs.loadValue(); 1978 storeValue(value); 1979 } 1980 1981 SByte::SByte(const Reference<SByte> &rhs) 1982 { 1983 Value *value = rhs.loadValue(); 1984 storeValue(value); 1985 } 1986 1987 RValue<SByte> SByte::operator=(RValue<SByte> rhs) 1988 { 1989 storeValue(rhs.value); 1990 1991 return rhs; 1992 } 1993 1994 RValue<SByte> SByte::operator=(const SByte &rhs) 1995 { 1996 Value *value = rhs.loadValue(); 1997 storeValue(value); 1998 1999 return RValue<SByte>(value); 2000 } 2001 2002 RValue<SByte> SByte::operator=(const Reference<SByte> &rhs) 2003 { 2004 Value *value = rhs.loadValue(); 2005 storeValue(value); 2006 2007 return RValue<SByte>(value); 2008 } 2009 2010 RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs) 2011 { 2012 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value)); 2013 } 2014 2015 RValue<SByte> 
operator-(RValue<SByte> lhs, RValue<SByte> rhs) 2016 { 2017 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value)); 2018 } 2019 2020 RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs) 2021 { 2022 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value)); 2023 } 2024 2025 RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs) 2026 { 2027 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value)); 2028 } 2029 2030 RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs) 2031 { 2032 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value)); 2033 } 2034 2035 RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs) 2036 { 2037 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value)); 2038 } 2039 2040 RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs) 2041 { 2042 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value)); 2043 } 2044 2045 RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs) 2046 { 2047 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value)); 2048 } 2049 2050 RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs) 2051 { 2052 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value)); 2053 } 2054 2055 RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs) 2056 { 2057 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value)); 2058 } 2059 2060 RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs) 2061 { 2062 return lhs = lhs + rhs; 2063 } 2064 2065 RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs) 2066 { 2067 return lhs = lhs - rhs; 2068 } 2069 2070 RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs) 2071 { 2072 return lhs = lhs * rhs; 2073 } 2074 2075 RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs) 2076 { 2077 return lhs = lhs / rhs; 2078 } 2079 2080 RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs) 2081 { 2082 return lhs = lhs % rhs; 2083 } 2084 2085 RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs) 2086 { 2087 
return lhs = lhs & rhs; 2088 } 2089 2090 RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs) 2091 { 2092 return lhs = lhs | rhs; 2093 } 2094 2095 RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs) 2096 { 2097 return lhs = lhs ^ rhs; 2098 } 2099 2100 RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs) 2101 { 2102 return lhs = lhs << rhs; 2103 } 2104 2105 RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs) 2106 { 2107 return lhs = lhs >> rhs; 2108 } 2109 2110 RValue<SByte> operator+(RValue<SByte> val) 2111 { 2112 return val; 2113 } 2114 2115 RValue<SByte> operator-(RValue<SByte> val) 2116 { 2117 return RValue<SByte>(Nucleus::createNeg(val.value)); 2118 } 2119 2120 RValue<SByte> operator~(RValue<SByte> val) 2121 { 2122 return RValue<SByte>(Nucleus::createNot(val.value)); 2123 } 2124 2125 RValue<SByte> operator++(SByte &val, int) // Post-increment 2126 { 2127 RValue<SByte> res = val; 2128 2129 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((signed char)1)); 2130 val.storeValue(inc); 2131 2132 return res; 2133 } 2134 2135 const SByte &operator++(SByte &val) // Pre-increment 2136 { 2137 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantByte((signed char)1)); 2138 val.storeValue(inc); 2139 2140 return val; 2141 } 2142 2143 RValue<SByte> operator--(SByte &val, int) // Post-decrement 2144 { 2145 RValue<SByte> res = val; 2146 2147 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((signed char)1)); 2148 val.storeValue(inc); 2149 2150 return res; 2151 } 2152 2153 const SByte &operator--(SByte &val) // Pre-decrement 2154 { 2155 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantByte((signed char)1)); 2156 val.storeValue(inc); 2157 2158 return val; 2159 } 2160 2161 RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs) 2162 { 2163 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value)); 2164 } 2165 2166 RValue<Bool> operator<=(RValue<SByte> lhs, 
RValue<SByte> rhs) 2167 { 2168 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value)); 2169 } 2170 2171 RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs) 2172 { 2173 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value)); 2174 } 2175 2176 RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs) 2177 { 2178 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value)); 2179 } 2180 2181 RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs) 2182 { 2183 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 2184 } 2185 2186 RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs) 2187 { 2188 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 2189 } 2190 2191 Type *SByte::getType() 2192 { 2193 return T(llvm::Type::getInt8Ty(*::context)); 2194 } 2195 2196 Short::Short(Argument<Short> argument) 2197 { 2198 storeValue(argument.value); 2199 } 2200 2201 Short::Short(RValue<Int> cast) 2202 { 2203 Value *integer = Nucleus::createTrunc(cast.value, Short::getType()); 2204 2205 storeValue(integer); 2206 } 2207 2208 Short::Short(short x) 2209 { 2210 storeValue(Nucleus::createConstantShort(x)); 2211 } 2212 2213 Short::Short(RValue<Short> rhs) 2214 { 2215 storeValue(rhs.value); 2216 } 2217 2218 Short::Short(const Short &rhs) 2219 { 2220 Value *value = rhs.loadValue(); 2221 storeValue(value); 2222 } 2223 2224 Short::Short(const Reference<Short> &rhs) 2225 { 2226 Value *value = rhs.loadValue(); 2227 storeValue(value); 2228 } 2229 2230 RValue<Short> Short::operator=(RValue<Short> rhs) 2231 { 2232 storeValue(rhs.value); 2233 2234 return rhs; 2235 } 2236 2237 RValue<Short> Short::operator=(const Short &rhs) 2238 { 2239 Value *value = rhs.loadValue(); 2240 storeValue(value); 2241 2242 return RValue<Short>(value); 2243 } 2244 2245 RValue<Short> Short::operator=(const Reference<Short> &rhs) 2246 { 2247 Value *value = rhs.loadValue(); 2248 storeValue(value); 2249 2250 return RValue<Short>(value); 2251 } 2252 
2253 RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs) 2254 { 2255 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value)); 2256 } 2257 2258 RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs) 2259 { 2260 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value)); 2261 } 2262 2263 RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs) 2264 { 2265 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value)); 2266 } 2267 2268 RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs) 2269 { 2270 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value)); 2271 } 2272 2273 RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs) 2274 { 2275 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value)); 2276 } 2277 2278 RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs) 2279 { 2280 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value)); 2281 } 2282 2283 RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs) 2284 { 2285 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value)); 2286 } 2287 2288 RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs) 2289 { 2290 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value)); 2291 } 2292 2293 RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs) 2294 { 2295 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value)); 2296 } 2297 2298 RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs) 2299 { 2300 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value)); 2301 } 2302 2303 RValue<Short> operator+=(Short &lhs, RValue<Short> rhs) 2304 { 2305 return lhs = lhs + rhs; 2306 } 2307 2308 RValue<Short> operator-=(Short &lhs, RValue<Short> rhs) 2309 { 2310 return lhs = lhs - rhs; 2311 } 2312 2313 RValue<Short> operator*=(Short &lhs, RValue<Short> rhs) 2314 { 2315 return lhs = lhs * rhs; 2316 } 2317 2318 RValue<Short> operator/=(Short &lhs, RValue<Short> rhs) 2319 { 2320 return lhs = lhs / rhs; 2321 } 2322 2323 
RValue<Short> operator%=(Short &lhs, RValue<Short> rhs) 2324 { 2325 return lhs = lhs % rhs; 2326 } 2327 2328 RValue<Short> operator&=(Short &lhs, RValue<Short> rhs) 2329 { 2330 return lhs = lhs & rhs; 2331 } 2332 2333 RValue<Short> operator|=(Short &lhs, RValue<Short> rhs) 2334 { 2335 return lhs = lhs | rhs; 2336 } 2337 2338 RValue<Short> operator^=(Short &lhs, RValue<Short> rhs) 2339 { 2340 return lhs = lhs ^ rhs; 2341 } 2342 2343 RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs) 2344 { 2345 return lhs = lhs << rhs; 2346 } 2347 2348 RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs) 2349 { 2350 return lhs = lhs >> rhs; 2351 } 2352 2353 RValue<Short> operator+(RValue<Short> val) 2354 { 2355 return val; 2356 } 2357 2358 RValue<Short> operator-(RValue<Short> val) 2359 { 2360 return RValue<Short>(Nucleus::createNeg(val.value)); 2361 } 2362 2363 RValue<Short> operator~(RValue<Short> val) 2364 { 2365 return RValue<Short>(Nucleus::createNot(val.value)); 2366 } 2367 2368 RValue<Short> operator++(Short &val, int) // Post-increment 2369 { 2370 RValue<Short> res = val; 2371 2372 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((short)1)); 2373 val.storeValue(inc); 2374 2375 return res; 2376 } 2377 2378 const Short &operator++(Short &val) // Pre-increment 2379 { 2380 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantShort((short)1)); 2381 val.storeValue(inc); 2382 2383 return val; 2384 } 2385 2386 RValue<Short> operator--(Short &val, int) // Post-decrement 2387 { 2388 RValue<Short> res = val; 2389 2390 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((short)1)); 2391 val.storeValue(inc); 2392 2393 return res; 2394 } 2395 2396 const Short &operator--(Short &val) // Pre-decrement 2397 { 2398 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantShort((short)1)); 2399 val.storeValue(inc); 2400 2401 return val; 2402 } 2403 2404 RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> 
rhs) 2405 { 2406 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value)); 2407 } 2408 2409 RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs) 2410 { 2411 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value)); 2412 } 2413 2414 RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs) 2415 { 2416 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value)); 2417 } 2418 2419 RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs) 2420 { 2421 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value)); 2422 } 2423 2424 RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs) 2425 { 2426 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 2427 } 2428 2429 RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs) 2430 { 2431 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 2432 } 2433 2434 Type *Short::getType() 2435 { 2436 return T(llvm::Type::getInt16Ty(*::context)); 2437 } 2438 2439 UShort::UShort(Argument<UShort> argument) 2440 { 2441 storeValue(argument.value); 2442 } 2443 2444 UShort::UShort(RValue<UInt> cast) 2445 { 2446 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType()); 2447 2448 storeValue(integer); 2449 } 2450 2451 UShort::UShort(RValue<Int> cast) 2452 { 2453 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType()); 2454 2455 storeValue(integer); 2456 } 2457 2458 UShort::UShort(unsigned short x) 2459 { 2460 storeValue(Nucleus::createConstantShort(x)); 2461 } 2462 2463 UShort::UShort(RValue<UShort> rhs) 2464 { 2465 storeValue(rhs.value); 2466 } 2467 2468 UShort::UShort(const UShort &rhs) 2469 { 2470 Value *value = rhs.loadValue(); 2471 storeValue(value); 2472 } 2473 2474 UShort::UShort(const Reference<UShort> &rhs) 2475 { 2476 Value *value = rhs.loadValue(); 2477 storeValue(value); 2478 } 2479 2480 RValue<UShort> UShort::operator=(RValue<UShort> rhs) 2481 { 2482 storeValue(rhs.value); 2483 2484 return rhs; 2485 } 2486 2487 RValue<UShort> 
UShort::operator=(const UShort &rhs) 2488 { 2489 Value *value = rhs.loadValue(); 2490 storeValue(value); 2491 2492 return RValue<UShort>(value); 2493 } 2494 2495 RValue<UShort> UShort::operator=(const Reference<UShort> &rhs) 2496 { 2497 Value *value = rhs.loadValue(); 2498 storeValue(value); 2499 2500 return RValue<UShort>(value); 2501 } 2502 2503 RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs) 2504 { 2505 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value)); 2506 } 2507 2508 RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs) 2509 { 2510 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value)); 2511 } 2512 2513 RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs) 2514 { 2515 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value)); 2516 } 2517 2518 RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs) 2519 { 2520 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value)); 2521 } 2522 2523 RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs) 2524 { 2525 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value)); 2526 } 2527 2528 RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs) 2529 { 2530 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value)); 2531 } 2532 2533 RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs) 2534 { 2535 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value)); 2536 } 2537 2538 RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs) 2539 { 2540 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value)); 2541 } 2542 2543 RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs) 2544 { 2545 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value)); 2546 } 2547 2548 RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs) 2549 { 2550 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value)); 2551 } 2552 2553 RValue<UShort> operator+=(UShort &lhs, RValue<UShort> 
rhs) 2554 { 2555 return lhs = lhs + rhs; 2556 } 2557 2558 RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs) 2559 { 2560 return lhs = lhs - rhs; 2561 } 2562 2563 RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs) 2564 { 2565 return lhs = lhs * rhs; 2566 } 2567 2568 RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs) 2569 { 2570 return lhs = lhs / rhs; 2571 } 2572 2573 RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs) 2574 { 2575 return lhs = lhs % rhs; 2576 } 2577 2578 RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs) 2579 { 2580 return lhs = lhs & rhs; 2581 } 2582 2583 RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs) 2584 { 2585 return lhs = lhs | rhs; 2586 } 2587 2588 RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs) 2589 { 2590 return lhs = lhs ^ rhs; 2591 } 2592 2593 RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs) 2594 { 2595 return lhs = lhs << rhs; 2596 } 2597 2598 RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs) 2599 { 2600 return lhs = lhs >> rhs; 2601 } 2602 2603 RValue<UShort> operator+(RValue<UShort> val) 2604 { 2605 return val; 2606 } 2607 2608 RValue<UShort> operator-(RValue<UShort> val) 2609 { 2610 return RValue<UShort>(Nucleus::createNeg(val.value)); 2611 } 2612 2613 RValue<UShort> operator~(RValue<UShort> val) 2614 { 2615 return RValue<UShort>(Nucleus::createNot(val.value)); 2616 } 2617 2618 RValue<UShort> operator++(UShort &val, int) // Post-increment 2619 { 2620 RValue<UShort> res = val; 2621 2622 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((unsigned short)1)); 2623 val.storeValue(inc); 2624 2625 return res; 2626 } 2627 2628 const UShort &operator++(UShort &val) // Pre-increment 2629 { 2630 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantShort((unsigned short)1)); 2631 val.storeValue(inc); 2632 2633 return val; 2634 } 2635 2636 RValue<UShort> operator--(UShort &val, int) // Post-decrement 2637 { 2638 RValue<UShort> 
res = val; 2639 2640 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((unsigned short)1)); 2641 val.storeValue(inc); 2642 2643 return res; 2644 } 2645 2646 const UShort &operator--(UShort &val) // Pre-decrement 2647 { 2648 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantShort((unsigned short)1)); 2649 val.storeValue(inc); 2650 2651 return val; 2652 } 2653 2654 RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs) 2655 { 2656 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value)); 2657 } 2658 2659 RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs) 2660 { 2661 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value)); 2662 } 2663 2664 RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs) 2665 { 2666 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value)); 2667 } 2668 2669 RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs) 2670 { 2671 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value)); 2672 } 2673 2674 RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs) 2675 { 2676 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 2677 } 2678 2679 RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs) 2680 { 2681 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 2682 } 2683 2684 Type *UShort::getType() 2685 { 2686 return T(llvm::Type::getInt16Ty(*::context)); 2687 } 2688 2689 Byte4::Byte4(RValue<Byte8> cast) 2690 { 2691 storeValue(Nucleus::createBitCast(cast.value, getType())); 2692 } 2693 2694 Byte4::Byte4(const Reference<Byte4> &rhs) 2695 { 2696 Value *value = rhs.loadValue(); 2697 storeValue(value); 2698 } 2699 2700 Type *Byte4::getType() 2701 { 2702 return T(Type_v4i8); 2703 } 2704 2705 Type *SByte4::getType() 2706 { 2707 return T(Type_v4i8); 2708 } 2709 2710 Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) 2711 { 2712 int64_t 
constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7}; 2713 storeValue(Nucleus::createConstantVector(constantVector, getType())); 2714 } 2715 2716 Byte8::Byte8(RValue<Byte8> rhs) 2717 { 2718 storeValue(rhs.value); 2719 } 2720 2721 Byte8::Byte8(const Byte8 &rhs) 2722 { 2723 Value *value = rhs.loadValue(); 2724 storeValue(value); 2725 } 2726 2727 Byte8::Byte8(const Reference<Byte8> &rhs) 2728 { 2729 Value *value = rhs.loadValue(); 2730 storeValue(value); 2731 } 2732 2733 RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs) 2734 { 2735 storeValue(rhs.value); 2736 2737 return rhs; 2738 } 2739 2740 RValue<Byte8> Byte8::operator=(const Byte8 &rhs) 2741 { 2742 Value *value = rhs.loadValue(); 2743 storeValue(value); 2744 2745 return RValue<Byte8>(value); 2746 } 2747 2748 RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs) 2749 { 2750 Value *value = rhs.loadValue(); 2751 storeValue(value); 2752 2753 return RValue<Byte8>(value); 2754 } 2755 2756 RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs) 2757 { 2758 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value)); 2759 } 2760 2761 RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs) 2762 { 2763 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value)); 2764 } 2765 2766 // RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs) 2767 // { 2768 // return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value)); 2769 // } 2770 2771 // RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs) 2772 // { 2773 // return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value)); 2774 // } 2775 2776 // RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs) 2777 // { 2778 // return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value)); 2779 // } 2780 2781 RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs) 2782 { 2783 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value)); 2784 } 2785 2786 RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs) 2787 { 2788 
return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value)); 2789 } 2790 2791 RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs) 2792 { 2793 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value)); 2794 } 2795 2796 // RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs) 2797 // { 2798 // return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value)); 2799 // } 2800 2801 // RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs) 2802 // { 2803 // return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value)); 2804 // } 2805 2806 RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs) 2807 { 2808 return lhs = lhs + rhs; 2809 } 2810 2811 RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs) 2812 { 2813 return lhs = lhs - rhs; 2814 } 2815 2816 // RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs) 2817 // { 2818 // return lhs = lhs * rhs; 2819 // } 2820 2821 // RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs) 2822 // { 2823 // return lhs = lhs / rhs; 2824 // } 2825 2826 // RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs) 2827 // { 2828 // return lhs = lhs % rhs; 2829 // } 2830 2831 RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs) 2832 { 2833 return lhs = lhs & rhs; 2834 } 2835 2836 RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs) 2837 { 2838 return lhs = lhs | rhs; 2839 } 2840 2841 RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs) 2842 { 2843 return lhs = lhs ^ rhs; 2844 } 2845 2846 // RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs) 2847 // { 2848 // return lhs = lhs << rhs; 2849 // } 2850 2851 // RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs) 2852 // { 2853 // return lhs = lhs >> rhs; 2854 // } 2855 2856 // RValue<Byte8> operator+(RValue<Byte8> val) 2857 // { 2858 // return val; 2859 // } 2860 2861 // RValue<Byte8> operator-(RValue<Byte8> val) 2862 // { 2863 // return RValue<Byte8>(Nucleus::createNeg(val.value)); 2864 // } 2865 2866 RValue<Byte8> operator~(RValue<Byte8> 
val) 2867 { 2868 return RValue<Byte8>(Nucleus::createNot(val.value)); 2869 } 2870 2871 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y) 2872 { 2873 #if defined(__i386__) || defined(__x86_64__) 2874 return x86::paddusb(x, y); 2875 #else 2876 return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value)))); 2877 #endif 2878 } 2879 2880 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y) 2881 { 2882 #if defined(__i386__) || defined(__x86_64__) 2883 return x86::psubusb(x, y); 2884 #else 2885 return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value)))); 2886 #endif 2887 } 2888 2889 RValue<Short4> Unpack(RValue<Byte4> x) 2890 { 2891 int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}; // Real type is v16i8 2892 return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle)); 2893 } 2894 2895 RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y) 2896 { 2897 return UnpackLow(As<Byte8>(x), As<Byte8>(y)); 2898 } 2899 2900 RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y) 2901 { 2902 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8 2903 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 2904 } 2905 2906 RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y) 2907 { 2908 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8 2909 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 2910 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE)); 2911 } 2912 2913 RValue<Int> SignMask(RValue<Byte8> x) 2914 { 2915 #if defined(__i386__) || defined(__x86_64__) 2916 return x86::pmovmskb(x); 2917 #else 2918 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType())))); 2919 #endif 2920 } 2921 2922 // RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y) 2923 // { 2924 //#if defined(__i386__) || defined(__x86_64__) 2925 // return x86::pcmpgtb(x, y); // FIXME: Signedness 2926 //#else 2927 // return 
As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType())))); 2928 //#endif 2929 // } 2930 2931 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y) 2932 { 2933 #if defined(__i386__) || defined(__x86_64__) 2934 return x86::pcmpeqb(x, y); 2935 #else 2936 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType())))); 2937 #endif 2938 } 2939 2940 Type *Byte8::getType() 2941 { 2942 return T(Type_v8i8); 2943 } 2944 2945 SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) 2946 { 2947 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7}; 2948 Value *vector = Nucleus::createConstantVector(constantVector, getType()); 2949 2950 storeValue(Nucleus::createBitCast(vector, getType())); 2951 } 2952 2953 SByte8::SByte8(RValue<SByte8> rhs) 2954 { 2955 storeValue(rhs.value); 2956 } 2957 2958 SByte8::SByte8(const SByte8 &rhs) 2959 { 2960 Value *value = rhs.loadValue(); 2961 storeValue(value); 2962 } 2963 2964 SByte8::SByte8(const Reference<SByte8> &rhs) 2965 { 2966 Value *value = rhs.loadValue(); 2967 storeValue(value); 2968 } 2969 2970 RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs) 2971 { 2972 storeValue(rhs.value); 2973 2974 return rhs; 2975 } 2976 2977 RValue<SByte8> SByte8::operator=(const SByte8 &rhs) 2978 { 2979 Value *value = rhs.loadValue(); 2980 storeValue(value); 2981 2982 return RValue<SByte8>(value); 2983 } 2984 2985 RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs) 2986 { 2987 Value *value = rhs.loadValue(); 2988 storeValue(value); 2989 2990 return RValue<SByte8>(value); 2991 } 2992 2993 RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs) 2994 { 2995 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value)); 2996 } 2997 2998 RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs) 2999 { 3000 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value)); 3001 } 3002 3003 // 
//	RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
//	{
//		return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
//	}

//	RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
//	{
//		return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
//	}

//	RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
//	{
//		return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
//	}

// Bitwise AND (createAnd).
RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
{
	Value *masked = Nucleus::createAnd(lhs.value, rhs.value);
	return RValue<SByte8>(masked);
}

// Bitwise OR (createOr).
RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
{
	Value *merged = Nucleus::createOr(lhs.value, rhs.value);
	return RValue<SByte8>(merged);
}

// Bitwise XOR (createXor).
RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
{
	Value *toggled = Nucleus::createXor(lhs.value, rhs.value);
	return RValue<SByte8>(toggled);
}

//	RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
//	{
//		return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value));
//	}

//	RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
//	{
//		return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value));
//	}

// Compound addition: assigns (lhs + rhs) to lhs and returns the result of that assignment.
RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
{
	RValue<SByte8> sum = lhs + rhs;
	return lhs = sum;
}

// Compound subtraction: assigns (lhs - rhs) to lhs.
RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
{
	RValue<SByte8> difference = lhs - rhs;
	return lhs = difference;
}

//	RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
//	{
//		return lhs = lhs * rhs;
//	}

//	RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
//	{
//		return lhs = lhs / rhs;
//	}

//	RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
//	{
//		return lhs = lhs % rhs;
//	}

// Compound AND: assigns (lhs & rhs) to lhs.
RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
{
	RValue<SByte8> masked = lhs & rhs;
	return lhs = masked;
}

// Compound OR: assigns (lhs | rhs) to lhs.
RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
{
	RValue<SByte8> merged = lhs | rhs;
	return lhs = merged;
}

// Compound XOR: assigns (lhs ^ rhs) to lhs.
RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
{
	RValue<SByte8> toggled = lhs ^ rhs;
	return lhs = toggled;
}

//	RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
//	{
//		return lhs = lhs << rhs;
//	}

//	RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
//	{
//		return lhs = lhs >> rhs;
//	}

//	RValue<SByte8> operator+(RValue<SByte8> val)
//	{
//		return val;
//	}

//	RValue<SByte8> operator-(RValue<SByte8> val)
//	{
//		return RValue<SByte8>(Nucleus::createNeg(val.value));
//	}

// Bitwise complement (createNot).
RValue<SByte8> operator~(RValue<SByte8> val)
{
	Value *inverted = Nucleus::createNot(val.value);
	return RValue<SByte8>(inverted);
}

// Saturating add: x86::paddsb on x86 targets, lowerPSADDSAT elsewhere.
RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
{
#if defined(__i386__) || defined(__x86_64__)
	return x86::paddsb(x, y);
#else
	return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
#endif
}

// Saturating subtract: x86::psubsb on x86 targets, lowerPSSUBSAT elsewhere.
RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
{
#if defined(__i386__) || defined(__x86_64__)
	return x86::psubsb(x, y);
#else
	return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
#endif
}

// Interleaves lanes 0..7 of x with lanes 0..7 of y (x0, y0, x1, y1, ...).
RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
{
	int interleave[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
	Value *mixed = Nucleus::createShuffleVector(x.value, y.value, interleave);
	return As<Short4>(mixed);
}

// Same interleave as UnpackLow, then Swizzle 0xEE (Int4 lanes 2, 3, 2, 3) selects the upper half.
RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
{
	int interleave[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
	RValue<Byte16> mixed(Nucleus::createShuffleVector(x.value, y.value, interleave));
	auto upper = Swizzle(As<Int4>(mixed), 0xEE);
	return As<Short4>(upper);
}

RValue<Int> SignMask(RValue<SByte8> x) 3140 { 3141 #if defined(__i386__) || defined(__x86_64__) 3142 return x86::pmovmskb(As<Byte8>(x)); 3143 #else 3144 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType())))); 3145 #endif 3146 } 3147 3148 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y) 3149 { 3150 #if defined(__i386__) || defined(__x86_64__) 3151 return x86::pcmpgtb(x, y); 3152 #else 3153 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType())))); 3154 #endif 3155 } 3156 3157 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y) 3158 { 3159 #if defined(__i386__) || defined(__x86_64__) 3160 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y)); 3161 #else 3162 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType())))); 3163 #endif 3164 } 3165 3166 Type *SByte8::getType() 3167 { 3168 return T(Type_v8i8); 3169 } 3170 3171 Byte16::Byte16(RValue<Byte16> rhs) 3172 { 3173 storeValue(rhs.value); 3174 } 3175 3176 Byte16::Byte16(const Byte16 &rhs) 3177 { 3178 Value *value = rhs.loadValue(); 3179 storeValue(value); 3180 } 3181 3182 Byte16::Byte16(const Reference<Byte16> &rhs) 3183 { 3184 Value *value = rhs.loadValue(); 3185 storeValue(value); 3186 } 3187 3188 RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs) 3189 { 3190 storeValue(rhs.value); 3191 3192 return rhs; 3193 } 3194 3195 RValue<Byte16> Byte16::operator=(const Byte16 &rhs) 3196 { 3197 Value *value = rhs.loadValue(); 3198 storeValue(value); 3199 3200 return RValue<Byte16>(value); 3201 } 3202 3203 RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs) 3204 { 3205 Value *value = rhs.loadValue(); 3206 storeValue(value); 3207 3208 return RValue<Byte16>(value); 3209 } 3210 3211 Type *Byte16::getType() 3212 { 3213 return T(llvm::VectorType::get(T(Byte::getType()), 16)); 3214 } 3215 3216 Type *SByte16::getType() 3217 { 3218 return T(llvm::VectorType::get(T(SByte::getType()), 16)); 3219 } 3220 3221 
Short2::Short2(RValue<Short4> cast) 3222 { 3223 storeValue(Nucleus::createBitCast(cast.value, getType())); 3224 } 3225 3226 Type *Short2::getType() 3227 { 3228 return T(Type_v2i16); 3229 } 3230 3231 UShort2::UShort2(RValue<UShort4> cast) 3232 { 3233 storeValue(Nucleus::createBitCast(cast.value, getType())); 3234 } 3235 3236 Type *UShort2::getType() 3237 { 3238 return T(Type_v2i16); 3239 } 3240 3241 Short4::Short4(RValue<Int> cast) 3242 { 3243 Value *vector = loadValue(); 3244 Value *element = Nucleus::createTrunc(cast.value, Short::getType()); 3245 Value *insert = Nucleus::createInsertElement(vector, element, 0); 3246 Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value; 3247 3248 storeValue(swizzle); 3249 } 3250 3251 Short4::Short4(RValue<Int4> cast) 3252 { 3253 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6}; 3254 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType()); 3255 3256 Value *packed = Nucleus::createShuffleVector(short8, short8, select); 3257 Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value; 3258 3259 storeValue(short4); 3260 } 3261 3262 // Short4::Short4(RValue<Float> cast) 3263 // { 3264 // } 3265 3266 Short4::Short4(RValue<Float4> cast) 3267 { 3268 Int4 v4i32 = Int4(cast); 3269 #if defined(__i386__) || defined(__x86_64__) 3270 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32)); 3271 #else 3272 Value *v = v4i32.loadValue(); 3273 v4i32 = As<Int4>(V(lowerPack(V(v), V(v), true))); 3274 #endif 3275 3276 storeValue(As<Short4>(Int2(v4i32)).value); 3277 } 3278 3279 Short4::Short4(short xyzw) 3280 { 3281 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw}; 3282 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3283 } 3284 3285 Short4::Short4(short x, short y, short z, short w) 3286 { 3287 int64_t constantVector[4] = {x, y, z, w}; 3288 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3289 } 3290 3291 Short4::Short4(RValue<Short4> rhs) 3292 { 3293 storeValue(rhs.value); 3294 } 3295 3296 
Short4::Short4(const Short4 &rhs) 3297 { 3298 Value *value = rhs.loadValue(); 3299 storeValue(value); 3300 } 3301 3302 Short4::Short4(const Reference<Short4> &rhs) 3303 { 3304 Value *value = rhs.loadValue(); 3305 storeValue(value); 3306 } 3307 3308 Short4::Short4(RValue<UShort4> rhs) 3309 { 3310 storeValue(rhs.value); 3311 } 3312 3313 Short4::Short4(const UShort4 &rhs) 3314 { 3315 storeValue(rhs.loadValue()); 3316 } 3317 3318 Short4::Short4(const Reference<UShort4> &rhs) 3319 { 3320 storeValue(rhs.loadValue()); 3321 } 3322 3323 RValue<Short4> Short4::operator=(RValue<Short4> rhs) 3324 { 3325 storeValue(rhs.value); 3326 3327 return rhs; 3328 } 3329 3330 RValue<Short4> Short4::operator=(const Short4 &rhs) 3331 { 3332 Value *value = rhs.loadValue(); 3333 storeValue(value); 3334 3335 return RValue<Short4>(value); 3336 } 3337 3338 RValue<Short4> Short4::operator=(const Reference<Short4> &rhs) 3339 { 3340 Value *value = rhs.loadValue(); 3341 storeValue(value); 3342 3343 return RValue<Short4>(value); 3344 } 3345 3346 RValue<Short4> Short4::operator=(RValue<UShort4> rhs) 3347 { 3348 storeValue(rhs.value); 3349 3350 return RValue<Short4>(rhs); 3351 } 3352 3353 RValue<Short4> Short4::operator=(const UShort4 &rhs) 3354 { 3355 Value *value = rhs.loadValue(); 3356 storeValue(value); 3357 3358 return RValue<Short4>(value); 3359 } 3360 3361 RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs) 3362 { 3363 Value *value = rhs.loadValue(); 3364 storeValue(value); 3365 3366 return RValue<Short4>(value); 3367 } 3368 3369 RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs) 3370 { 3371 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value)); 3372 } 3373 3374 RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs) 3375 { 3376 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value)); 3377 } 3378 3379 RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs) 3380 { 3381 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value)); 
3382 } 3383 3384 // RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs) 3385 // { 3386 // return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value)); 3387 // } 3388 3389 // RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs) 3390 // { 3391 // return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value)); 3392 // } 3393 3394 RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs) 3395 { 3396 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value)); 3397 } 3398 3399 RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs) 3400 { 3401 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value)); 3402 } 3403 3404 RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs) 3405 { 3406 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value)); 3407 } 3408 3409 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs) 3410 { 3411 #if defined(__i386__) || defined(__x86_64__) 3412 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value)); 3413 3414 return x86::psllw(lhs, rhs); 3415 #else 3416 return As<Short4>(V(lowerVectorShl(V(lhs.value), rhs))); 3417 #endif 3418 } 3419 3420 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs) 3421 { 3422 #if defined(__i386__) || defined(__x86_64__) 3423 return x86::psraw(lhs, rhs); 3424 #else 3425 return As<Short4>(V(lowerVectorAShr(V(lhs.value), rhs))); 3426 #endif 3427 } 3428 3429 RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs) 3430 { 3431 return lhs = lhs + rhs; 3432 } 3433 3434 RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs) 3435 { 3436 return lhs = lhs - rhs; 3437 } 3438 3439 RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs) 3440 { 3441 return lhs = lhs * rhs; 3442 } 3443 3444 // RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs) 3445 // { 3446 // return lhs = lhs / rhs; 3447 // } 3448 3449 // RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs) 3450 // { 3451 // return lhs = lhs % rhs; 
3452 // } 3453 3454 RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs) 3455 { 3456 return lhs = lhs & rhs; 3457 } 3458 3459 RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs) 3460 { 3461 return lhs = lhs | rhs; 3462 } 3463 3464 RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs) 3465 { 3466 return lhs = lhs ^ rhs; 3467 } 3468 3469 RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs) 3470 { 3471 return lhs = lhs << rhs; 3472 } 3473 3474 RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs) 3475 { 3476 return lhs = lhs >> rhs; 3477 } 3478 3479 // RValue<Short4> operator+(RValue<Short4> val) 3480 // { 3481 // return val; 3482 // } 3483 3484 RValue<Short4> operator-(RValue<Short4> val) 3485 { 3486 return RValue<Short4>(Nucleus::createNeg(val.value)); 3487 } 3488 3489 RValue<Short4> operator~(RValue<Short4> val) 3490 { 3491 return RValue<Short4>(Nucleus::createNot(val.value)); 3492 } 3493 3494 RValue<Short4> RoundShort4(RValue<Float4> cast) 3495 { 3496 RValue<Int4> int4 = RoundInt(cast); 3497 return As<Short4>(PackSigned(int4, int4)); 3498 } 3499 3500 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y) 3501 { 3502 #if defined(__i386__) || defined(__x86_64__) 3503 return x86::pmaxsw(x, y); 3504 #else 3505 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT))); 3506 #endif 3507 } 3508 3509 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y) 3510 { 3511 #if defined(__i386__) || defined(__x86_64__) 3512 return x86::pminsw(x, y); 3513 #else 3514 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT))); 3515 #endif 3516 } 3517 3518 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y) 3519 { 3520 #if defined(__i386__) || defined(__x86_64__) 3521 return x86::paddsw(x, y); 3522 #else 3523 return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value)))); 3524 #endif 3525 } 3526 3527 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y) 3528 { 3529 #if defined(__i386__) || 
defined(__x86_64__) 3530 return x86::psubsw(x, y); 3531 #else 3532 return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value)))); 3533 #endif 3534 } 3535 3536 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y) 3537 { 3538 #if defined(__i386__) || defined(__x86_64__) 3539 return x86::pmulhw(x, y); 3540 #else 3541 return As<Short4>(V(lowerMulHigh(V(x.value), V(y.value), true))); 3542 #endif 3543 } 3544 3545 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y) 3546 { 3547 #if defined(__i386__) || defined(__x86_64__) 3548 return x86::pmaddwd(x, y); 3549 #else 3550 return As<Int2>(V(lowerMulAdd(V(x.value), V(y.value)))); 3551 #endif 3552 } 3553 3554 RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y) 3555 { 3556 #if defined(__i386__) || defined(__x86_64__) 3557 auto result = x86::packsswb(x, y); 3558 #else 3559 auto result = V(lowerPack(V(x.value), V(y.value), true)); 3560 #endif 3561 return As<SByte8>(Swizzle(As<Int4>(result), 0x88)); 3562 } 3563 3564 RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y) 3565 { 3566 #if defined(__i386__) || defined(__x86_64__) 3567 auto result = x86::packuswb(x, y); 3568 #else 3569 auto result = V(lowerPack(V(x.value), V(y.value), false)); 3570 #endif 3571 return As<Byte8>(Swizzle(As<Int4>(result), 0x88)); 3572 } 3573 3574 RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y) 3575 { 3576 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // Real type is v8i16 3577 return As<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 3578 } 3579 3580 RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y) 3581 { 3582 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // Real type is v8i16 3583 auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 3584 return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE)); 3585 } 3586 3587 RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select) 3588 { 3589 // Real type is v8i16 3590 int shuffle[8] = 3591 { 3592 (select >> 0) & 0x03, 3593 
(select >> 2) & 0x03, 3594 (select >> 4) & 0x03, 3595 (select >> 6) & 0x03, 3596 (select >> 0) & 0x03, 3597 (select >> 2) & 0x03, 3598 (select >> 4) & 0x03, 3599 (select >> 6) & 0x03, 3600 }; 3601 3602 return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle)); 3603 } 3604 3605 RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i) 3606 { 3607 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i)); 3608 } 3609 3610 RValue<Short> Extract(RValue<Short4> val, int i) 3611 { 3612 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i)); 3613 } 3614 3615 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y) 3616 { 3617 #if defined(__i386__) || defined(__x86_64__) 3618 return x86::pcmpgtw(x, y); 3619 #else 3620 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType())))); 3621 #endif 3622 } 3623 3624 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y) 3625 { 3626 #if defined(__i386__) || defined(__x86_64__) 3627 return x86::pcmpeqw(x, y); 3628 #else 3629 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType())))); 3630 #endif 3631 } 3632 3633 Type *Short4::getType() 3634 { 3635 return T(Type_v4i16); 3636 } 3637 3638 UShort4::UShort4(RValue<Int4> cast) 3639 { 3640 *this = Short4(cast); 3641 } 3642 3643 UShort4::UShort4(RValue<Float4> cast, bool saturate) 3644 { 3645 if(saturate) 3646 { 3647 #if defined(__i386__) || defined(__x86_64__) 3648 if(CPUID::supportsSSE4_1()) 3649 { 3650 Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation 3651 *this = As<Short4>(PackUnsigned(int4, int4)); 3652 } 3653 else 3654 #endif 3655 { 3656 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000)))); 3657 } 3658 } 3659 else 3660 { 3661 *this = Short4(Int4(cast)); 3662 } 3663 } 3664 3665 UShort4::UShort4(unsigned short xyzw) 3666 { 3667 int64_t constantVector[4] = {xyzw, xyzw, 
xyzw, xyzw}; 3668 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3669 } 3670 3671 UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w) 3672 { 3673 int64_t constantVector[4] = {x, y, z, w}; 3674 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3675 } 3676 3677 UShort4::UShort4(RValue<UShort4> rhs) 3678 { 3679 storeValue(rhs.value); 3680 } 3681 3682 UShort4::UShort4(const UShort4 &rhs) 3683 { 3684 Value *value = rhs.loadValue(); 3685 storeValue(value); 3686 } 3687 3688 UShort4::UShort4(const Reference<UShort4> &rhs) 3689 { 3690 Value *value = rhs.loadValue(); 3691 storeValue(value); 3692 } 3693 3694 UShort4::UShort4(RValue<Short4> rhs) 3695 { 3696 storeValue(rhs.value); 3697 } 3698 3699 UShort4::UShort4(const Short4 &rhs) 3700 { 3701 Value *value = rhs.loadValue(); 3702 storeValue(value); 3703 } 3704 3705 UShort4::UShort4(const Reference<Short4> &rhs) 3706 { 3707 Value *value = rhs.loadValue(); 3708 storeValue(value); 3709 } 3710 3711 RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs) 3712 { 3713 storeValue(rhs.value); 3714 3715 return rhs; 3716 } 3717 3718 RValue<UShort4> UShort4::operator=(const UShort4 &rhs) 3719 { 3720 Value *value = rhs.loadValue(); 3721 storeValue(value); 3722 3723 return RValue<UShort4>(value); 3724 } 3725 3726 RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs) 3727 { 3728 Value *value = rhs.loadValue(); 3729 storeValue(value); 3730 3731 return RValue<UShort4>(value); 3732 } 3733 3734 RValue<UShort4> UShort4::operator=(RValue<Short4> rhs) 3735 { 3736 storeValue(rhs.value); 3737 3738 return RValue<UShort4>(rhs); 3739 } 3740 3741 RValue<UShort4> UShort4::operator=(const Short4 &rhs) 3742 { 3743 Value *value = rhs.loadValue(); 3744 storeValue(value); 3745 3746 return RValue<UShort4>(value); 3747 } 3748 3749 RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs) 3750 { 3751 Value *value = rhs.loadValue(); 3752 storeValue(value); 3753 3754 
return RValue<UShort4>(value); 3755 } 3756 3757 RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs) 3758 { 3759 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value)); 3760 } 3761 3762 RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs) 3763 { 3764 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value)); 3765 } 3766 3767 RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs) 3768 { 3769 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value)); 3770 } 3771 3772 RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs) 3773 { 3774 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value)); 3775 } 3776 3777 RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs) 3778 { 3779 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value)); 3780 } 3781 3782 RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs) 3783 { 3784 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value)); 3785 } 3786 3787 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs) 3788 { 3789 #if defined(__i386__) || defined(__x86_64__) 3790 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value)); 3791 3792 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs)); 3793 #else 3794 return As<UShort4>(V(lowerVectorShl(V(lhs.value), rhs))); 3795 #endif 3796 } 3797 3798 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs) 3799 { 3800 #if defined(__i386__) || defined(__x86_64__) 3801 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value)); 3802 3803 return x86::psrlw(lhs, rhs); 3804 #else 3805 return As<UShort4>(V(lowerVectorLShr(V(lhs.value), rhs))); 3806 #endif 3807 } 3808 3809 RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs) 3810 { 3811 return lhs = lhs << rhs; 3812 } 3813 3814 RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs) 3815 { 3816 return lhs = lhs >> rhs; 3817 } 3818 3819 RValue<UShort4> 
operator~(RValue<UShort4> val) 3820 { 3821 return RValue<UShort4>(Nucleus::createNot(val.value)); 3822 } 3823 3824 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y) 3825 { 3826 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)); 3827 } 3828 3829 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y) 3830 { 3831 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)); 3832 } 3833 3834 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y) 3835 { 3836 #if defined(__i386__) || defined(__x86_64__) 3837 return x86::paddusw(x, y); 3838 #else 3839 return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value)))); 3840 #endif 3841 } 3842 3843 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y) 3844 { 3845 #if defined(__i386__) || defined(__x86_64__) 3846 return x86::psubusw(x, y); 3847 #else 3848 return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value)))); 3849 #endif 3850 } 3851 3852 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y) 3853 { 3854 #if defined(__i386__) || defined(__x86_64__) 3855 return x86::pmulhuw(x, y); 3856 #else 3857 return As<UShort4>(V(lowerMulHigh(V(x.value), V(y.value), false))); 3858 #endif 3859 } 3860 3861 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y) 3862 { 3863 #if defined(__i386__) || defined(__x86_64__) 3864 return x86::pavgw(x, y); 3865 #else 3866 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value)))); 3867 #endif 3868 } 3869 3870 Type *UShort4::getType() 3871 { 3872 return T(Type_v4i16); 3873 } 3874 3875 Short8::Short8(short c) 3876 { 3877 int64_t constantVector[8] = {c, c, c, c, c, c, c, c}; 3878 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3879 } 3880 3881 Short8::Short8(short c0, short c1, 
short c2, short c3, short c4, short c5, short c6, short c7) 3882 { 3883 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7}; 3884 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3885 } 3886 3887 Short8::Short8(RValue<Short8> rhs) 3888 { 3889 storeValue(rhs.value); 3890 } 3891 3892 Short8::Short8(const Reference<Short8> &rhs) 3893 { 3894 Value *value = rhs.loadValue(); 3895 storeValue(value); 3896 } 3897 3898 Short8::Short8(RValue<Short4> lo, RValue<Short4> hi) 3899 { 3900 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11}; // Real type is v8i16 3901 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle); 3902 3903 storeValue(packed); 3904 } 3905 3906 RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs) 3907 { 3908 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value)); 3909 } 3910 3911 RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs) 3912 { 3913 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value)); 3914 } 3915 3916 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs) 3917 { 3918 #if defined(__i386__) || defined(__x86_64__) 3919 return x86::psllw(lhs, rhs); 3920 #else 3921 return As<Short8>(V(lowerVectorShl(V(lhs.value), rhs))); 3922 #endif 3923 } 3924 3925 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs) 3926 { 3927 #if defined(__i386__) || defined(__x86_64__) 3928 return x86::psraw(lhs, rhs); 3929 #else 3930 return As<Short8>(V(lowerVectorAShr(V(lhs.value), rhs))); 3931 #endif 3932 } 3933 3934 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y) 3935 { 3936 #if defined(__i386__) || defined(__x86_64__) 3937 return x86::pmaddwd(x, y); 3938 #else 3939 return As<Int4>(V(lowerMulAdd(V(x.value), V(y.value)))); 3940 #endif 3941 } 3942 3943 RValue<Int4> Abs(RValue<Int4> x) 3944 { 3945 auto negative = x >> 31; 3946 return (x ^ negative) - negative; 3947 } 3948 3949 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y) 3950 { 3951 #if 
defined(__i386__) || defined(__x86_64__) 3952 return x86::pmulhw(x, y); 3953 #else 3954 return As<Short8>(V(lowerMulHigh(V(x.value), V(y.value), true))); 3955 #endif 3956 } 3957 3958 Type *Short8::getType() 3959 { 3960 return T(llvm::VectorType::get(T(Short::getType()), 8)); 3961 } 3962 3963 UShort8::UShort8(unsigned short c) 3964 { 3965 int64_t constantVector[8] = {c, c, c, c, c, c, c, c}; 3966 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3967 } 3968 3969 UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7) 3970 { 3971 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7}; 3972 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3973 } 3974 3975 UShort8::UShort8(RValue<UShort8> rhs) 3976 { 3977 storeValue(rhs.value); 3978 } 3979 3980 UShort8::UShort8(const Reference<UShort8> &rhs) 3981 { 3982 Value *value = rhs.loadValue(); 3983 storeValue(value); 3984 } 3985 3986 UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi) 3987 { 3988 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11}; // Real type is v8i16 3989 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle); 3990 3991 storeValue(packed); 3992 } 3993 3994 RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs) 3995 { 3996 storeValue(rhs.value); 3997 3998 return rhs; 3999 } 4000 4001 RValue<UShort8> UShort8::operator=(const UShort8 &rhs) 4002 { 4003 Value *value = rhs.loadValue(); 4004 storeValue(value); 4005 4006 return RValue<UShort8>(value); 4007 } 4008 4009 RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs) 4010 { 4011 Value *value = rhs.loadValue(); 4012 storeValue(value); 4013 4014 return RValue<UShort8>(value); 4015 } 4016 4017 RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs) 4018 { 4019 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value)); 4020 } 4021 4022 RValue<UShort8> 
operator<<(RValue<UShort8> lhs, unsigned char rhs) 4023 { 4024 #if defined(__i386__) || defined(__x86_64__) 4025 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs)); 4026 #else 4027 return As<UShort8>(V(lowerVectorShl(V(lhs.value), rhs))); 4028 #endif 4029 } 4030 4031 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs) 4032 { 4033 #if defined(__i386__) || defined(__x86_64__) 4034 return x86::psrlw(lhs, rhs); // FIXME: Fallback required 4035 #else 4036 return As<UShort8>(V(lowerVectorLShr(V(lhs.value), rhs))); 4037 #endif 4038 } 4039 4040 RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs) 4041 { 4042 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value)); 4043 } 4044 4045 RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs) 4046 { 4047 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value)); 4048 } 4049 4050 RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs) 4051 { 4052 return lhs = lhs + rhs; 4053 } 4054 4055 RValue<UShort8> operator~(RValue<UShort8> val) 4056 { 4057 return RValue<UShort8>(Nucleus::createNot(val.value)); 4058 } 4059 4060 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7) 4061 { 4062 int pshufb[16] = 4063 { 4064 select0 + 0, 4065 select0 + 1, 4066 select1 + 0, 4067 select1 + 1, 4068 select2 + 0, 4069 select2 + 1, 4070 select3 + 0, 4071 select3 + 1, 4072 select4 + 0, 4073 select4 + 1, 4074 select5 + 0, 4075 select5 + 1, 4076 select6 + 0, 4077 select6 + 1, 4078 select7 + 0, 4079 select7 + 1, 4080 }; 4081 4082 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType()); 4083 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb); 4084 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType()); 4085 4086 return RValue<UShort8>(short8); 4087 } 4088 4089 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y) 4090 { 4091 #if 
defined(__i386__) || defined(__x86_64__) 4092 return x86::pmulhuw(x, y); 4093 #else 4094 return As<UShort8>(V(lowerMulHigh(V(x.value), V(y.value), false))); 4095 #endif 4096 } 4097 4098 Type *UShort8::getType() 4099 { 4100 return T(llvm::VectorType::get(T(UShort::getType()), 8)); 4101 } 4102 4103 Int::Int(Argument<Int> argument) 4104 { 4105 storeValue(argument.value); 4106 } 4107 4108 Int::Int(RValue<Byte> cast) 4109 { 4110 Value *integer = Nucleus::createZExt(cast.value, Int::getType()); 4111 4112 storeValue(integer); 4113 } 4114 4115 Int::Int(RValue<SByte> cast) 4116 { 4117 Value *integer = Nucleus::createSExt(cast.value, Int::getType()); 4118 4119 storeValue(integer); 4120 } 4121 4122 Int::Int(RValue<Short> cast) 4123 { 4124 Value *integer = Nucleus::createSExt(cast.value, Int::getType()); 4125 4126 storeValue(integer); 4127 } 4128 4129 Int::Int(RValue<UShort> cast) 4130 { 4131 Value *integer = Nucleus::createZExt(cast.value, Int::getType()); 4132 4133 storeValue(integer); 4134 } 4135 4136 Int::Int(RValue<Int2> cast) 4137 { 4138 *this = Extract(cast, 0); 4139 } 4140 4141 Int::Int(RValue<Long> cast) 4142 { 4143 Value *integer = Nucleus::createTrunc(cast.value, Int::getType()); 4144 4145 storeValue(integer); 4146 } 4147 4148 Int::Int(RValue<Float> cast) 4149 { 4150 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType()); 4151 4152 storeValue(integer); 4153 } 4154 4155 Int::Int(int x) 4156 { 4157 storeValue(Nucleus::createConstantInt(x)); 4158 } 4159 4160 Int::Int(RValue<Int> rhs) 4161 { 4162 storeValue(rhs.value); 4163 } 4164 4165 Int::Int(RValue<UInt> rhs) 4166 { 4167 storeValue(rhs.value); 4168 } 4169 4170 Int::Int(const Int &rhs) 4171 { 4172 Value *value = rhs.loadValue(); 4173 storeValue(value); 4174 } 4175 4176 Int::Int(const Reference<Int> &rhs) 4177 { 4178 Value *value = rhs.loadValue(); 4179 storeValue(value); 4180 } 4181 4182 Int::Int(const UInt &rhs) 4183 { 4184 Value *value = rhs.loadValue(); 4185 storeValue(value); 4186 } 4187 4188 
Int::Int(const Reference<UInt> &rhs) 4189 { 4190 Value *value = rhs.loadValue(); 4191 storeValue(value); 4192 } 4193 4194 RValue<Int> Int::operator=(int rhs) 4195 { 4196 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs))); 4197 } 4198 4199 RValue<Int> Int::operator=(RValue<Int> rhs) 4200 { 4201 storeValue(rhs.value); 4202 4203 return rhs; 4204 } 4205 4206 RValue<Int> Int::operator=(RValue<UInt> rhs) 4207 { 4208 storeValue(rhs.value); 4209 4210 return RValue<Int>(rhs); 4211 } 4212 4213 RValue<Int> Int::operator=(const Int &rhs) 4214 { 4215 Value *value = rhs.loadValue(); 4216 storeValue(value); 4217 4218 return RValue<Int>(value); 4219 } 4220 4221 RValue<Int> Int::operator=(const Reference<Int> &rhs) 4222 { 4223 Value *value = rhs.loadValue(); 4224 storeValue(value); 4225 4226 return RValue<Int>(value); 4227 } 4228 4229 RValue<Int> Int::operator=(const UInt &rhs) 4230 { 4231 Value *value = rhs.loadValue(); 4232 storeValue(value); 4233 4234 return RValue<Int>(value); 4235 } 4236 4237 RValue<Int> Int::operator=(const Reference<UInt> &rhs) 4238 { 4239 Value *value = rhs.loadValue(); 4240 storeValue(value); 4241 4242 return RValue<Int>(value); 4243 } 4244 4245 RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs) 4246 { 4247 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value)); 4248 } 4249 4250 RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs) 4251 { 4252 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value)); 4253 } 4254 4255 RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs) 4256 { 4257 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value)); 4258 } 4259 4260 RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs) 4261 { 4262 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value)); 4263 } 4264 4265 RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs) 4266 { 4267 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value)); 4268 } 4269 4270 RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs) 4271 { 4272 return 
RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value)); 4273 } 4274 4275 RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs) 4276 { 4277 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value)); 4278 } 4279 4280 RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs) 4281 { 4282 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value)); 4283 } 4284 4285 RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs) 4286 { 4287 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value)); 4288 } 4289 4290 RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs) 4291 { 4292 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value)); 4293 } 4294 4295 RValue<Int> operator+=(Int &lhs, RValue<Int> rhs) 4296 { 4297 return lhs = lhs + rhs; 4298 } 4299 4300 RValue<Int> operator-=(Int &lhs, RValue<Int> rhs) 4301 { 4302 return lhs = lhs - rhs; 4303 } 4304 4305 RValue<Int> operator*=(Int &lhs, RValue<Int> rhs) 4306 { 4307 return lhs = lhs * rhs; 4308 } 4309 4310 RValue<Int> operator/=(Int &lhs, RValue<Int> rhs) 4311 { 4312 return lhs = lhs / rhs; 4313 } 4314 4315 RValue<Int> operator%=(Int &lhs, RValue<Int> rhs) 4316 { 4317 return lhs = lhs % rhs; 4318 } 4319 4320 RValue<Int> operator&=(Int &lhs, RValue<Int> rhs) 4321 { 4322 return lhs = lhs & rhs; 4323 } 4324 4325 RValue<Int> operator|=(Int &lhs, RValue<Int> rhs) 4326 { 4327 return lhs = lhs | rhs; 4328 } 4329 4330 RValue<Int> operator^=(Int &lhs, RValue<Int> rhs) 4331 { 4332 return lhs = lhs ^ rhs; 4333 } 4334 4335 RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs) 4336 { 4337 return lhs = lhs << rhs; 4338 } 4339 4340 RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs) 4341 { 4342 return lhs = lhs >> rhs; 4343 } 4344 4345 RValue<Int> operator+(RValue<Int> val) 4346 { 4347 return val; 4348 } 4349 4350 RValue<Int> operator-(RValue<Int> val) 4351 { 4352 return RValue<Int>(Nucleus::createNeg(val.value)); 4353 } 4354 4355 RValue<Int> operator~(RValue<Int> val) 4356 { 4357 return RValue<Int>(Nucleus::createNot(val.value)); 
4358 } 4359 4360 RValue<Int> operator++(Int &val, int) // Post-increment 4361 { 4362 RValue<Int> res = val; 4363 4364 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1)); 4365 val.storeValue(inc); 4366 4367 return res; 4368 } 4369 4370 const Int &operator++(Int &val) // Pre-increment 4371 { 4372 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1)); 4373 val.storeValue(inc); 4374 4375 return val; 4376 } 4377 4378 RValue<Int> operator--(Int &val, int) // Post-decrement 4379 { 4380 RValue<Int> res = val; 4381 4382 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1)); 4383 val.storeValue(inc); 4384 4385 return res; 4386 } 4387 4388 const Int &operator--(Int &val) // Pre-decrement 4389 { 4390 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1)); 4391 val.storeValue(inc); 4392 4393 return val; 4394 } 4395 4396 RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs) 4397 { 4398 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value)); 4399 } 4400 4401 RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs) 4402 { 4403 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value)); 4404 } 4405 4406 RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs) 4407 { 4408 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value)); 4409 } 4410 4411 RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs) 4412 { 4413 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value)); 4414 } 4415 4416 RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs) 4417 { 4418 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 4419 } 4420 4421 RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs) 4422 { 4423 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 4424 } 4425 4426 RValue<Int> Max(RValue<Int> x, RValue<Int> y) 4427 { 4428 return IfThenElse(x > y, x, y); 4429 } 4430 4431 RValue<Int> Min(RValue<Int> x, RValue<Int> y) 4432 { 4433 
return IfThenElse(x < y, x, y); 4434 } 4435 4436 RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max) 4437 { 4438 return Min(Max(x, min), max); 4439 } 4440 4441 RValue<Int> RoundInt(RValue<Float> cast) 4442 { 4443 #if defined(__i386__) || defined(__x86_64__) 4444 return x86::cvtss2si(cast); 4445 #else 4446 return RValue<Int>(V(lowerRoundInt(V(cast.value), T(Int::getType())))); 4447 #endif 4448 } 4449 4450 Type *Int::getType() 4451 { 4452 return T(llvm::Type::getInt32Ty(*::context)); 4453 } 4454 4455 Long::Long(RValue<Int> cast) 4456 { 4457 Value *integer = Nucleus::createSExt(cast.value, Long::getType()); 4458 4459 storeValue(integer); 4460 } 4461 4462 Long::Long(RValue<UInt> cast) 4463 { 4464 Value *integer = Nucleus::createZExt(cast.value, Long::getType()); 4465 4466 storeValue(integer); 4467 } 4468 4469 Long::Long(RValue<Long> rhs) 4470 { 4471 storeValue(rhs.value); 4472 } 4473 4474 RValue<Long> Long::operator=(int64_t rhs) 4475 { 4476 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs))); 4477 } 4478 4479 RValue<Long> Long::operator=(RValue<Long> rhs) 4480 { 4481 storeValue(rhs.value); 4482 4483 return rhs; 4484 } 4485 4486 RValue<Long> Long::operator=(const Long &rhs) 4487 { 4488 Value *value = rhs.loadValue(); 4489 storeValue(value); 4490 4491 return RValue<Long>(value); 4492 } 4493 4494 RValue<Long> Long::operator=(const Reference<Long> &rhs) 4495 { 4496 Value *value = rhs.loadValue(); 4497 storeValue(value); 4498 4499 return RValue<Long>(value); 4500 } 4501 4502 RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs) 4503 { 4504 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value)); 4505 } 4506 4507 RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs) 4508 { 4509 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value)); 4510 } 4511 4512 RValue<Long> operator+=(Long &lhs, RValue<Long> rhs) 4513 { 4514 return lhs = lhs + rhs; 4515 } 4516 4517 RValue<Long> operator-=(Long &lhs, RValue<Long> rhs) 4518 { 4519 
return lhs = lhs - rhs; 4520 } 4521 4522 RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y) 4523 { 4524 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value)); 4525 } 4526 4527 Type *Long::getType() 4528 { 4529 return T(llvm::Type::getInt64Ty(*::context)); 4530 } 4531 4532 UInt::UInt(Argument<UInt> argument) 4533 { 4534 storeValue(argument.value); 4535 } 4536 4537 UInt::UInt(RValue<UShort> cast) 4538 { 4539 Value *integer = Nucleus::createZExt(cast.value, UInt::getType()); 4540 4541 storeValue(integer); 4542 } 4543 4544 UInt::UInt(RValue<Long> cast) 4545 { 4546 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType()); 4547 4548 storeValue(integer); 4549 } 4550 4551 UInt::UInt(RValue<Float> cast) 4552 { 4553 // Note: createFPToUI is broken, must perform conversion using createFPtoSI 4554 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType()); 4555 4556 // Smallest positive value representable in UInt, but not in Int 4557 const unsigned int ustart = 0x80000000u; 4558 const float ustartf = float(ustart); 4559 4560 // If the value is negative, store 0, otherwise store the result of the conversion 4561 storeValue((~(As<Int>(cast) >> 31) & 4562 // Check if the value can be represented as an Int 4563 IfThenElse(cast >= ustartf, 4564 // If the value is too large, subtract ustart and re-add it after conversion. 
4565 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)), 4566 // Otherwise, just convert normally 4567 Int(cast))).value); 4568 } 4569 4570 UInt::UInt(int x) 4571 { 4572 storeValue(Nucleus::createConstantInt(x)); 4573 } 4574 4575 UInt::UInt(unsigned int x) 4576 { 4577 storeValue(Nucleus::createConstantInt(x)); 4578 } 4579 4580 UInt::UInt(RValue<UInt> rhs) 4581 { 4582 storeValue(rhs.value); 4583 } 4584 4585 UInt::UInt(RValue<Int> rhs) 4586 { 4587 storeValue(rhs.value); 4588 } 4589 4590 UInt::UInt(const UInt &rhs) 4591 { 4592 Value *value = rhs.loadValue(); 4593 storeValue(value); 4594 } 4595 4596 UInt::UInt(const Reference<UInt> &rhs) 4597 { 4598 Value *value = rhs.loadValue(); 4599 storeValue(value); 4600 } 4601 4602 UInt::UInt(const Int &rhs) 4603 { 4604 Value *value = rhs.loadValue(); 4605 storeValue(value); 4606 } 4607 4608 UInt::UInt(const Reference<Int> &rhs) 4609 { 4610 Value *value = rhs.loadValue(); 4611 storeValue(value); 4612 } 4613 4614 RValue<UInt> UInt::operator=(unsigned int rhs) 4615 { 4616 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs))); 4617 } 4618 4619 RValue<UInt> UInt::operator=(RValue<UInt> rhs) 4620 { 4621 storeValue(rhs.value); 4622 4623 return rhs; 4624 } 4625 4626 RValue<UInt> UInt::operator=(RValue<Int> rhs) 4627 { 4628 storeValue(rhs.value); 4629 4630 return RValue<UInt>(rhs); 4631 } 4632 4633 RValue<UInt> UInt::operator=(const UInt &rhs) 4634 { 4635 Value *value = rhs.loadValue(); 4636 storeValue(value); 4637 4638 return RValue<UInt>(value); 4639 } 4640 4641 RValue<UInt> UInt::operator=(const Reference<UInt> &rhs) 4642 { 4643 Value *value = rhs.loadValue(); 4644 storeValue(value); 4645 4646 return RValue<UInt>(value); 4647 } 4648 4649 RValue<UInt> UInt::operator=(const Int &rhs) 4650 { 4651 Value *value = rhs.loadValue(); 4652 storeValue(value); 4653 4654 return RValue<UInt>(value); 4655 } 4656 4657 RValue<UInt> UInt::operator=(const Reference<Int> &rhs) 4658 { 4659 Value *value = rhs.loadValue(); 4660 
storeValue(value); 4661 4662 return RValue<UInt>(value); 4663 } 4664 4665 RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs) 4666 { 4667 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value)); 4668 } 4669 4670 RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs) 4671 { 4672 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value)); 4673 } 4674 4675 RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs) 4676 { 4677 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value)); 4678 } 4679 4680 RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs) 4681 { 4682 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value)); 4683 } 4684 4685 RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs) 4686 { 4687 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value)); 4688 } 4689 4690 RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs) 4691 { 4692 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value)); 4693 } 4694 4695 RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs) 4696 { 4697 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value)); 4698 } 4699 4700 RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs) 4701 { 4702 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value)); 4703 } 4704 4705 RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs) 4706 { 4707 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value)); 4708 } 4709 4710 RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs) 4711 { 4712 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value)); 4713 } 4714 4715 RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs) 4716 { 4717 return lhs = lhs + rhs; 4718 } 4719 4720 RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs) 4721 { 4722 return lhs = lhs - rhs; 4723 } 4724 4725 RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs) 4726 { 4727 return lhs = lhs * rhs; 4728 } 4729 4730 RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs) 4731 { 4732 return lhs = lhs / rhs; 4733 
} 4734 4735 RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs) 4736 { 4737 return lhs = lhs % rhs; 4738 } 4739 4740 RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs) 4741 { 4742 return lhs = lhs & rhs; 4743 } 4744 4745 RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs) 4746 { 4747 return lhs = lhs | rhs; 4748 } 4749 4750 RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs) 4751 { 4752 return lhs = lhs ^ rhs; 4753 } 4754 4755 RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs) 4756 { 4757 return lhs = lhs << rhs; 4758 } 4759 4760 RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs) 4761 { 4762 return lhs = lhs >> rhs; 4763 } 4764 4765 RValue<UInt> operator+(RValue<UInt> val) 4766 { 4767 return val; 4768 } 4769 4770 RValue<UInt> operator-(RValue<UInt> val) 4771 { 4772 return RValue<UInt>(Nucleus::createNeg(val.value)); 4773 } 4774 4775 RValue<UInt> operator~(RValue<UInt> val) 4776 { 4777 return RValue<UInt>(Nucleus::createNot(val.value)); 4778 } 4779 4780 RValue<UInt> operator++(UInt &val, int) // Post-increment 4781 { 4782 RValue<UInt> res = val; 4783 4784 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1)); 4785 val.storeValue(inc); 4786 4787 return res; 4788 } 4789 4790 const UInt &operator++(UInt &val) // Pre-increment 4791 { 4792 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1)); 4793 val.storeValue(inc); 4794 4795 return val; 4796 } 4797 4798 RValue<UInt> operator--(UInt &val, int) // Post-decrement 4799 { 4800 RValue<UInt> res = val; 4801 4802 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1)); 4803 val.storeValue(inc); 4804 4805 return res; 4806 } 4807 4808 const UInt &operator--(UInt &val) // Pre-decrement 4809 { 4810 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1)); 4811 val.storeValue(inc); 4812 4813 return val; 4814 } 4815 4816 RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y) 4817 { 4818 return IfThenElse(x > y, x, y); 4819 } 4820 4821 
RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y) 4822 { 4823 return IfThenElse(x < y, x, y); 4824 } 4825 4826 RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max) 4827 { 4828 return Min(Max(x, min), max); 4829 } 4830 4831 RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs) 4832 { 4833 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value)); 4834 } 4835 4836 RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs) 4837 { 4838 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value)); 4839 } 4840 4841 RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs) 4842 { 4843 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value)); 4844 } 4845 4846 RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs) 4847 { 4848 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value)); 4849 } 4850 4851 RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs) 4852 { 4853 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 4854 } 4855 4856 RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs) 4857 { 4858 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 4859 } 4860 4861 // RValue<UInt> RoundUInt(RValue<Float> cast) 4862 // { 4863 //#if defined(__i386__) || defined(__x86_64__) 4864 // return x86::cvtss2si(val); // FIXME: Unsigned 4865 //#else 4866 // return IfThenElse(cast > 0.0f, Int(cast + 0.5f), Int(cast - 0.5f)); 4867 //#endif 4868 // } 4869 4870 Type *UInt::getType() 4871 { 4872 return T(llvm::Type::getInt32Ty(*::context)); 4873 } 4874 4875 // Int2::Int2(RValue<Int> cast) 4876 // { 4877 // Value *extend = Nucleus::createZExt(cast.value, Long::getType()); 4878 // Value *vector = Nucleus::createBitCast(extend, Int2::getType()); 4879 // 4880 // int shuffle[2] = {0, 0}; 4881 // Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle); 4882 // 4883 // storeValue(replicate); 4884 // } 4885 4886 Int2::Int2(RValue<Int4> cast) 4887 { 4888 
storeValue(Nucleus::createBitCast(cast.value, getType())); 4889 } 4890 4891 Int2::Int2(int x, int y) 4892 { 4893 int64_t constantVector[2] = {x, y}; 4894 storeValue(Nucleus::createConstantVector(constantVector, getType())); 4895 } 4896 4897 Int2::Int2(RValue<Int2> rhs) 4898 { 4899 storeValue(rhs.value); 4900 } 4901 4902 Int2::Int2(const Int2 &rhs) 4903 { 4904 Value *value = rhs.loadValue(); 4905 storeValue(value); 4906 } 4907 4908 Int2::Int2(const Reference<Int2> &rhs) 4909 { 4910 Value *value = rhs.loadValue(); 4911 storeValue(value); 4912 } 4913 4914 Int2::Int2(RValue<Int> lo, RValue<Int> hi) 4915 { 4916 int shuffle[4] = {0, 4, 1, 5}; 4917 Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle); 4918 4919 storeValue(Nucleus::createBitCast(packed, Int2::getType())); 4920 } 4921 4922 RValue<Int2> Int2::operator=(RValue<Int2> rhs) 4923 { 4924 storeValue(rhs.value); 4925 4926 return rhs; 4927 } 4928 4929 RValue<Int2> Int2::operator=(const Int2 &rhs) 4930 { 4931 Value *value = rhs.loadValue(); 4932 storeValue(value); 4933 4934 return RValue<Int2>(value); 4935 } 4936 4937 RValue<Int2> Int2::operator=(const Reference<Int2> &rhs) 4938 { 4939 Value *value = rhs.loadValue(); 4940 storeValue(value); 4941 4942 return RValue<Int2>(value); 4943 } 4944 4945 RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs) 4946 { 4947 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value)); 4948 } 4949 4950 RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs) 4951 { 4952 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value)); 4953 } 4954 4955 // RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs) 4956 // { 4957 // return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value)); 4958 // } 4959 4960 // RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs) 4961 // { 4962 // return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value)); 4963 // } 4964 4965 // RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs) 
4966 // { 4967 // return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value)); 4968 // } 4969 4970 RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs) 4971 { 4972 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value)); 4973 } 4974 4975 RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs) 4976 { 4977 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value)); 4978 } 4979 4980 RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs) 4981 { 4982 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value)); 4983 } 4984 4985 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs) 4986 { 4987 #if defined(__i386__) || defined(__x86_64__) 4988 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value)); 4989 4990 return x86::pslld(lhs, rhs); 4991 #else 4992 return As<Int2>(V(lowerVectorShl(V(lhs.value), rhs))); 4993 #endif 4994 } 4995 4996 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs) 4997 { 4998 #if defined(__i386__) || defined(__x86_64__) 4999 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value)); 5000 5001 return x86::psrad(lhs, rhs); 5002 #else 5003 return As<Int2>(V(lowerVectorAShr(V(lhs.value), rhs))); 5004 #endif 5005 } 5006 5007 RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs) 5008 { 5009 return lhs = lhs + rhs; 5010 } 5011 5012 RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs) 5013 { 5014 return lhs = lhs - rhs; 5015 } 5016 5017 // RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs) 5018 // { 5019 // return lhs = lhs * rhs; 5020 // } 5021 5022 // RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs) 5023 // { 5024 // return lhs = lhs / rhs; 5025 // } 5026 5027 // RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs) 5028 // { 5029 // return lhs = lhs % rhs; 5030 // } 5031 5032 RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs) 5033 { 5034 return lhs = lhs & rhs; 5035 } 5036 5037 RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs) 5038 { 5039 return lhs = lhs | rhs; 5040 } 5041 5042 
RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs) 5043 { 5044 return lhs = lhs ^ rhs; 5045 } 5046 5047 RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs) 5048 { 5049 return lhs = lhs << rhs; 5050 } 5051 5052 RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs) 5053 { 5054 return lhs = lhs >> rhs; 5055 } 5056 5057 // RValue<Int2> operator+(RValue<Int2> val) 5058 // { 5059 // return val; 5060 // } 5061 5062 // RValue<Int2> operator-(RValue<Int2> val) 5063 // { 5064 // return RValue<Int2>(Nucleus::createNeg(val.value)); 5065 // } 5066 5067 RValue<Int2> operator~(RValue<Int2> val) 5068 { 5069 return RValue<Int2>(Nucleus::createNot(val.value)); 5070 } 5071 5072 RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y) 5073 { 5074 int shuffle[4] = {0, 4, 1, 5}; // Real type is v4i32 5075 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 5076 } 5077 5078 RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y) 5079 { 5080 int shuffle[4] = {0, 4, 1, 5}; // Real type is v4i32 5081 auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 5082 return As<Short4>(Swizzle(lowHigh, 0xEE)); 5083 } 5084 5085 RValue<Int> Extract(RValue<Int2> val, int i) 5086 { 5087 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i)); 5088 } 5089 5090 RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i) 5091 { 5092 return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i)); 5093 } 5094 5095 Type *Int2::getType() 5096 { 5097 return T(Type_v2i32); 5098 } 5099 5100 UInt2::UInt2(unsigned int x, unsigned int y) 5101 { 5102 int64_t constantVector[2] = {x, y}; 5103 storeValue(Nucleus::createConstantVector(constantVector, getType())); 5104 } 5105 5106 UInt2::UInt2(RValue<UInt2> rhs) 5107 { 5108 storeValue(rhs.value); 5109 } 5110 5111 UInt2::UInt2(const UInt2 &rhs) 5112 { 5113 Value *value = rhs.loadValue(); 5114 storeValue(value); 5115 } 5116 5117 UInt2::UInt2(const Reference<UInt2> 
&rhs) 5118 { 5119 Value *value = rhs.loadValue(); 5120 storeValue(value); 5121 } 5122 5123 RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs) 5124 { 5125 storeValue(rhs.value); 5126 5127 return rhs; 5128 } 5129 5130 RValue<UInt2> UInt2::operator=(const UInt2 &rhs) 5131 { 5132 Value *value = rhs.loadValue(); 5133 storeValue(value); 5134 5135 return RValue<UInt2>(value); 5136 } 5137 5138 RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs) 5139 { 5140 Value *value = rhs.loadValue(); 5141 storeValue(value); 5142 5143 return RValue<UInt2>(value); 5144 } 5145 5146 RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs) 5147 { 5148 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value)); 5149 } 5150 5151 RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs) 5152 { 5153 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value)); 5154 } 5155 5156 // RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs) 5157 // { 5158 // return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value)); 5159 // } 5160 5161 // RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs) 5162 // { 5163 // return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value)); 5164 // } 5165 5166 // RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs) 5167 // { 5168 // return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value)); 5169 // } 5170 5171 RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs) 5172 { 5173 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value)); 5174 } 5175 5176 RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs) 5177 { 5178 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value)); 5179 } 5180 5181 RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs) 5182 { 5183 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value)); 5184 } 5185 5186 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs) 5187 { 5188 #if defined(__i386__) || defined(__x86_64__) 5189 // return 
RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value)); 5190 5191 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs)); 5192 #else 5193 return As<UInt2>(V(lowerVectorShl(V(lhs.value), rhs))); 5194 #endif 5195 } 5196 5197 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs) 5198 { 5199 #if defined(__i386__) || defined(__x86_64__) 5200 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value)); 5201 5202 return x86::psrld(lhs, rhs); 5203 #else 5204 return As<UInt2>(V(lowerVectorLShr(V(lhs.value), rhs))); 5205 #endif 5206 } 5207 5208 RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs) 5209 { 5210 return lhs = lhs + rhs; 5211 } 5212 5213 RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs) 5214 { 5215 return lhs = lhs - rhs; 5216 } 5217 5218 // RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs) 5219 // { 5220 // return lhs = lhs * rhs; 5221 // } 5222 5223 // RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs) 5224 // { 5225 // return lhs = lhs / rhs; 5226 // } 5227 5228 // RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs) 5229 // { 5230 // return lhs = lhs % rhs; 5231 // } 5232 5233 RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs) 5234 { 5235 return lhs = lhs & rhs; 5236 } 5237 5238 RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs) 5239 { 5240 return lhs = lhs | rhs; 5241 } 5242 5243 RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs) 5244 { 5245 return lhs = lhs ^ rhs; 5246 } 5247 5248 RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs) 5249 { 5250 return lhs = lhs << rhs; 5251 } 5252 5253 RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs) 5254 { 5255 return lhs = lhs >> rhs; 5256 } 5257 5258 // RValue<UInt2> operator+(RValue<UInt2> val) 5259 // { 5260 // return val; 5261 // } 5262 5263 // RValue<UInt2> operator-(RValue<UInt2> val) 5264 // { 5265 // return RValue<UInt2>(Nucleus::createNeg(val.value)); 5266 // } 5267 5268 RValue<UInt2> operator~(RValue<UInt2> val) 5269 { 5270 return 
RValue<UInt2>(Nucleus::createNot(val.value)); 5271 } 5272 5273 Type *UInt2::getType() 5274 { 5275 return T(Type_v2i32); 5276 } 5277 5278 Int4::Int4() : XYZW(this) 5279 { 5280 } 5281 5282 Int4::Int4(RValue<Byte4> cast) : XYZW(this) 5283 { 5284 #if defined(__i386__) || defined(__x86_64__) 5285 if(CPUID::supportsSSE4_1()) 5286 { 5287 *this = x86::pmovzxbd(As<Byte16>(cast)); 5288 } 5289 else 5290 #endif 5291 { 5292 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; 5293 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType()); 5294 Value *b = Nucleus::createShuffleVector(a, Nucleus::createNullValue(Byte16::getType()), swizzle); 5295 5296 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11}; 5297 Value *c = Nucleus::createBitCast(b, Short8::getType()); 5298 Value *d = Nucleus::createShuffleVector(c, Nucleus::createNullValue(Short8::getType()), swizzle2); 5299 5300 *this = As<Int4>(d); 5301 } 5302 } 5303 5304 Int4::Int4(RValue<SByte4> cast) : XYZW(this) 5305 { 5306 #if defined(__i386__) || defined(__x86_64__) 5307 if(CPUID::supportsSSE4_1()) 5308 { 5309 *this = x86::pmovsxbd(As<SByte16>(cast)); 5310 } 5311 else 5312 #endif 5313 { 5314 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}; 5315 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType()); 5316 Value *b = Nucleus::createShuffleVector(a, a, swizzle); 5317 5318 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3}; 5319 Value *c = Nucleus::createBitCast(b, Short8::getType()); 5320 Value *d = Nucleus::createShuffleVector(c, c, swizzle2); 5321 5322 *this = As<Int4>(d) >> 24; 5323 } 5324 } 5325 5326 Int4::Int4(RValue<Float4> cast) : XYZW(this) 5327 { 5328 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType()); 5329 5330 storeValue(xyzw); 5331 } 5332 5333 Int4::Int4(RValue<Short4> cast) : XYZW(this) 5334 { 5335 #if defined(__i386__) || defined(__x86_64__) 5336 if(CPUID::supportsSSE4_1()) 5337 { 5338 *this = x86::pmovsxwd(As<Short8>(cast)); 5339 } 5340 else 5341 #endif 
5342 { 5343 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3}; 5344 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle); 5345 *this = As<Int4>(c) >> 16; 5346 } 5347 } 5348 5349 Int4::Int4(RValue<UShort4> cast) : XYZW(this) 5350 { 5351 #if defined(__i386__) || defined(__x86_64__) 5352 if(CPUID::supportsSSE4_1()) 5353 { 5354 *this = x86::pmovzxwd(As<UShort8>(cast)); 5355 } 5356 else 5357 #endif 5358 { 5359 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; 5360 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle); 5361 *this = As<Int4>(c); 5362 } 5363 } 5364 5365 Int4::Int4(int xyzw) : XYZW(this) 5366 { 5367 constant(xyzw, xyzw, xyzw, xyzw); 5368 } 5369 5370 Int4::Int4(int x, int yzw) : XYZW(this) 5371 { 5372 constant(x, yzw, yzw, yzw); 5373 } 5374 5375 Int4::Int4(int x, int y, int zw) : XYZW(this) 5376 { 5377 constant(x, y, zw, zw); 5378 } 5379 5380 Int4::Int4(int x, int y, int z, int w) : XYZW(this) 5381 { 5382 constant(x, y, z, w); 5383 } 5384 5385 void Int4::constant(int x, int y, int z, int w) 5386 { 5387 int64_t constantVector[4] = {x, y, z, w}; 5388 storeValue(Nucleus::createConstantVector(constantVector, getType())); 5389 } 5390 5391 Int4::Int4(RValue<Int4> rhs) : XYZW(this) 5392 { 5393 storeValue(rhs.value); 5394 } 5395 5396 Int4::Int4(const Int4 &rhs) : XYZW(this) 5397 { 5398 Value *value = rhs.loadValue(); 5399 storeValue(value); 5400 } 5401 5402 Int4::Int4(const Reference<Int4> &rhs) : XYZW(this) 5403 { 5404 Value *value = rhs.loadValue(); 5405 storeValue(value); 5406 } 5407 5408 Int4::Int4(RValue<UInt4> rhs) : XYZW(this) 5409 { 5410 storeValue(rhs.value); 5411 } 5412 5413 Int4::Int4(const UInt4 &rhs) : XYZW(this) 5414 { 5415 Value *value = rhs.loadValue(); 5416 storeValue(value); 5417 } 5418 5419 Int4::Int4(const Reference<UInt4> &rhs) : XYZW(this) 5420 { 5421 Value *value = rhs.loadValue(); 5422 storeValue(value); 5423 } 5424 5425 Int4::Int4(RValue<Int2> lo, RValue<Int2> hi) : XYZW(this) 5426 { 
5427 int shuffle[4] = {0, 1, 4, 5}; // Real type is v4i32 5428 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle); 5429 5430 storeValue(packed); 5431 } 5432 5433 Int4::Int4(RValue<Int> rhs) : XYZW(this) 5434 { 5435 Value *vector = loadValue(); 5436 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0); 5437 5438 int swizzle[4] = {0, 0, 0, 0}; 5439 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle); 5440 5441 storeValue(replicate); 5442 } 5443 5444 Int4::Int4(const Int &rhs) : XYZW(this) 5445 { 5446 *this = RValue<Int>(rhs.loadValue()); 5447 } 5448 5449 Int4::Int4(const Reference<Int> &rhs) : XYZW(this) 5450 { 5451 *this = RValue<Int>(rhs.loadValue()); 5452 } 5453 5454 RValue<Int4> Int4::operator=(RValue<Int4> rhs) 5455 { 5456 storeValue(rhs.value); 5457 5458 return rhs; 5459 } 5460 5461 RValue<Int4> Int4::operator=(const Int4 &rhs) 5462 { 5463 Value *value = rhs.loadValue(); 5464 storeValue(value); 5465 5466 return RValue<Int4>(value); 5467 } 5468 5469 RValue<Int4> Int4::operator=(const Reference<Int4> &rhs) 5470 { 5471 Value *value = rhs.loadValue(); 5472 storeValue(value); 5473 5474 return RValue<Int4>(value); 5475 } 5476 5477 RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs) 5478 { 5479 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value)); 5480 } 5481 5482 RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs) 5483 { 5484 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value)); 5485 } 5486 5487 RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs) 5488 { 5489 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value)); 5490 } 5491 5492 RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs) 5493 { 5494 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value)); 5495 } 5496 5497 RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs) 5498 { 5499 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value)); 5500 } 5501 5502 RValue<Int4> operator&(RValue<Int4> 
lhs, RValue<Int4> rhs) 5503 { 5504 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value)); 5505 } 5506 5507 RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs) 5508 { 5509 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value)); 5510 } 5511 5512 RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs) 5513 { 5514 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value)); 5515 } 5516 5517 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs) 5518 { 5519 #if defined(__i386__) || defined(__x86_64__) 5520 return x86::pslld(lhs, rhs); 5521 #else 5522 return As<Int4>(V(lowerVectorShl(V(lhs.value), rhs))); 5523 #endif 5524 } 5525 5526 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs) 5527 { 5528 #if defined(__i386__) || defined(__x86_64__) 5529 return x86::psrad(lhs, rhs); 5530 #else 5531 return As<Int4>(V(lowerVectorAShr(V(lhs.value), rhs))); 5532 #endif 5533 } 5534 5535 RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs) 5536 { 5537 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value)); 5538 } 5539 5540 RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs) 5541 { 5542 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value)); 5543 } 5544 5545 RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs) 5546 { 5547 return lhs = lhs + rhs; 5548 } 5549 5550 RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs) 5551 { 5552 return lhs = lhs - rhs; 5553 } 5554 5555 RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs) 5556 { 5557 return lhs = lhs * rhs; 5558 } 5559 5560 // RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs) 5561 // { 5562 // return lhs = lhs / rhs; 5563 // } 5564 5565 // RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs) 5566 // { 5567 // return lhs = lhs % rhs; 5568 // } 5569 5570 RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs) 5571 { 5572 return lhs = lhs & rhs; 5573 } 5574 5575 RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs) 5576 { 5577 return lhs = lhs | rhs; 5578 } 5579 5580 
RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs) 5581 { 5582 return lhs = lhs ^ rhs; 5583 } 5584 5585 RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs) 5586 { 5587 return lhs = lhs << rhs; 5588 } 5589 5590 RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs) 5591 { 5592 return lhs = lhs >> rhs; 5593 } 5594 5595 RValue<Int4> operator+(RValue<Int4> val) 5596 { 5597 return val; 5598 } 5599 5600 RValue<Int4> operator-(RValue<Int4> val) 5601 { 5602 return RValue<Int4>(Nucleus::createNeg(val.value)); 5603 } 5604 5605 RValue<Int4> operator~(RValue<Int4> val) 5606 { 5607 return RValue<Int4>(Nucleus::createNot(val.value)); 5608 } 5609 5610 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y) 5611 { 5612 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5613 // Restore the following line when LLVM is updated to a version where this issue is fixed. 5614 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())); 5615 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 5616 } 5617 5618 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y) 5619 { 5620 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5621 // Restore the following line when LLVM is updated to a version where this issue is fixed. 5622 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())); 5623 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 5624 } 5625 5626 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y) 5627 { 5628 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5629 // Restore the following line when LLVM is updated to a version where this issue is fixed. 
5630 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())); 5631 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 5632 } 5633 5634 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y) 5635 { 5636 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5637 // Restore the following line when LLVM is updated to a version where this issue is fixed. 5638 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())); 5639 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 5640 } 5641 5642 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y) 5643 { 5644 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5645 // Restore the following line when LLVM is updated to a version where this issue is fixed. 5646 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())); 5647 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 5648 } 5649 5650 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y) 5651 { 5652 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5653 // Restore the following line when LLVM is updated to a version where this issue is fixed. 
5654 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())); 5655 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF); 5656 } 5657 5658 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y) 5659 { 5660 #if defined(__i386__) || defined(__x86_64__) 5661 if(CPUID::supportsSSE4_1()) 5662 { 5663 return x86::pmaxsd(x, y); 5664 } 5665 else 5666 #endif 5667 { 5668 RValue<Int4> greater = CmpNLE(x, y); 5669 return (x & greater) | (y & ~greater); 5670 } 5671 } 5672 5673 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y) 5674 { 5675 #if defined(__i386__) || defined(__x86_64__) 5676 if(CPUID::supportsSSE4_1()) 5677 { 5678 return x86::pminsd(x, y); 5679 } 5680 else 5681 #endif 5682 { 5683 RValue<Int4> less = CmpLT(x, y); 5684 return (x & less) | (y & ~less); 5685 } 5686 } 5687 5688 RValue<Int4> RoundInt(RValue<Float4> cast) 5689 { 5690 #if defined(__i386__) || defined(__x86_64__) 5691 return x86::cvtps2dq(cast); 5692 #else 5693 return As<Int4>(V(lowerRoundInt(V(cast.value), T(Int4::getType())))); 5694 #endif 5695 } 5696 5697 RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y) 5698 { 5699 #if defined(__i386__) || defined(__x86_64__) 5700 return x86::packssdw(x, y); 5701 #else 5702 return As<Short8>(V(lowerPack(V(x.value), V(y.value), true))); 5703 #endif 5704 } 5705 5706 RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y) 5707 { 5708 #if defined(__i386__) || defined(__x86_64__) 5709 return x86::packusdw(x, y); 5710 #else 5711 return As<UShort8>(V(lowerPack(V(x.value), V(y.value), false))); 5712 #endif 5713 } 5714 5715 RValue<Int> Extract(RValue<Int4> x, int i) 5716 { 5717 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i)); 5718 } 5719 5720 RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i) 5721 { 5722 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i)); 5723 } 5724 5725 RValue<Int> 
SignMask(RValue<Int4> x) 5726 { 5727 #if defined(__i386__) || defined(__x86_64__) 5728 return x86::movmskps(As<Float4>(x)); 5729 #else 5730 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType())))); 5731 #endif 5732 } 5733 5734 RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select) 5735 { 5736 return RValue<Int4>(createSwizzle4(x.value, select)); 5737 } 5738 5739 Type *Int4::getType() 5740 { 5741 return T(llvm::VectorType::get(T(Int::getType()), 4)); 5742 } 5743 5744 UInt4::UInt4() : XYZW(this) 5745 { 5746 } 5747 5748 UInt4::UInt4(RValue<Float4> cast) : XYZW(this) 5749 { 5750 // Note: createFPToUI is broken, must perform conversion using createFPtoSI 5751 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType()); 5752 5753 // Smallest positive value representable in UInt, but not in Int 5754 const unsigned int ustart = 0x80000000u; 5755 const float ustartf = float(ustart); 5756 5757 // Check if the value can be represented as an Int 5758 Int4 uiValue = CmpNLT(cast, Float4(ustartf)); 5759 // If the value is too large, subtract ustart and re-add it after conversion. 
5760 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) | 5761 // Otherwise, just convert normally 5762 (~uiValue & Int4(cast)); 5763 // If the value is negative, store 0, otherwise store the result of the conversion 5764 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value); 5765 } 5766 5767 UInt4::UInt4(int xyzw) : XYZW(this) 5768 { 5769 constant(xyzw, xyzw, xyzw, xyzw); 5770 } 5771 5772 UInt4::UInt4(int x, int yzw) : XYZW(this) 5773 { 5774 constant(x, yzw, yzw, yzw); 5775 } 5776 5777 UInt4::UInt4(int x, int y, int zw) : XYZW(this) 5778 { 5779 constant(x, y, zw, zw); 5780 } 5781 5782 UInt4::UInt4(int x, int y, int z, int w) : XYZW(this) 5783 { 5784 constant(x, y, z, w); 5785 } 5786 5787 void UInt4::constant(int x, int y, int z, int w) 5788 { 5789 int64_t constantVector[4] = {x, y, z, w}; 5790 storeValue(Nucleus::createConstantVector(constantVector, getType())); 5791 } 5792 5793 UInt4::UInt4(RValue<UInt4> rhs) : XYZW(this) 5794 { 5795 storeValue(rhs.value); 5796 } 5797 5798 UInt4::UInt4(const UInt4 &rhs) : XYZW(this) 5799 { 5800 Value *value = rhs.loadValue(); 5801 storeValue(value); 5802 } 5803 5804 UInt4::UInt4(const Reference<UInt4> &rhs) : XYZW(this) 5805 { 5806 Value *value = rhs.loadValue(); 5807 storeValue(value); 5808 } 5809 5810 UInt4::UInt4(RValue<Int4> rhs) : XYZW(this) 5811 { 5812 storeValue(rhs.value); 5813 } 5814 5815 UInt4::UInt4(const Int4 &rhs) : XYZW(this) 5816 { 5817 Value *value = rhs.loadValue(); 5818 storeValue(value); 5819 } 5820 5821 UInt4::UInt4(const Reference<Int4> &rhs) : XYZW(this) 5822 { 5823 Value *value = rhs.loadValue(); 5824 storeValue(value); 5825 } 5826 5827 UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi) : XYZW(this) 5828 { 5829 int shuffle[4] = {0, 1, 4, 5}; // Real type is v4i32 5830 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle); 5831 5832 storeValue(packed); 5833 } 5834 5835 RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs) 5836 { 5837 storeValue(rhs.value); 
5838 5839 return rhs; 5840 } 5841 5842 RValue<UInt4> UInt4::operator=(const UInt4 &rhs) 5843 { 5844 Value *value = rhs.loadValue(); 5845 storeValue(value); 5846 5847 return RValue<UInt4>(value); 5848 } 5849 5850 RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs) 5851 { 5852 Value *value = rhs.loadValue(); 5853 storeValue(value); 5854 5855 return RValue<UInt4>(value); 5856 } 5857 5858 RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs) 5859 { 5860 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value)); 5861 } 5862 5863 RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs) 5864 { 5865 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value)); 5866 } 5867 5868 RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs) 5869 { 5870 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value)); 5871 } 5872 5873 RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs) 5874 { 5875 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value)); 5876 } 5877 5878 RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs) 5879 { 5880 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value)); 5881 } 5882 5883 RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs) 5884 { 5885 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value)); 5886 } 5887 5888 RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs) 5889 { 5890 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value)); 5891 } 5892 5893 RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs) 5894 { 5895 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value)); 5896 } 5897 5898 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs) 5899 { 5900 #if defined(__i386__) || defined(__x86_64__) 5901 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs)); 5902 #else 5903 return As<UInt4>(V(lowerVectorShl(V(lhs.value), rhs))); 5904 #endif 5905 } 5906 5907 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs) 5908 { 5909 #if 
defined(__i386__) || defined(__x86_64__) 5910 return x86::psrld(lhs, rhs); 5911 #else 5912 return As<UInt4>(V(lowerVectorLShr(V(lhs.value), rhs))); 5913 #endif 5914 } 5915 5916 RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs) 5917 { 5918 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value)); 5919 } 5920 5921 RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs) 5922 { 5923 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value)); 5924 } 5925 5926 RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs) 5927 { 5928 return lhs = lhs + rhs; 5929 } 5930 5931 RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs) 5932 { 5933 return lhs = lhs - rhs; 5934 } 5935 5936 RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs) 5937 { 5938 return lhs = lhs * rhs; 5939 } 5940 5941 // RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs) 5942 // { 5943 // return lhs = lhs / rhs; 5944 // } 5945 5946 // RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs) 5947 // { 5948 // return lhs = lhs % rhs; 5949 // } 5950 5951 RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs) 5952 { 5953 return lhs = lhs & rhs; 5954 } 5955 5956 RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs) 5957 { 5958 return lhs = lhs | rhs; 5959 } 5960 5961 RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs) 5962 { 5963 return lhs = lhs ^ rhs; 5964 } 5965 5966 RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs) 5967 { 5968 return lhs = lhs << rhs; 5969 } 5970 5971 RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs) 5972 { 5973 return lhs = lhs >> rhs; 5974 } 5975 5976 RValue<UInt4> operator+(RValue<UInt4> val) 5977 { 5978 return val; 5979 } 5980 5981 RValue<UInt4> operator-(RValue<UInt4> val) 5982 { 5983 return RValue<UInt4>(Nucleus::createNeg(val.value)); 5984 } 5985 5986 RValue<UInt4> operator~(RValue<UInt4> val) 5987 { 5988 return RValue<UInt4>(Nucleus::createNot(val.value)); 5989 } 5990 5991 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y) 
5992 { 5993 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 5994 // Restore the following line when LLVM is updated to a version where this issue is fixed. 5995 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())); 5996 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF); 5997 } 5998 5999 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y) 6000 { 6001 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())); 6002 } 6003 6004 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y) 6005 { 6006 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 6007 // Restore the following line when LLVM is updated to a version where this issue is fixed. 6008 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType())); 6009 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF); 6010 } 6011 6012 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y) 6013 { 6014 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())); 6015 } 6016 6017 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y) 6018 { 6019 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0 6020 // Restore the following line when LLVM is updated to a version where this issue is fixed. 
6021 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType())); 6022 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF); 6023 } 6024 6025 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y) 6026 { 6027 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())); 6028 } 6029 6030 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y) 6031 { 6032 #if defined(__i386__) || defined(__x86_64__) 6033 if(CPUID::supportsSSE4_1()) 6034 { 6035 return x86::pmaxud(x, y); 6036 } 6037 else 6038 #endif 6039 { 6040 RValue<UInt4> greater = CmpNLE(x, y); 6041 return (x & greater) | (y & ~greater); 6042 } 6043 } 6044 6045 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y) 6046 { 6047 #if defined(__i386__) || defined(__x86_64__) 6048 if(CPUID::supportsSSE4_1()) 6049 { 6050 return x86::pminud(x, y); 6051 } 6052 else 6053 #endif 6054 { 6055 RValue<UInt4> less = CmpLT(x, y); 6056 return (x & less) | (y & ~less); 6057 } 6058 } 6059 6060 Type *UInt4::getType() 6061 { 6062 return T(llvm::VectorType::get(T(UInt::getType()), 4)); 6063 } 6064 6065 Half::Half(RValue<Float> cast) 6066 { 6067 UInt fp32i = As<UInt>(cast); 6068 UInt abs = fp32i & 0x7FFFFFFF; 6069 UShort fp16i((fp32i & 0x80000000) >> 16); // sign 6070 6071 If(abs > 0x47FFEFFF) // Infinity 6072 { 6073 fp16i |= UShort(0x7FFF); 6074 } 6075 Else 6076 { 6077 If(abs < 0x38800000) // Denormal 6078 { 6079 Int mantissa = (abs & 0x007FFFFF) | 0x00800000; 6080 Int e = 113 - (abs >> 23); 6081 abs = IfThenElse(e < 24, mantissa >> e, Int(0)); 6082 fp16i |= UShort((abs + 0x00000FFF + ((abs >> 13) & 1)) >> 13); 6083 } 6084 Else 6085 { 6086 fp16i |= UShort((abs + 0xC8000000 + 0x00000FFF + ((abs >> 13) & 1)) >> 13); 6087 } 6088 } 6089 6090 storeValue(fp16i.loadValue()); 6091 } 6092 6093 Type *Half::getType() 6094 { 6095 return T(llvm::Type::getInt16Ty(*::context)); 6096 } 6097 6098 
Float::Float(RValue<Int> cast) 6099 { 6100 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType()); 6101 6102 storeValue(integer); 6103 } 6104 6105 Float::Float(RValue<UInt> cast) 6106 { 6107 RValue<Float> result = Float(Int(cast & UInt(0x7FFFFFFF))) + 6108 As<Float>((As<Int>(cast) >> 31) & As<Int>(Float(0x80000000u))); 6109 6110 storeValue(result.value); 6111 } 6112 6113 Float::Float(RValue<Half> cast) 6114 { 6115 Int fp16i(As<UShort>(cast)); 6116 6117 Int s = (fp16i >> 15) & 0x00000001; 6118 Int e = (fp16i >> 10) & 0x0000001F; 6119 Int m = fp16i & 0x000003FF; 6120 6121 UInt fp32i(s << 31); 6122 If(e == 0) 6123 { 6124 If(m != 0) 6125 { 6126 While((m & 0x00000400) == 0) 6127 { 6128 m <<= 1; 6129 e -= 1; 6130 } 6131 6132 fp32i |= As<UInt>(((e + (127 - 15) + 1) << 23) | ((m & ~0x00000400) << 13)); 6133 } 6134 } 6135 Else 6136 { 6137 fp32i |= As<UInt>(((e + (127 - 15)) << 23) | (m << 13)); 6138 } 6139 6140 storeValue(As<Float>(fp32i).value); 6141 } 6142 6143 Float::Float(float x) 6144 { 6145 storeValue(Nucleus::createConstantFloat(x)); 6146 } 6147 6148 Float::Float(RValue<Float> rhs) 6149 { 6150 storeValue(rhs.value); 6151 } 6152 6153 Float::Float(const Float &rhs) 6154 { 6155 Value *value = rhs.loadValue(); 6156 storeValue(value); 6157 } 6158 6159 Float::Float(const Reference<Float> &rhs) 6160 { 6161 Value *value = rhs.loadValue(); 6162 storeValue(value); 6163 } 6164 6165 RValue<Float> Float::operator=(RValue<Float> rhs) 6166 { 6167 storeValue(rhs.value); 6168 6169 return rhs; 6170 } 6171 6172 RValue<Float> Float::operator=(const Float &rhs) 6173 { 6174 Value *value = rhs.loadValue(); 6175 storeValue(value); 6176 6177 return RValue<Float>(value); 6178 } 6179 6180 RValue<Float> Float::operator=(const Reference<Float> &rhs) 6181 { 6182 Value *value = rhs.loadValue(); 6183 storeValue(value); 6184 6185 return RValue<Float>(value); 6186 } 6187 6188 RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs) 6189 { 6190 return 
RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value)); 6191 } 6192 6193 RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs) 6194 { 6195 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value)); 6196 } 6197 6198 RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs) 6199 { 6200 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value)); 6201 } 6202 6203 RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs) 6204 { 6205 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value)); 6206 } 6207 6208 RValue<Float> operator+=(Float &lhs, RValue<Float> rhs) 6209 { 6210 return lhs = lhs + rhs; 6211 } 6212 6213 RValue<Float> operator-=(Float &lhs, RValue<Float> rhs) 6214 { 6215 return lhs = lhs - rhs; 6216 } 6217 6218 RValue<Float> operator*=(Float &lhs, RValue<Float> rhs) 6219 { 6220 return lhs = lhs * rhs; 6221 } 6222 6223 RValue<Float> operator/=(Float &lhs, RValue<Float> rhs) 6224 { 6225 return lhs = lhs / rhs; 6226 } 6227 6228 RValue<Float> operator+(RValue<Float> val) 6229 { 6230 return val; 6231 } 6232 6233 RValue<Float> operator-(RValue<Float> val) 6234 { 6235 return RValue<Float>(Nucleus::createFNeg(val.value)); 6236 } 6237 6238 RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs) 6239 { 6240 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value)); 6241 } 6242 6243 RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs) 6244 { 6245 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value)); 6246 } 6247 6248 RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs) 6249 { 6250 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value)); 6251 } 6252 6253 RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs) 6254 { 6255 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value)); 6256 } 6257 6258 RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs) 6259 { 6260 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value)); 6261 } 6262 6263 RValue<Bool> 
operator==(RValue<Float> lhs, RValue<Float> rhs) 6264 { 6265 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value)); 6266 } 6267 6268 RValue<Float> Abs(RValue<Float> x) 6269 { 6270 return IfThenElse(x > 0.0f, x, -x); 6271 } 6272 6273 RValue<Float> Max(RValue<Float> x, RValue<Float> y) 6274 { 6275 return IfThenElse(x > y, x, y); 6276 } 6277 6278 RValue<Float> Min(RValue<Float> x, RValue<Float> y) 6279 { 6280 return IfThenElse(x < y, x, y); 6281 } 6282 6283 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2) 6284 { 6285 #if defined(__i386__) || defined(__x86_64__) 6286 if(exactAtPow2) 6287 { 6288 // rcpss uses a piecewise-linear approximation which minimizes the relative error 6289 // but is not exact at power-of-two values. Rectify by multiplying by the inverse. 6290 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f)))); 6291 } 6292 return x86::rcpss(x); 6293 #else 6294 return As<Float>(V(lowerRCP(V(x.value)))); 6295 #endif 6296 } 6297 6298 RValue<Float> RcpSqrt_pp(RValue<Float> x) 6299 { 6300 #if defined(__i386__) || defined(__x86_64__) 6301 return x86::rsqrtss(x); 6302 #else 6303 return As<Float>(V(lowerRSQRT(V(x.value)))); 6304 #endif 6305 } 6306 6307 RValue<Float> Sqrt(RValue<Float> x) 6308 { 6309 #if defined(__i386__) || defined(__x86_64__) 6310 return x86::sqrtss(x); 6311 #else 6312 return As<Float>(V(lowerSQRT(V(x.value)))); 6313 #endif 6314 } 6315 6316 RValue<Float> Round(RValue<Float> x) 6317 { 6318 #if defined(__i386__) || defined(__x86_64__) 6319 if(CPUID::supportsSSE4_1()) 6320 { 6321 return x86::roundss(x, 0); 6322 } 6323 else 6324 { 6325 return Float4(Round(Float4(x))).x; 6326 } 6327 #else 6328 return RValue<Float>(V(lowerRound(V(x.value)))); 6329 #endif 6330 } 6331 6332 RValue<Float> Trunc(RValue<Float> x) 6333 { 6334 #if defined(__i386__) || defined(__x86_64__) 6335 if(CPUID::supportsSSE4_1()) 6336 { 6337 return x86::roundss(x, 3); 6338 } 6339 else 6340 { 6341 return Float(Int(x)); // Rounded toward zero 
6342 } 6343 #else 6344 return RValue<Float>(V(lowerTrunc(V(x.value)))); 6345 #endif 6346 } 6347 6348 RValue<Float> Frac(RValue<Float> x) 6349 { 6350 #if defined(__i386__) || defined(__x86_64__) 6351 if(CPUID::supportsSSE4_1()) 6352 { 6353 return x - x86::floorss(x); 6354 } 6355 else 6356 { 6357 return Float4(Frac(Float4(x))).x; 6358 } 6359 #else 6360 // x - floor(x) can be 1.0 for very small negative x. 6361 // Clamp against the value just below 1.0. 6362 return Min(x - Floor(x), As<Float>(Int(0x3F7FFFFF))); 6363 #endif 6364 } 6365 6366 RValue<Float> Floor(RValue<Float> x) 6367 { 6368 #if defined(__i386__) || defined(__x86_64__) 6369 if(CPUID::supportsSSE4_1()) 6370 { 6371 return x86::floorss(x); 6372 } 6373 else 6374 { 6375 return Float4(Floor(Float4(x))).x; 6376 } 6377 #else 6378 return RValue<Float>(V(lowerFloor(V(x.value)))); 6379 #endif 6380 } 6381 6382 RValue<Float> Ceil(RValue<Float> x) 6383 { 6384 #if defined(__i386__) || defined(__x86_64__) 6385 if(CPUID::supportsSSE4_1()) 6386 { 6387 return x86::ceilss(x); 6388 } 6389 else 6390 #endif 6391 { 6392 return Float4(Ceil(Float4(x))).x; 6393 } 6394 } 6395 6396 Type *Float::getType() 6397 { 6398 return T(llvm::Type::getFloatTy(*::context)); 6399 } 6400 6401 Float2::Float2(RValue<Float4> cast) 6402 { 6403 storeValue(Nucleus::createBitCast(cast.value, getType())); 6404 } 6405 6406 Type *Float2::getType() 6407 { 6408 return T(Type_v2f32); 6409 } 6410 6411 Float4::Float4(RValue<Byte4> cast) : XYZW(this) 6412 { 6413 Value *a = Int4(cast).loadValue(); 6414 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType()); 6415 6416 storeValue(xyzw); 6417 } 6418 6419 Float4::Float4(RValue<SByte4> cast) : XYZW(this) 6420 { 6421 Value *a = Int4(cast).loadValue(); 6422 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType()); 6423 6424 storeValue(xyzw); 6425 } 6426 6427 Float4::Float4(RValue<Short4> cast) : XYZW(this) 6428 { 6429 Int4 c(cast); 6430 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType())); 6431 
} 6432 6433 Float4::Float4(RValue<UShort4> cast) : XYZW(this) 6434 { 6435 Int4 c(cast); 6436 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType())); 6437 } 6438 6439 Float4::Float4(RValue<Int4> cast) : XYZW(this) 6440 { 6441 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType()); 6442 6443 storeValue(xyzw); 6444 } 6445 6446 Float4::Float4(RValue<UInt4> cast) : XYZW(this) 6447 { 6448 RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) + 6449 As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u))); 6450 6451 storeValue(result.value); 6452 } 6453 6454 Float4::Float4() : XYZW(this) 6455 { 6456 } 6457 6458 Float4::Float4(float xyzw) : XYZW(this) 6459 { 6460 constant(xyzw, xyzw, xyzw, xyzw); 6461 } 6462 6463 Float4::Float4(float x, float yzw) : XYZW(this) 6464 { 6465 constant(x, yzw, yzw, yzw); 6466 } 6467 6468 Float4::Float4(float x, float y, float zw) : XYZW(this) 6469 { 6470 constant(x, y, zw, zw); 6471 } 6472 6473 Float4::Float4(float x, float y, float z, float w) : XYZW(this) 6474 { 6475 constant(x, y, z, w); 6476 } 6477 6478 void Float4::constant(float x, float y, float z, float w) 6479 { 6480 double constantVector[4] = {x, y, z, w}; 6481 storeValue(Nucleus::createConstantVector(constantVector, getType())); 6482 } 6483 6484 Float4::Float4(RValue<Float4> rhs) : XYZW(this) 6485 { 6486 storeValue(rhs.value); 6487 } 6488 6489 Float4::Float4(const Float4 &rhs) : XYZW(this) 6490 { 6491 Value *value = rhs.loadValue(); 6492 storeValue(value); 6493 } 6494 6495 Float4::Float4(const Reference<Float4> &rhs) : XYZW(this) 6496 { 6497 Value *value = rhs.loadValue(); 6498 storeValue(value); 6499 } 6500 6501 Float4::Float4(RValue<Float> rhs) : XYZW(this) 6502 { 6503 Value *vector = loadValue(); 6504 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0); 6505 6506 int swizzle[4] = {0, 0, 0, 0}; 6507 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle); 6508 6509 storeValue(replicate); 6510 } 6511 
6512 Float4::Float4(const Float &rhs) : XYZW(this) 6513 { 6514 *this = RValue<Float>(rhs.loadValue()); 6515 } 6516 6517 Float4::Float4(const Reference<Float> &rhs) : XYZW(this) 6518 { 6519 *this = RValue<Float>(rhs.loadValue()); 6520 } 6521 6522 RValue<Float4> Float4::operator=(float x) 6523 { 6524 return *this = Float4(x, x, x, x); 6525 } 6526 6527 RValue<Float4> Float4::operator=(RValue<Float4> rhs) 6528 { 6529 storeValue(rhs.value); 6530 6531 return rhs; 6532 } 6533 6534 RValue<Float4> Float4::operator=(const Float4 &rhs) 6535 { 6536 Value *value = rhs.loadValue(); 6537 storeValue(value); 6538 6539 return RValue<Float4>(value); 6540 } 6541 6542 RValue<Float4> Float4::operator=(const Reference<Float4> &rhs) 6543 { 6544 Value *value = rhs.loadValue(); 6545 storeValue(value); 6546 6547 return RValue<Float4>(value); 6548 } 6549 6550 RValue<Float4> Float4::operator=(RValue<Float> rhs) 6551 { 6552 return *this = Float4(rhs); 6553 } 6554 6555 RValue<Float4> Float4::operator=(const Float &rhs) 6556 { 6557 return *this = Float4(rhs); 6558 } 6559 6560 RValue<Float4> Float4::operator=(const Reference<Float> &rhs) 6561 { 6562 return *this = Float4(rhs); 6563 } 6564 6565 RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs) 6566 { 6567 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value)); 6568 } 6569 6570 RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs) 6571 { 6572 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value)); 6573 } 6574 6575 RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs) 6576 { 6577 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value)); 6578 } 6579 6580 RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs) 6581 { 6582 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value)); 6583 } 6584 6585 RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs) 6586 { 6587 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value)); 6588 } 6589 6590 RValue<Float4> 
operator+=(Float4 &lhs, RValue<Float4> rhs) 6591 { 6592 return lhs = lhs + rhs; 6593 } 6594 6595 RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs) 6596 { 6597 return lhs = lhs - rhs; 6598 } 6599 6600 RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs) 6601 { 6602 return lhs = lhs * rhs; 6603 } 6604 6605 RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs) 6606 { 6607 return lhs = lhs / rhs; 6608 } 6609 6610 RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs) 6611 { 6612 return lhs = lhs % rhs; 6613 } 6614 6615 RValue<Float4> operator+(RValue<Float4> val) 6616 { 6617 return val; 6618 } 6619 6620 RValue<Float4> operator-(RValue<Float4> val) 6621 { 6622 return RValue<Float4>(Nucleus::createFNeg(val.value)); 6623 } 6624 6625 RValue<Float4> Abs(RValue<Float4> x) 6626 { 6627 Value *vector = Nucleus::createBitCast(x.value, Int4::getType()); 6628 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}; 6629 Value *result = Nucleus::createAnd(vector, Nucleus::createConstantVector(constantVector, Int4::getType())); 6630 6631 return As<Float4>(result); 6632 } 6633 6634 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y) 6635 { 6636 #if defined(__i386__) || defined(__x86_64__) 6637 return x86::maxps(x, y); 6638 #else 6639 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OGT))); 6640 #endif 6641 } 6642 6643 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y) 6644 { 6645 #if defined(__i386__) || defined(__x86_64__) 6646 return x86::minps(x, y); 6647 #else 6648 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OLT))); 6649 #endif 6650 } 6651 6652 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2) 6653 { 6654 #if defined(__i386__) || defined(__x86_64__) 6655 if(exactAtPow2) 6656 { 6657 // rcpps uses a piecewise-linear approximation which minimizes the relative error 6658 // but is not exact at power-of-two values. Rectify by multiplying by the inverse. 
6659 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f)))); 6660 } 6661 return x86::rcpps(x); 6662 #else 6663 return As<Float4>(V(lowerRCP(V(x.value)))); 6664 #endif 6665 } 6666 6667 RValue<Float4> RcpSqrt_pp(RValue<Float4> x) 6668 { 6669 #if defined(__i386__) || defined(__x86_64__) 6670 return x86::rsqrtps(x); 6671 #else 6672 return As<Float4>(V(lowerRSQRT(V(x.value)))); 6673 #endif 6674 } 6675 6676 RValue<Float4> Sqrt(RValue<Float4> x) 6677 { 6678 #if defined(__i386__) || defined(__x86_64__) 6679 return x86::sqrtps(x); 6680 #else 6681 return As<Float4>(V(lowerSQRT(V(x.value)))); 6682 #endif 6683 } 6684 6685 RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i) 6686 { 6687 return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i)); 6688 } 6689 6690 RValue<Float> Extract(RValue<Float4> x, int i) 6691 { 6692 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i)); 6693 } 6694 6695 RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select) 6696 { 6697 return RValue<Float4>(createSwizzle4(x.value, select)); 6698 } 6699 6700 RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm) 6701 { 6702 int shuffle[4] = 6703 { 6704 ((imm >> 0) & 0x03) + 0, 6705 ((imm >> 2) & 0x03) + 0, 6706 ((imm >> 4) & 0x03) + 4, 6707 ((imm >> 6) & 0x03) + 4, 6708 }; 6709 6710 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 6711 } 6712 6713 RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y) 6714 { 6715 int shuffle[4] = {0, 4, 1, 5}; 6716 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 6717 } 6718 6719 RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y) 6720 { 6721 int shuffle[4] = {2, 6, 3, 7}; 6722 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); 6723 } 6724 6725 RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select) 6726 { 6727 Value 
*vector = lhs.loadValue(); 6728 Value *result = createMask4(vector, rhs.value, select); 6729 lhs.storeValue(result); 6730 6731 return RValue<Float4>(result); 6732 } 6733 6734 RValue<Int> SignMask(RValue<Float4> x) 6735 { 6736 #if defined(__i386__) || defined(__x86_64__) 6737 return x86::movmskps(x); 6738 #else 6739 return As<Int>(V(lowerFPSignMask(V(x.value), T(Int::getType())))); 6740 #endif 6741 } 6742 6743 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y) 6744 { 6745 // return As<Int4>(x86::cmpeqps(x, y)); 6746 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType())); 6747 } 6748 6749 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y) 6750 { 6751 // return As<Int4>(x86::cmpltps(x, y)); 6752 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType())); 6753 } 6754 6755 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y) 6756 { 6757 // return As<Int4>(x86::cmpleps(x, y)); 6758 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType())); 6759 } 6760 6761 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y) 6762 { 6763 // return As<Int4>(x86::cmpneqps(x, y)); 6764 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType())); 6765 } 6766 6767 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y) 6768 { 6769 // return As<Int4>(x86::cmpnltps(x, y)); 6770 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType())); 6771 } 6772 6773 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y) 6774 { 6775 // return As<Int4>(x86::cmpnleps(x, y)); 6776 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType())); 6777 } 6778 6779 RValue<Int4> IsInf(RValue<Float4> x) 6780 { 6781 return CmpEQ(As<Int4>(x) & Int4(0x7FFFFFFF), Int4(0x7F800000)); 6782 } 6783 6784 RValue<Int4> IsNan(RValue<Float4> x) 6785 { 6786 return ~CmpEQ(x, x); 
6787 } 6788 6789 RValue<Float4> Round(RValue<Float4> x) 6790 { 6791 #if defined(__i386__) || defined(__x86_64__) 6792 if(CPUID::supportsSSE4_1()) 6793 { 6794 return x86::roundps(x, 0); 6795 } 6796 else 6797 { 6798 return Float4(RoundInt(x)); 6799 } 6800 #else 6801 return RValue<Float4>(V(lowerRound(V(x.value)))); 6802 #endif 6803 } 6804 6805 RValue<Float4> Trunc(RValue<Float4> x) 6806 { 6807 #if defined(__i386__) || defined(__x86_64__) 6808 if(CPUID::supportsSSE4_1()) 6809 { 6810 return x86::roundps(x, 3); 6811 } 6812 else 6813 { 6814 return Float4(Int4(x)); 6815 } 6816 #else 6817 return RValue<Float4>(V(lowerTrunc(V(x.value)))); 6818 #endif 6819 } 6820 6821 RValue<Float4> Frac(RValue<Float4> x) 6822 { 6823 Float4 frc; 6824 6825 #if defined(__i386__) || defined(__x86_64__) 6826 if(CPUID::supportsSSE4_1()) 6827 { 6828 frc = x - Floor(x); 6829 } 6830 else 6831 { 6832 frc = x - Float4(Int4(x)); // Signed fractional part. 6833 6834 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative. 6835 } 6836 #else 6837 frc = x - Floor(x); 6838 #endif 6839 6840 // x - floor(x) can be 1.0 for very small negative x. 6841 // Clamp against the value just below 1.0. 
6842 return Min(frc, As<Float4>(Int4(0x3F7FFFFF))); 6843 } 6844 6845 RValue<Float4> Floor(RValue<Float4> x) 6846 { 6847 #if defined(__i386__) || defined(__x86_64__) 6848 if(CPUID::supportsSSE4_1()) 6849 { 6850 return x86::floorps(x); 6851 } 6852 else 6853 { 6854 return x - Frac(x); 6855 } 6856 #else 6857 return RValue<Float4>(V(lowerFloor(V(x.value)))); 6858 #endif 6859 } 6860 6861 RValue<Float4> Ceil(RValue<Float4> x) 6862 { 6863 #if defined(__i386__) || defined(__x86_64__) 6864 if(CPUID::supportsSSE4_1()) 6865 { 6866 return x86::ceilps(x); 6867 } 6868 else 6869 #endif 6870 { 6871 return -Floor(-x); 6872 } 6873 } 6874 6875 Type *Float4::getType() 6876 { 6877 return T(llvm::VectorType::get(T(Float::getType()), 4)); 6878 } 6879 6880 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset) 6881 { 6882 return lhs + RValue<Int>(Nucleus::createConstantInt(offset)); 6883 } 6884 6885 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset) 6886 { 6887 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false)); 6888 } 6889 6890 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset) 6891 { 6892 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true)); 6893 } 6894 6895 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset) 6896 { 6897 return lhs = lhs + offset; 6898 } 6899 6900 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset) 6901 { 6902 return lhs = lhs + offset; 6903 } 6904 6905 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset) 6906 { 6907 return lhs = lhs + offset; 6908 } 6909 6910 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset) 6911 { 6912 return lhs + -offset; 6913 } 6914 6915 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset) 6916 { 6917 return lhs + -offset; 6918 } 6919 6920 RValue<Pointer<Byte>> 
operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset) 6921 { 6922 return lhs + -offset; 6923 } 6924 6925 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset) 6926 { 6927 return lhs = lhs - offset; 6928 } 6929 6930 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset) 6931 { 6932 return lhs = lhs - offset; 6933 } 6934 6935 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset) 6936 { 6937 return lhs = lhs - offset; 6938 } 6939 6940 void Return() 6941 { 6942 Nucleus::createRetVoid(); 6943 Nucleus::setInsertBlock(Nucleus::createBasicBlock()); 6944 Nucleus::createUnreachable(); 6945 } 6946 6947 void Return(RValue<Int> ret) 6948 { 6949 Nucleus::createRet(ret.value); 6950 Nucleus::setInsertBlock(Nucleus::createBasicBlock()); 6951 Nucleus::createUnreachable(); 6952 } 6953 6954 void branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB) 6955 { 6956 Nucleus::createCondBr(cmp.value, bodyBB, endBB); 6957 Nucleus::setInsertBlock(bodyBB); 6958 } 6959 6960 RValue<Long> Ticks() 6961 { 6962 llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter); 6963 6964 return RValue<Long>(V(::builder->CreateCall(rdtsc))); 6965 } 6966 } 6967 6968 namespace rr 6969 { 6970 #if defined(__i386__) || defined(__x86_64__) 6971 namespace x86 6972 { 6973 RValue<Int> cvtss2si(RValue<Float> val) 6974 { 6975 llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si); 6976 6977 Float4 vector; 6978 vector.x = val; 6979 6980 return RValue<Int>(V(::builder->CreateCall(cvtss2si, ARGS(V(RValue<Float4>(vector).value))))); 6981 } 6982 6983 RValue<Int4> cvtps2dq(RValue<Float4> val) 6984 { 6985 llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq); 6986 6987 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, ARGS(V(val.value))))); 6988 } 6989 6990 RValue<Float> rcpss(RValue<Float> val) 6991 { 6992 llvm::Function 
*rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss); 6993 6994 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0); 6995 6996 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, ARGS(V(vector)))), Float::getType(), 0)); 6997 } 6998 6999 RValue<Float> sqrtss(RValue<Float> val) 7000 { 7001 #if REACTOR_LLVM_VERSION < 7 7002 llvm::Function *sqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ss); 7003 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0); 7004 7005 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, ARGS(V(vector)))), Float::getType(), 0)); 7006 #else 7007 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()}); 7008 return RValue<Float>(V(::builder->CreateCall(sqrt, ARGS(V(val.value))))); 7009 #endif 7010 } 7011 7012 RValue<Float> rsqrtss(RValue<Float> val) 7013 { 7014 llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss); 7015 7016 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0); 7017 7018 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, ARGS(V(vector)))), Float::getType(), 0)); 7019 } 7020 7021 RValue<Float4> rcpps(RValue<Float4> val) 7022 { 7023 llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps); 7024 7025 return RValue<Float4>(V(::builder->CreateCall(rcpps, ARGS(V(val.value))))); 7026 } 7027 7028 RValue<Float4> sqrtps(RValue<Float4> val) 7029 { 7030 #if REACTOR_LLVM_VERSION < 7 7031 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ps); 7032 #else 7033 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, 
llvm::Intrinsic::sqrt, {V(val.value)->getType()}); 7034 #endif 7035 7036 return RValue<Float4>(V(::builder->CreateCall(sqrtps, ARGS(V(val.value))))); 7037 } 7038 7039 RValue<Float4> rsqrtps(RValue<Float4> val) 7040 { 7041 llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps); 7042 7043 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, ARGS(V(val.value))))); 7044 } 7045 7046 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y) 7047 { 7048 llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps); 7049 7050 return RValue<Float4>(V(::builder->CreateCall2(maxps, ARGS(V(x.value), V(y.value))))); 7051 } 7052 7053 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y) 7054 { 7055 llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps); 7056 7057 return RValue<Float4>(V(::builder->CreateCall2(minps, ARGS(V(x.value), V(y.value))))); 7058 } 7059 7060 RValue<Float> roundss(RValue<Float> val, unsigned char imm) 7061 { 7062 llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss); 7063 7064 Value *undef = V(llvm::UndefValue::get(T(Float4::getType()))); 7065 Value *vector = Nucleus::createInsertElement(undef, val.value, 0); 7066 7067 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, ARGS(V(undef), V(vector), V(Nucleus::createConstantInt(imm))))), Float::getType(), 0)); 7068 } 7069 7070 RValue<Float> floorss(RValue<Float> val) 7071 { 7072 return roundss(val, 1); 7073 } 7074 7075 RValue<Float> ceilss(RValue<Float> val) 7076 { 7077 return roundss(val, 2); 7078 } 7079 7080 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm) 7081 { 7082 llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps); 7083 7084 return RValue<Float4>(V(::builder->CreateCall2(roundps, ARGS(V(val.value), 
V(Nucleus::createConstantInt(imm)))))); 7085 } 7086 7087 RValue<Float4> floorps(RValue<Float4> val) 7088 { 7089 return roundps(val, 1); 7090 } 7091 7092 RValue<Float4> ceilps(RValue<Float4> val) 7093 { 7094 return roundps(val, 2); 7095 } 7096 7097 RValue<Int4> pabsd(RValue<Int4> x) 7098 { 7099 #if REACTOR_LLVM_VERSION < 7 7100 llvm::Function *pabsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_ssse3_pabs_d_128); 7101 7102 return RValue<Int4>(V(::builder->CreateCall(pabsd, ARGS(V(x.value))))); 7103 #else 7104 return RValue<Int4>(V(lowerPABS(V(x.value)))); 7105 #endif 7106 } 7107 7108 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y) 7109 { 7110 llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w); 7111 7112 return As<Short4>(V(::builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value))))); 7113 } 7114 7115 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y) 7116 { 7117 llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w); 7118 7119 return As<Short4>(V(::builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value))))); 7120 } 7121 7122 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y) 7123 { 7124 llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w); 7125 7126 return As<UShort4>(V(::builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value))))); 7127 } 7128 7129 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y) 7130 { 7131 llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w); 7132 7133 return As<UShort4>(V(::builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value))))); 7134 } 7135 7136 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y) 7137 { 7138 llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b); 7139 7140 return As<SByte8>(V(::builder->CreateCall2(paddsb, 
ARGS(V(x.value), V(y.value))))); 7141 } 7142 7143 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y) 7144 { 7145 llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b); 7146 7147 return As<SByte8>(V(::builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value))))); 7148 } 7149 7150 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y) 7151 { 7152 llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b); 7153 7154 return As<Byte8>(V(::builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value))))); 7155 } 7156 7157 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y) 7158 { 7159 llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b); 7160 7161 return As<Byte8>(V(::builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value))))); 7162 } 7163 7164 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y) 7165 { 7166 #if REACTOR_LLVM_VERSION < 7 7167 llvm::Function *pavgw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pavg_w); 7168 7169 return As<UShort4>(V(::builder->CreateCall2(pavgw, ARGS(V(x.value), V(y.value))))); 7170 #else 7171 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value)))); 7172 #endif 7173 } 7174 7175 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y) 7176 { 7177 #if REACTOR_LLVM_VERSION < 7 7178 llvm::Function *pmaxsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmaxs_w); 7179 7180 return As<Short4>(V(::builder->CreateCall2(pmaxsw, ARGS(V(x.value), V(y.value))))); 7181 #else 7182 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT))); 7183 #endif 7184 } 7185 7186 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y) 7187 { 7188 #if REACTOR_LLVM_VERSION < 7 7189 llvm::Function *pminsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmins_w); 7190 7191 return 
As<Short4>(V(::builder->CreateCall2(pminsw, ARGS(V(x.value), V(y.value))))); 7192 #else 7193 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT))); 7194 #endif 7195 } 7196 7197 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y) 7198 { 7199 #if REACTOR_LLVM_VERSION < 7 7200 llvm::Function *pcmpgtw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_w); 7201 7202 return As<Short4>(V(::builder->CreateCall2(pcmpgtw, ARGS(V(x.value), V(y.value))))); 7203 #else 7204 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType())))); 7205 #endif 7206 } 7207 7208 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y) 7209 { 7210 #if REACTOR_LLVM_VERSION < 7 7211 llvm::Function *pcmpeqw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_w); 7212 7213 return As<Short4>(V(::builder->CreateCall2(pcmpeqw, ARGS(V(x.value), V(y.value))))); 7214 #else 7215 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType())))); 7216 #endif 7217 } 7218 7219 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y) 7220 { 7221 #if REACTOR_LLVM_VERSION < 7 7222 llvm::Function *pcmpgtb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_b); 7223 7224 return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, ARGS(V(x.value), V(y.value))))); 7225 #else 7226 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType())))); 7227 #endif 7228 } 7229 7230 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y) 7231 { 7232 #if REACTOR_LLVM_VERSION < 7 7233 llvm::Function *pcmpeqb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_b); 7234 7235 return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, ARGS(V(x.value), V(y.value))))); 7236 #else 7237 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType())))); 7238 #endif 
7239 } 7240 7241 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y) 7242 { 7243 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128); 7244 7245 return As<Short4>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value))))); 7246 } 7247 7248 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y) 7249 { 7250 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128); 7251 7252 return RValue<Short8>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value))))); 7253 } 7254 7255 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y) 7256 { 7257 llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128); 7258 7259 return As<SByte8>(V(::builder->CreateCall2(packsswb, ARGS(V(x.value), V(y.value))))); 7260 } 7261 7262 RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y) 7263 { 7264 llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128); 7265 7266 return As<Byte8>(V(::builder->CreateCall2(packuswb, ARGS(V(x.value), V(y.value))))); 7267 } 7268 7269 RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y) 7270 { 7271 if(CPUID::supportsSSE4_1()) 7272 { 7273 llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw); 7274 7275 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, ARGS(V(x.value), V(y.value))))); 7276 } 7277 else 7278 { 7279 RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000); 7280 RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000); 7281 7282 return As<UShort8>(packssdw(bx, by) + Short8(0x8000u)); 7283 } 7284 } 7285 7286 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y) 7287 { 7288 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w); 7289 7290 return As<UShort4>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), 
V(Nucleus::createConstantInt(y)))))); 7291 } 7292 7293 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y) 7294 { 7295 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w); 7296 7297 return RValue<UShort8>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y)))))); 7298 } 7299 7300 RValue<Short4> psraw(RValue<Short4> x, unsigned char y) 7301 { 7302 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w); 7303 7304 return As<Short4>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y)))))); 7305 } 7306 7307 RValue<Short8> psraw(RValue<Short8> x, unsigned char y) 7308 { 7309 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w); 7310 7311 return RValue<Short8>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y)))))); 7312 } 7313 7314 RValue<Short4> psllw(RValue<Short4> x, unsigned char y) 7315 { 7316 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w); 7317 7318 return As<Short4>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y)))))); 7319 } 7320 7321 RValue<Short8> psllw(RValue<Short8> x, unsigned char y) 7322 { 7323 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w); 7324 7325 return RValue<Short8>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y)))))); 7326 } 7327 7328 RValue<Int2> pslld(RValue<Int2> x, unsigned char y) 7329 { 7330 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d); 7331 7332 return As<Int2>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y)))))); 7333 } 7334 7335 RValue<Int4> pslld(RValue<Int4> x, unsigned char y) 7336 { 7337 llvm::Function *pslld = 
llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d); 7338 7339 return RValue<Int4>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y)))))); 7340 } 7341 7342 RValue<Int2> psrad(RValue<Int2> x, unsigned char y) 7343 { 7344 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d); 7345 7346 return As<Int2>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y)))))); 7347 } 7348 7349 RValue<Int4> psrad(RValue<Int4> x, unsigned char y) 7350 { 7351 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d); 7352 7353 return RValue<Int4>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y)))))); 7354 } 7355 7356 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y) 7357 { 7358 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d); 7359 7360 return As<UInt2>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y)))))); 7361 } 7362 7363 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y) 7364 { 7365 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d); 7366 7367 return RValue<UInt4>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y)))))); 7368 } 7369 7370 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y) 7371 { 7372 #if REACTOR_LLVM_VERSION < 7 7373 llvm::Function *pmaxsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxsd); 7374 7375 return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, ARGS(V(x.value), V(y.value))))); 7376 #else 7377 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT))); 7378 #endif 7379 } 7380 7381 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y) 7382 { 7383 #if REACTOR_LLVM_VERSION < 7 7384 llvm::Function *pminsd = 
llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminsd); 7385 7386 return RValue<Int4>(V(::builder->CreateCall2(pminsd, ARGS(V(x.value), V(y.value))))); 7387 #else 7388 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT))); 7389 #endif 7390 } 7391 7392 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y) 7393 { 7394 #if REACTOR_LLVM_VERSION < 7 7395 llvm::Function *pmaxud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxud); 7396 7397 return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, ARGS(V(x.value), V(y.value))))); 7398 #else 7399 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_UGT))); 7400 #endif 7401 } 7402 7403 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y) 7404 { 7405 #if REACTOR_LLVM_VERSION < 7 7406 llvm::Function *pminud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminud); 7407 7408 return RValue<UInt4>(V(::builder->CreateCall2(pminud, ARGS(V(x.value), V(y.value))))); 7409 #else 7410 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_ULT))); 7411 #endif 7412 } 7413 7414 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y) 7415 { 7416 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w); 7417 7418 return As<Short4>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value))))); 7419 } 7420 7421 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y) 7422 { 7423 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w); 7424 7425 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value))))); 7426 } 7427 7428 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y) 7429 { 7430 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd); 7431 7432 return As<Int2>(V(::builder->CreateCall2(pmaddwd, 
ARGS(V(x.value), V(y.value))))); 7433 } 7434 7435 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y) 7436 { 7437 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w); 7438 7439 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value))))); 7440 } 7441 7442 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y) 7443 { 7444 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w); 7445 7446 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value))))); 7447 } 7448 7449 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y) 7450 { 7451 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd); 7452 7453 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value))))); 7454 } 7455 7456 RValue<Int> movmskps(RValue<Float4> x) 7457 { 7458 llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps); 7459 7460 return RValue<Int>(V(::builder->CreateCall(movmskps, ARGS(V(x.value))))); 7461 } 7462 7463 RValue<Int> pmovmskb(RValue<Byte8> x) 7464 { 7465 llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128); 7466 7467 return RValue<Int>(V(::builder->CreateCall(pmovmskb, ARGS(V(x.value))))) & 0xFF; 7468 } 7469 7470 RValue<Int4> pmovzxbd(RValue<Byte16> x) 7471 { 7472 #if REACTOR_LLVM_VERSION < 7 7473 llvm::Function *pmovzxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxbd); 7474 7475 return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, ARGS(V(x.value))))); 7476 #else 7477 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false))); 7478 #endif 7479 } 7480 7481 RValue<Int4> pmovsxbd(RValue<SByte16> x) 7482 { 7483 #if REACTOR_LLVM_VERSION < 7 7484 llvm::Function *pmovsxbd = 
llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxbd); 7485 7486 return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, ARGS(V(x.value))))); 7487 #else 7488 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true))); 7489 #endif 7490 } 7491 7492 RValue<Int4> pmovzxwd(RValue<UShort8> x) 7493 { 7494 #if REACTOR_LLVM_VERSION < 7 7495 llvm::Function *pmovzxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxwd); 7496 7497 return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, ARGS(V(x.value))))); 7498 #else 7499 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false))); 7500 #endif 7501 } 7502 7503 RValue<Int4> pmovsxwd(RValue<Short8> x) 7504 { 7505 #if REACTOR_LLVM_VERSION < 7 7506 llvm::Function *pmovsxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxwd); 7507 7508 return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, ARGS(V(x.value))))); 7509 #else 7510 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true))); 7511 #endif 7512 } 7513 } 7514 #endif // defined(__i386__) || defined(__x86_64__) 7515 } 7516