Home | History | Annotate | Download | only in Reactor
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "Reactor.hpp"
     16 
     17 #include "x86.hpp"
     18 #include "CPUID.hpp"
     19 #include "Thread.hpp"
     20 #include "ExecutableMemory.hpp"
     21 #include "MutexLock.hpp"
     22 
     23 #undef min
     24 #undef max
     25 
     26 #if REACTOR_LLVM_VERSION < 7
     27 	#include "llvm/Analysis/LoopPass.h"
     28 	#include "llvm/Constants.h"
     29 	#include "llvm/Function.h"
     30 	#include "llvm/GlobalVariable.h"
     31 	#include "llvm/Intrinsics.h"
     32 	#include "llvm/LLVMContext.h"
     33 	#include "llvm/Module.h"
     34 	#include "llvm/PassManager.h"
     35 	#include "llvm/Support/IRBuilder.h"
     36 	#include "llvm/Support/TargetSelect.h"
     37 	#include "llvm/Target/TargetData.h"
     38 	#include "llvm/Target/TargetOptions.h"
     39 	#include "llvm/Transforms/Scalar.h"
     40 	#include "../lib/ExecutionEngine/JIT/JIT.h"
     41 
     42 	#include "LLVMRoutine.hpp"
     43 	#include "LLVMRoutineManager.hpp"
     44 
     45 	#define ARGS(...) __VA_ARGS__
     46 #else
     47 	#include "llvm/Analysis/LoopPass.h"
     48 	#include "llvm/ExecutionEngine/ExecutionEngine.h"
     49 	#include "llvm/ExecutionEngine/JITSymbol.h"
     50 	#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
     51 	#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
     52 	#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
     53 	#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
     54 	#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
     55 	#include "llvm/ExecutionEngine/SectionMemoryManager.h"
     56 	#include "llvm/IR/Constants.h"
     57 	#include "llvm/IR/DataLayout.h"
     58 	#include "llvm/IR/Function.h"
     59 	#include "llvm/IR/GlobalVariable.h"
     60 	#include "llvm/IR/IRBuilder.h"
     61 	#include "llvm/IR/Intrinsics.h"
     62 	#include "llvm/IR/LLVMContext.h"
     63 	#include "llvm/IR/LegacyPassManager.h"
     64 	#include "llvm/IR/Mangler.h"
     65 	#include "llvm/IR/Module.h"
     66 	#include "llvm/Support/Error.h"
     67 	#include "llvm/Support/TargetSelect.h"
     68 	#include "llvm/Target/TargetOptions.h"
     69 	#include "llvm/Transforms/InstCombine/InstCombine.h"
     70 	#include "llvm/Transforms/Scalar.h"
     71 	#include "llvm/Transforms/Scalar/GVN.h"
     72 
     73 	#include "LLVMRoutine.hpp"
     74 
     75 	#define ARGS(...) {__VA_ARGS__}
     76 	#define CreateCall2 CreateCall
     77 	#define CreateCall3 CreateCall
     78 
     79 	#include <unordered_map>
     80 #endif
     81 
     82 #include <numeric>
     83 #include <fstream>
     84 
     85 #if defined(__i386__) || defined(__x86_64__)
     86 #include <xmmintrin.h>
     87 #endif
     88 
     89 #include <math.h>
     90 
     91 #if defined(__x86_64__) && defined(_WIN32)
// Unimplemented stub, compiled only for 64-bit Windows builds (see the
// enclosing #if). NOTE(review): presumably this satisfies a reference to this
// symbol from LLVM's x86 JIT - confirm. Calling it trips the assertion.
extern "C" void X86CompilationCallback()
{
	assert(false);   // UNIMPLEMENTED
}
     96 #endif
     97 
     98 #if REACTOR_LLVM_VERSION < 7
// Declares a global flag that lives in the legacy (pre-7) LLVM library so
// that Nucleus::Nucleus() can assign to it.
namespace llvm
{
	extern bool JITEmitDebugInfo;
}
    103 #endif
    104 
// Forward declaration so the file-local state below can hold a pointer to the
// JIT wrapper before the class itself is defined.
namespace rr
{
	class LLVMReactorJIT;
}
    109 
    110 namespace
    111 {
	// File-local code generation state.
	// reactorJIT, builder and context are created on first use by
	// Nucleus::Nucleus(); module is created by LLVMReactorJIT::startSession().
	rr::LLVMReactorJIT *reactorJIT = nullptr;
	llvm::IRBuilder<> *builder = nullptr;
	llvm::LLVMContext *context = nullptr;
	llvm::Module *module = nullptr;
	llvm::Function *function = nullptr;

	// Locked by Nucleus::Nucleus() and unlocked by Nucleus::~Nucleus().
	rr::MutexLock codegenMutex;
    119 
    120 #if REACTOR_LLVM_VERSION >= 7
    121 	llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y)
    122 	{
    123 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
    124 
    125 		llvm::VectorType *extTy =
    126 			llvm::VectorType::getExtendedElementVectorType(ty);
    127 		x = ::builder->CreateZExt(x, extTy);
    128 		y = ::builder->CreateZExt(y, extTy);
    129 
    130 		// (x + y + 1) >> 1
    131 		llvm::Constant *one = llvm::ConstantInt::get(extTy, 1);
    132 		llvm::Value *res = ::builder->CreateAdd(x, y);
    133 		res = ::builder->CreateAdd(res, one);
    134 		res = ::builder->CreateLShr(res, one);
    135 		return ::builder->CreateTrunc(res, ty);
    136 	}
    137 
    138 	llvm::Value *lowerPMINMAX(llvm::Value *x, llvm::Value *y,
    139 	                          llvm::ICmpInst::Predicate pred)
    140 	{
    141 		return ::builder->CreateSelect(::builder->CreateICmp(pred, x, y), x, y);
    142 	}
    143 
    144 	llvm::Value *lowerPCMP(llvm::ICmpInst::Predicate pred, llvm::Value *x,
    145 	                       llvm::Value *y, llvm::Type *dstTy)
    146 	{
    147 		return ::builder->CreateSExt(::builder->CreateICmp(pred, x, y), dstTy, "");
    148 	}
    149 
    150 #if defined(__i386__) || defined(__x86_64__)
    151 	llvm::Value *lowerPMOV(llvm::Value *op, llvm::Type *dstType, bool sext)
    152 	{
    153 		llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(op->getType());
    154 		llvm::VectorType *dstTy = llvm::cast<llvm::VectorType>(dstType);
    155 
    156 		llvm::Value *undef = llvm::UndefValue::get(srcTy);
    157 		llvm::SmallVector<uint32_t, 16> mask(dstTy->getNumElements());
    158 		std::iota(mask.begin(), mask.end(), 0);
    159 		llvm::Value *v = ::builder->CreateShuffleVector(op, undef, mask);
    160 
    161 		return sext ? ::builder->CreateSExt(v, dstTy)
    162 		            : ::builder->CreateZExt(v, dstTy);
    163 	}
    164 
    165 	llvm::Value *lowerPABS(llvm::Value *v)
    166 	{
    167 		llvm::Value *zero = llvm::Constant::getNullValue(v->getType());
    168 		llvm::Value *cmp = ::builder->CreateICmp(llvm::ICmpInst::ICMP_SGT, v, zero);
    169 		llvm::Value *neg = ::builder->CreateNeg(v);
    170 		return ::builder->CreateSelect(cmp, v, neg);
    171 	}
    172 #endif  // defined(__i386__) || defined(__x86_64__)
    173 
    174 #if !defined(__i386__) && !defined(__x86_64__)
    175 	llvm::Value *lowerPFMINMAX(llvm::Value *x, llvm::Value *y,
    176 	                           llvm::FCmpInst::Predicate pred)
    177 	{
    178 		return ::builder->CreateSelect(::builder->CreateFCmp(pred, x, y), x, y);
    179 	}
    180 
    181 	llvm::Value *lowerRound(llvm::Value *x)
    182 	{
    183 		llvm::Function *nearbyint = llvm::Intrinsic::getDeclaration(
    184 			::module, llvm::Intrinsic::nearbyint, {x->getType()});
    185 		return ::builder->CreateCall(nearbyint, ARGS(x));
    186 	}
    187 
    188 	llvm::Value *lowerRoundInt(llvm::Value *x, llvm::Type *ty)
    189 	{
    190 		return ::builder->CreateFPToSI(lowerRound(x), ty);
    191 	}
    192 
    193 	llvm::Value *lowerFloor(llvm::Value *x)
    194 	{
    195 		llvm::Function *floor = llvm::Intrinsic::getDeclaration(
    196 			::module, llvm::Intrinsic::floor, {x->getType()});
    197 		return ::builder->CreateCall(floor, ARGS(x));
    198 	}
    199 
    200 	llvm::Value *lowerTrunc(llvm::Value *x)
    201 	{
    202 		llvm::Function *trunc = llvm::Intrinsic::getDeclaration(
    203 			::module, llvm::Intrinsic::trunc, {x->getType()});
    204 		return ::builder->CreateCall(trunc, ARGS(x));
    205 	}
    206 
    207 	// Packed add/sub saturatation
    208 	llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
    209 	{
    210 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
    211 		llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
    212 
    213 		unsigned numBits = ty->getScalarSizeInBits();
    214 
    215 		llvm::Value *max, *min, *extX, *extY;
    216 		if (isSigned)
    217 		{
    218 			max = llvm::ConstantInt::get(extTy, (1LL << (numBits - 1)) - 1, true);
    219 			min = llvm::ConstantInt::get(extTy, (-1LL << (numBits - 1)), true);
    220 			extX = ::builder->CreateSExt(x, extTy);
    221 			extY = ::builder->CreateSExt(y, extTy);
    222 		}
    223 		else
    224 		{
    225 			assert(numBits <= 64);
    226 			uint64_t maxVal = (numBits == 64) ? ~0ULL : (1ULL << numBits) - 1;
    227 			max = llvm::ConstantInt::get(extTy, maxVal, false);
    228 			min = llvm::ConstantInt::get(extTy, 0, false);
    229 			extX = ::builder->CreateZExt(x, extTy);
    230 			extY = ::builder->CreateZExt(y, extTy);
    231 		}
    232 
    233 		llvm::Value *res = isAdd ? ::builder->CreateAdd(extX, extY)
    234 		                         : ::builder->CreateSub(extX, extY);
    235 
    236 		res = lowerPMINMAX(res, min, llvm::ICmpInst::ICMP_SGT);
    237 		res = lowerPMINMAX(res, max, llvm::ICmpInst::ICMP_SLT);
    238 
    239 		return ::builder->CreateTrunc(res, ty);
    240 	}
    241 
    242 	llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
    243 	{
    244 		return lowerPSAT(x, y, true, false);
    245 	}
    246 
    247 	llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
    248 	{
    249 		return lowerPSAT(x, y, true, true);
    250 	}
    251 
    252 	llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
    253 	{
    254 		return lowerPSAT(x, y, false, false);
    255 	}
    256 
    257 	llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
    258 	{
    259 		return lowerPSAT(x, y, false, true);
    260 	}
    261 
    262 	llvm::Value *lowerSQRT(llvm::Value *x)
    263 	{
    264 		llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(
    265 			::module, llvm::Intrinsic::sqrt, {x->getType()});
    266 		return ::builder->CreateCall(sqrt, ARGS(x));
    267 	}
    268 
    269 	llvm::Value *lowerRCP(llvm::Value *x)
    270 	{
    271 		llvm::Type *ty = x->getType();
    272 		llvm::Constant *one;
    273 		if (llvm::VectorType *vectorTy = llvm::dyn_cast<llvm::VectorType>(ty))
    274 		{
    275 			one = llvm::ConstantVector::getSplat(
    276 				vectorTy->getNumElements(),
    277 				llvm::ConstantFP::get(vectorTy->getElementType(), 1));
    278 		}
    279 		else
    280 		{
    281 			one = llvm::ConstantFP::get(ty, 1);
    282 		}
    283 		return ::builder->CreateFDiv(one, x);
    284 	}
    285 
    286 	llvm::Value *lowerRSQRT(llvm::Value *x)
    287 	{
    288 		return lowerRCP(lowerSQRT(x));
    289 	}
    290 
    291 	llvm::Value *lowerVectorShl(llvm::Value *x, uint64_t scalarY)
    292 	{
    293 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
    294 		llvm::Value *y = llvm::ConstantVector::getSplat(
    295 			ty->getNumElements(),
    296 			llvm::ConstantInt::get(ty->getElementType(), scalarY));
    297 		return ::builder->CreateShl(x, y);
    298 	}
    299 
    300 	llvm::Value *lowerVectorAShr(llvm::Value *x, uint64_t scalarY)
    301 	{
    302 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
    303 		llvm::Value *y = llvm::ConstantVector::getSplat(
    304 			ty->getNumElements(),
    305 			llvm::ConstantInt::get(ty->getElementType(), scalarY));
    306 		return ::builder->CreateAShr(x, y);
    307 	}
    308 
    309 	llvm::Value *lowerVectorLShr(llvm::Value *x, uint64_t scalarY)
    310 	{
    311 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
    312 		llvm::Value *y = llvm::ConstantVector::getSplat(
    313 			ty->getNumElements(),
    314 			llvm::ConstantInt::get(ty->getElementType(), scalarY));
    315 		return ::builder->CreateLShr(x, y);
    316 	}
    317 
    318 	llvm::Value *lowerMulAdd(llvm::Value *x, llvm::Value *y)
    319 	{
    320 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
    321 		llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
    322 
    323 		llvm::Value *extX = ::builder->CreateSExt(x, extTy);
    324 		llvm::Value *extY = ::builder->CreateSExt(y, extTy);
    325 		llvm::Value *mult = ::builder->CreateMul(extX, extY);
    326 
    327 		llvm::Value *undef = llvm::UndefValue::get(extTy);
    328 
    329 		llvm::SmallVector<uint32_t, 16> evenIdx;
    330 		llvm::SmallVector<uint32_t, 16> oddIdx;
    331 		for (uint64_t i = 0, n = ty->getNumElements(); i < n; i += 2)
    332 		{
    333 			evenIdx.push_back(i);
    334 			oddIdx.push_back(i + 1);
    335 		}
    336 
    337 		llvm::Value *lhs = ::builder->CreateShuffleVector(mult, undef, evenIdx);
    338 		llvm::Value *rhs = ::builder->CreateShuffleVector(mult, undef, oddIdx);
    339 		return ::builder->CreateAdd(lhs, rhs);
    340 	}
    341 
    342 	llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
    343 	{
    344 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
    345 		llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
    346 
    347 		llvm::Value *extX, *extY;
    348 		if (sext)
    349 		{
    350 			extX = ::builder->CreateSExt(x, extTy);
    351 			extY = ::builder->CreateSExt(y, extTy);
    352 		}
    353 		else
    354 		{
    355 			extX = ::builder->CreateZExt(x, extTy);
    356 			extY = ::builder->CreateZExt(y, extTy);
    357 		}
    358 
    359 		llvm::Value *mult = ::builder->CreateMul(extX, extY);
    360 
    361 		llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType());
    362 		llvm::Value *mulh = ::builder->CreateAShr(mult, intTy->getIntegerBitWidth());
    363 		return ::builder->CreateTrunc(mulh, ty);
    364 	}
    365 
    366 	llvm::Value *lowerPack(llvm::Value *x, llvm::Value *y, bool isSigned)
    367 	{
    368 		llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(x->getType());
    369 		llvm::VectorType *dstTy = llvm::VectorType::getTruncatedElementVectorType(srcTy);
    370 
    371 		llvm::IntegerType *dstElemTy =
    372 			llvm::cast<llvm::IntegerType>(dstTy->getElementType());
    373 
    374 		uint64_t truncNumBits = dstElemTy->getIntegerBitWidth();
    375 		assert(truncNumBits < 64 && "shift 64 must be handled separately");
    376 		llvm::Constant *max, *min;
    377 		if (isSigned)
    378 		{
    379 			max = llvm::ConstantInt::get(srcTy, (1LL << (truncNumBits - 1)) - 1, true);
    380 			min = llvm::ConstantInt::get(srcTy, (-1LL << (truncNumBits - 1)), true);
    381 		}
    382 		else
    383 		{
    384 			max = llvm::ConstantInt::get(srcTy, (1ULL << truncNumBits) - 1, false);
    385 			min = llvm::ConstantInt::get(srcTy, 0, false);
    386 		}
    387 
    388 		x = lowerPMINMAX(x, min, llvm::ICmpInst::ICMP_SGT);
    389 		x = lowerPMINMAX(x, max, llvm::ICmpInst::ICMP_SLT);
    390 		y = lowerPMINMAX(y, min, llvm::ICmpInst::ICMP_SGT);
    391 		y = lowerPMINMAX(y, max, llvm::ICmpInst::ICMP_SLT);
    392 
    393 		x = ::builder->CreateTrunc(x, dstTy);
    394 		y = ::builder->CreateTrunc(y, dstTy);
    395 
    396 		llvm::SmallVector<uint32_t, 16> index(srcTy->getNumElements() * 2);
    397 		std::iota(index.begin(), index.end(), 0);
    398 
    399 		return ::builder->CreateShuffleVector(x, y, index);
    400 	}
    401 
    402 	llvm::Value *lowerSignMask(llvm::Value *x, llvm::Type *retTy)
    403 	{
    404 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
    405 		llvm::Constant *zero = llvm::ConstantInt::get(ty, 0);
    406 		llvm::Value *cmp = ::builder->CreateICmpSLT(x, zero);
    407 
    408 		llvm::Value *ret = ::builder->CreateZExt(
    409 			::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
    410 		for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
    411 		{
    412 			llvm::Value *elem = ::builder->CreateZExt(
    413 				::builder->CreateExtractElement(cmp, i), retTy);
    414 			ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
    415 		}
    416 		return ret;
    417 	}
    418 
    419 	llvm::Value *lowerFPSignMask(llvm::Value *x, llvm::Type *retTy)
    420 	{
    421 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
    422 		llvm::Constant *zero = llvm::ConstantFP::get(ty, 0);
    423 		llvm::Value *cmp = ::builder->CreateFCmpULT(x, zero);
    424 
    425 		llvm::Value *ret = ::builder->CreateZExt(
    426 			::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
    427 		for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
    428 		{
    429 			llvm::Value *elem = ::builder->CreateZExt(
    430 				::builder->CreateExtractElement(cmp, i), retTy);
    431 			ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
    432 		}
    433 		return ret;
    434 	}
    435 #endif  // !defined(__i386__) && !defined(__x86_64__)
    436 #endif  // REACTOR_LLVM_VERSION >= 7
    437 }
    438 
    439 namespace rr
    440 {
    441 #if REACTOR_LLVM_VERSION < 7
    442 	class LLVMReactorJIT
    443 	{
    444 	private:
    445 		std::string arch;
    446 		llvm::SmallVector<std::string, 16> mattrs;
    447 		llvm::ExecutionEngine *executionEngine;
    448 		LLVMRoutineManager *routineManager;
    449 
    450 	public:
    451 		LLVMReactorJIT(const std::string &arch_,
    452 		               const llvm::SmallVectorImpl<std::string> &mattrs_) :
    453 			arch(arch_),
    454 			mattrs(mattrs_.begin(), mattrs_.end()),
    455 			executionEngine(nullptr),
    456 			routineManager(nullptr)
    457 		{
    458 		}
    459 
    460 		void startSession()
    461 		{
    462 			std::string error;
    463 
    464 			::module = new llvm::Module("", *::context);
    465 
    466 			routineManager = new LLVMRoutineManager();
    467 
    468 			llvm::TargetMachine *targetMachine =
    469 				llvm::EngineBuilder::selectTarget(
    470 					::module, arch, "", mattrs, llvm::Reloc::Default,
    471 					llvm::CodeModel::JITDefault, &error);
    472 
    473 			executionEngine = llvm::JIT::createJIT(
    474 				::module, &error, routineManager, llvm::CodeGenOpt::Aggressive,
    475 				true, targetMachine);
    476 		}
    477 
    478 		void endSession()
    479 		{
    480 			delete executionEngine;
    481 			executionEngine = nullptr;
    482 			routineManager = nullptr;
    483 
    484 			::function = nullptr;
    485 			::module = nullptr;
    486 		}
    487 
    488 		LLVMRoutine *acquireRoutine(llvm::Function *func)
    489 		{
    490 			void *entry = executionEngine->getPointerToFunction(::function);
    491 			return routineManager->acquireRoutine(entry);
    492 		}
    493 
    494 		void optimize(llvm::Module *module)
    495 		{
    496 			static llvm::PassManager *passManager = nullptr;
    497 
    498 			if(!passManager)
    499 			{
    500 				passManager = new llvm::PassManager();
    501 
    502 				passManager->add(new llvm::TargetData(*executionEngine->getTargetData()));
    503 				passManager->add(llvm::createScalarReplAggregatesPass());
    504 
    505 				for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
    506 				{
    507 					switch(optimization[pass])
    508 					{
    509 					case Disabled:                                                                       break;
    510 					case CFGSimplification:    passManager->add(llvm::createCFGSimplificationPass());    break;
    511 					case LICM:                 passManager->add(llvm::createLICMPass());                 break;
    512 					case AggressiveDCE:        passManager->add(llvm::createAggressiveDCEPass());        break;
    513 					case GVN:                  passManager->add(llvm::createGVNPass());                  break;
    514 					case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
    515 					case Reassociate:          passManager->add(llvm::createReassociatePass());          break;
    516 					case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
    517 					case SCCP:                 passManager->add(llvm::createSCCPPass());                 break;
    518 					case ScalarReplAggregates: passManager->add(llvm::createScalarReplAggregatesPass()); break;
    519 					default:
    520 						assert(false);
    521 					}
    522 				}
    523 			}
    524 
    525 			passManager->run(*::module);
    526 		}
    527 	};
    528 #else
    529 	class ExternalFunctionSymbolResolver
    530 	{
    531 	private:
    532 		using FunctionMap = std::unordered_map<std::string, void *>;
    533 		FunctionMap func_;
    534 
    535 	public:
    536 		ExternalFunctionSymbolResolver()
    537 		{
    538 			func_.emplace("floorf", reinterpret_cast<void*>(floorf));
    539 			func_.emplace("nearbyintf", reinterpret_cast<void*>(nearbyintf));
    540 			func_.emplace("truncf", reinterpret_cast<void*>(truncf));
    541 		}
    542 
    543 		void *findSymbol(const std::string &name) const
    544 		{
    545 			FunctionMap::const_iterator it = func_.find(name);
    546 			return (it != func_.end()) ? it->second : nullptr;
    547 		}
    548 	};
    549 
    550 	class LLVMReactorJIT
    551 	{
    552 	private:
    553 		using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
    554 		using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
    555 
    556 		llvm::orc::ExecutionSession session;
    557 		ExternalFunctionSymbolResolver externalSymbolResolver;
    558 		std::shared_ptr<llvm::orc::SymbolResolver> resolver;
    559 		std::unique_ptr<llvm::TargetMachine> targetMachine;
    560 		const llvm::DataLayout dataLayout;
    561 		ObjLayer objLayer;
    562 		CompileLayer compileLayer;
    563 		size_t emittedFunctionsNum;
    564 
    565 	public:
    566 		LLVMReactorJIT(const char *arch, const llvm::SmallVectorImpl<std::string>& mattrs,
    567 					   const llvm::TargetOptions &targetOpts):
    568 			resolver(createLegacyLookupResolver(
    569 				session,
    570 				[this](const std::string &name) {
    571 					void *func = externalSymbolResolver.findSymbol(name);
    572 					if (func != nullptr)
    573 					{
    574 						return llvm::JITSymbol(
    575 							reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
    576 					}
    577 
    578 					return objLayer.findSymbol(name, true);
    579 				},
    580 				[](llvm::Error err) {
    581 					if (err)
    582 					{
    583 						// TODO: Log the symbol resolution errors.
    584 						return;
    585 					}
    586 				})),
    587 			targetMachine(llvm::EngineBuilder()
    588 				.setMArch(arch)
    589 				.setMAttrs(mattrs)
    590 				.setTargetOptions(targetOpts)
    591 				.selectTarget()),
    592 			dataLayout(targetMachine->createDataLayout()),
    593 			objLayer(
    594 				session,
    595 				[this](llvm::orc::VModuleKey) {
    596 					return ObjLayer::Resources{
    597 						std::make_shared<llvm::SectionMemoryManager>(),
    598 						resolver};
    599 				}),
    600 			compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine)),
    601 			emittedFunctionsNum(0)
    602 		{
    603 		}
    604 
    605 		void startSession()
    606 		{
    607 			::module = new llvm::Module("", *::context);
    608 		}
    609 
    610 		void endSession()
    611 		{
    612 			::function = nullptr;
    613 			::module = nullptr;
    614 		}
    615 
    616 		LLVMRoutine *acquireRoutine(llvm::Function *func)
    617 		{
    618 			std::string name = "f" + llvm::Twine(emittedFunctionsNum++).str();
    619 			func->setName(name);
    620 			func->setLinkage(llvm::GlobalValue::ExternalLinkage);
    621 			func->setDoesNotThrow();
    622 
    623 			std::unique_ptr<llvm::Module> mod(::module);
    624 			::module = nullptr;
    625 			mod->setDataLayout(dataLayout);
    626 
    627 			auto moduleKey = session.allocateVModule();
    628 			llvm::cantFail(compileLayer.addModule(moduleKey, std::move(mod)));
    629 
    630 			std::string mangledName;
    631 			{
    632 				llvm::raw_string_ostream mangledNameStream(mangledName);
    633 				llvm::Mangler::getNameWithPrefix(mangledNameStream, name, dataLayout);
    634 			}
    635 
    636 			llvm::JITSymbol symbol = compileLayer.findSymbolIn(moduleKey, mangledName, false);
    637 
    638 			llvm::Expected<llvm::JITTargetAddress> expectAddr = symbol.getAddress();
    639 			if(!expectAddr)
    640 			{
    641 				return nullptr;
    642 			}
    643 
    644 			void *addr = reinterpret_cast<void *>(static_cast<intptr_t>(expectAddr.get()));
    645 			return new LLVMRoutine(addr, releaseRoutineCallback, this, moduleKey);
    646 		}
    647 
    648 		void optimize(llvm::Module *module)
    649 		{
    650 			std::unique_ptr<llvm::legacy::PassManager> passManager(
    651 				new llvm::legacy::PassManager());
    652 
    653 			passManager->add(llvm::createSROAPass());
    654 
    655 			for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
    656 			{
    657 				switch(optimization[pass])
    658 				{
    659 				case Disabled:                                                                       break;
    660 				case CFGSimplification:    passManager->add(llvm::createCFGSimplificationPass());    break;
    661 				case LICM:                 passManager->add(llvm::createLICMPass());                 break;
    662 				case AggressiveDCE:        passManager->add(llvm::createAggressiveDCEPass());        break;
    663 				case GVN:                  passManager->add(llvm::createGVNPass());                  break;
    664 				case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
    665 				case Reassociate:          passManager->add(llvm::createReassociatePass());          break;
    666 				case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
    667 				case SCCP:                 passManager->add(llvm::createSCCPPass());                 break;
    668 				case ScalarReplAggregates: passManager->add(llvm::createSROAPass());                 break;
    669 				default:
    670 				                           assert(false);
    671 				}
    672 			}
    673 
    674 			passManager->run(*::module);
    675 		}
    676 
    677 	private:
    678 		void releaseRoutineModule(llvm::orc::VModuleKey moduleKey)
    679 		{
    680 			llvm::cantFail(compileLayer.removeModule(moduleKey));
    681 		}
    682 
    683 		static void releaseRoutineCallback(LLVMReactorJIT *jit, uint64_t moduleKey)
    684 		{
    685 			jit->releaseRoutineModule(moduleKey);
    686 		}
    687 	};
    688 #endif
    689 
	// Ordered list of optimization passes read by LLVMReactorJIT::optimize().
	// The list ends at the first Disabled entry; remaining slots are
	// zero-initialized.
	Optimization optimization[10] = {InstructionCombining, Disabled};
    691 
	// Tags for vector types that are logically shorter than 128 bits. A tag is
	// stored directly in a Type* (see T(EmulatedType)) and is told apart from
	// a real type pointer by being smaller than EmulatedTypeCount.
	enum EmulatedType
	{
		Type_v2i32,
		Type_v4i16,
		Type_v2i16,
		Type_v8i8,
		Type_v4i8,
		Type_v2f32,
		EmulatedTypeCount   // Number of tags; not a type itself.
	};
    702 
    703 	llvm::Type *T(Type *t)
    704 	{
    705 		uintptr_t type = reinterpret_cast<uintptr_t>(t);
    706 		if(type < EmulatedTypeCount)
    707 		{
    708 			// Use 128-bit vectors to implement logically shorter ones.
    709 			switch(type)
    710 			{
    711 			case Type_v2i32: return T(Int4::getType());
    712 			case Type_v4i16: return T(Short8::getType());
    713 			case Type_v2i16: return T(Short8::getType());
    714 			case Type_v8i8:  return T(Byte16::getType());
    715 			case Type_v4i8:  return T(Byte16::getType());
    716 			case Type_v2f32: return T(Float4::getType());
    717 			default: assert(false);
    718 			}
    719 		}
    720 
    721 		return reinterpret_cast<llvm::Type*>(t);
    722 	}
    723 
    724 	inline Type *T(llvm::Type *t)
    725 	{
    726 		return reinterpret_cast<Type*>(t);
    727 	}
    728 
    729 	Type *T(EmulatedType t)
    730 	{
    731 		return reinterpret_cast<Type*>(t);
    732 	}
    733 
    734 	inline llvm::Value *V(Value *t)
    735 	{
    736 		return reinterpret_cast<llvm::Value*>(t);
    737 	}
    738 
    739 	inline Value *V(llvm::Value *t)
    740 	{
    741 		return reinterpret_cast<Value*>(t);
    742 	}
    743 
    744 	inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
    745 	{
    746 		return reinterpret_cast<std::vector<llvm::Type*>&>(t);
    747 	}
    748 
    749 	inline llvm::BasicBlock *B(BasicBlock *t)
    750 	{
    751 		return reinterpret_cast<llvm::BasicBlock*>(t);
    752 	}
    753 
    754 	inline BasicBlock *B(llvm::BasicBlock *t)
    755 	{
    756 		return reinterpret_cast<BasicBlock*>(t);
    757 	}
    758 
    759 	static size_t typeSize(Type *type)
    760 	{
    761 		uintptr_t t = reinterpret_cast<uintptr_t>(type);
    762 		if(t < EmulatedTypeCount)
    763 		{
    764 			switch(t)
    765 			{
    766 			case Type_v2i32: return 8;
    767 			case Type_v4i16: return 8;
    768 			case Type_v2i16: return 4;
    769 			case Type_v8i8:  return 8;
    770 			case Type_v4i8:  return 4;
    771 			case Type_v2f32: return 8;
    772 			default: assert(false);
    773 			}
    774 		}
    775 
    776 		return T(type)->getPrimitiveSizeInBits() / 8;
    777 	}
    778 
    779 	static unsigned int elementCount(Type *type)
    780 	{
    781 		uintptr_t t = reinterpret_cast<uintptr_t>(type);
    782 		if(t < EmulatedTypeCount)
    783 		{
    784 			switch(t)
    785 			{
    786 			case Type_v2i32: return 2;
    787 			case Type_v4i16: return 4;
    788 			case Type_v2i16: return 2;
    789 			case Type_v8i8:  return 8;
    790 			case Type_v4i8:  return 4;
    791 			case Type_v2f32: return 2;
    792 			default: assert(false);
    793 			}
    794 		}
    795 
    796 		return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
    797 	}
    798 
	// Begins a code generation session.
	//
	// Locks codegenMutex (released again by ~Nucleus), initializes the native
	// LLVM target, creates the shared context, JIT and IR builder on first
	// use, and starts a new JIT session.
	Nucleus::Nucleus()
	{
		::codegenMutex.lock();   // Reactor and LLVM are currently not thread safe

		llvm::InitializeNativeTarget();

#if REACTOR_LLVM_VERSION >= 7
		llvm::InitializeNativeTargetAsmPrinter();
		llvm::InitializeNativeTargetAsmParser();
#endif

		// The context is created once and reused by later Nucleus instances.
		if(!::context)
		{
			::context = new llvm::LLVMContext();
		}

		// Architecture name handed to LLVM's target selection, chosen from
		// the compiler's predefined macros.
		#if defined(__x86_64__)
			static const char arch[] = "x86-64";
		#elif defined(__i386__)
			static const char arch[] = "x86";
		#elif defined(__aarch64__)
			static const char arch[] = "arm64";
		#elif defined(__arm__)
			static const char arch[] = "arm";
		#elif defined(__mips__)
			#if defined(__mips64)
			    static const char arch[] = "mips64el";
			#else
			    static const char arch[] = "mipsel";
			#endif
		#else
		#error "unknown architecture"
		#endif

		// Target attributes: on x86 each feature is enabled ("+") or disabled
		// ("-") according to what CPUID reports.
		llvm::SmallVector<std::string, 1> mattrs;
#if defined(__i386__) || defined(__x86_64__)
		mattrs.push_back(CPUID::supportsMMX()    ? "+mmx"    : "-mmx");
		mattrs.push_back(CPUID::supportsCMOV()   ? "+cmov"   : "-cmov");
		mattrs.push_back(CPUID::supportsSSE()    ? "+sse"    : "-sse");
		mattrs.push_back(CPUID::supportsSSE2()   ? "+sse2"   : "-sse2");
		mattrs.push_back(CPUID::supportsSSE3()   ? "+sse3"   : "-sse3");
		mattrs.push_back(CPUID::supportsSSSE3()  ? "+ssse3"  : "-ssse3");
		// The SSE4.1 attribute is spelled differently for the two LLVM versions.
#if REACTOR_LLVM_VERSION < 7
		mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse41"  : "-sse41");
#else
		mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse4.1" : "-sse4.1");
#endif
#elif defined(__arm__)
#if __ARM_ARCH >= 8
		mattrs.push_back("+armv8-a");
#else
		// armv7-a requires compiler-rt routines; otherwise, compiled kernel
		// might fail to link.
#endif
#endif

		// Floating-point options: set through LLVM globals before version 7
		// and through a TargetOptions object from version 7 onwards. Note
		// that UnsafeFPMath is enabled in the former and disabled in the latter.
#if REACTOR_LLVM_VERSION < 7
		llvm::JITEmitDebugInfo = false;
		llvm::UnsafeFPMath = true;
		// llvm::NoInfsFPMath = true;
		// llvm::NoNaNsFPMath = true;
#else
		llvm::TargetOptions targetOpts;
		targetOpts.UnsafeFPMath = false;
		// targetOpts.NoInfsFPMath = true;
		// targetOpts.NoNaNsFPMath = true;
#endif

		// The JIT is created once and reused by later Nucleus instances.
		if(!::reactorJIT)
		{
#if REACTOR_LLVM_VERSION < 7
			::reactorJIT = new LLVMReactorJIT(arch, mattrs);
#else
			::reactorJIT = new LLVMReactorJIT(arch, mattrs, targetOpts);
#endif
		}

		::reactorJIT->startSession();

		// The IR builder is likewise created once and reused.
		if(!::builder)
		{
			::builder = new llvm::IRBuilder<>(*::context);
		}
	}
    883 
    884 	Nucleus::~Nucleus()
    885 	{
    886 		::reactorJIT->endSession();
    887 
    888 		::codegenMutex.unlock();
    889 	}
    890 
    891 	Routine *Nucleus::acquireRoutine(const char *name, bool runOptimizations)
    892 	{
    893 		if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
    894 		{
    895 			llvm::Type *type = ::function->getReturnType();
    896 
    897 			if(type->isVoidTy())
    898 			{
    899 				createRetVoid();
    900 			}
    901 			else
    902 			{
    903 				createRet(V(llvm::UndefValue::get(type)));
    904 			}
    905 		}
    906 
    907 		if(false)
    908 		{
    909 			#if REACTOR_LLVM_VERSION < 7
    910 				std::string error;
    911 				llvm::raw_fd_ostream file((std::string(name) + "-llvm-dump-unopt.txt").c_str(), error);
    912 			#else
    913 				std::error_code error;
    914 				llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
    915 			#endif
    916 
    917 			::module->print(file, 0);
    918 		}
    919 
    920 		if(runOptimizations)
    921 		{
    922 			optimize();
    923 		}
    924 
    925 		if(false)
    926 		{
    927 			#if REACTOR_LLVM_VERSION < 7
    928 				std::string error;
    929 				llvm::raw_fd_ostream file((std::string(name) + "-llvm-dump-opt.txt").c_str(), error);
    930 			#else
    931 				std::error_code error;
    932 				llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
    933 			#endif
    934 
    935 			::module->print(file, 0);
    936 		}
    937 
    938 		LLVMRoutine *routine = ::reactorJIT->acquireRoutine(::function);
    939 
    940 		return routine;
    941 	}
    942 
    943 	void Nucleus::optimize()
    944 	{
    945 		::reactorJIT->optimize(::module);
    946 	}
    947 
    948 	Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
    949 	{
    950 		// Need to allocate it in the entry block for mem2reg to work
    951 		llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
    952 
    953 		llvm::Instruction *declaration;
    954 
    955 		if(arraySize)
    956 		{
    957 #if REACTOR_LLVM_VERSION < 7
    958 			declaration = new llvm::AllocaInst(T(type), V(Nucleus::createConstantInt(arraySize)));
    959 #else
    960 			declaration = new llvm::AllocaInst(T(type), 0, V(Nucleus::createConstantInt(arraySize)));
    961 #endif
    962 		}
    963 		else
    964 		{
    965 #if REACTOR_LLVM_VERSION < 7
    966 			declaration = new llvm::AllocaInst(T(type), (llvm::Value*)nullptr);
    967 #else
    968 			declaration = new llvm::AllocaInst(T(type), 0, (llvm::Value*)nullptr);
    969 #endif
    970 		}
    971 
    972 		entryBlock.getInstList().push_front(declaration);
    973 
    974 		return V(declaration);
    975 	}
    976 
    977 	BasicBlock *Nucleus::createBasicBlock()
    978 	{
    979 		return B(llvm::BasicBlock::Create(*::context, "", ::function));
    980 	}
    981 
    982 	BasicBlock *Nucleus::getInsertBlock()
    983 	{
    984 		return B(::builder->GetInsertBlock());
    985 	}
    986 
    987 	void Nucleus::setInsertBlock(BasicBlock *basicBlock)
    988 	{
    989 	//	assert(::builder->GetInsertBlock()->back().isTerminator());
    990 		::builder->SetInsertPoint(B(basicBlock));
    991 	}
    992 
    993 	void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
    994 	{
    995 		llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false);
    996 		::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
    997 		::function->setCallingConv(llvm::CallingConv::C);
    998 
    999 		#if defined(_WIN32) && REACTOR_LLVM_VERSION >= 7
   1000 			// FIXME(capn):
   1001 			// On Windows, stack memory is committed in increments of 4 kB pages, with the last page
   1002 			// having a trap which allows the OS to grow the stack. For functions with a stack frame
   1003 			// larger than 4 kB this can cause an issue when a variable is accessed beyond the guard
   1004 			// page. Therefore the compiler emits a call to __chkstk in the function prolog to probe
   1005 			// the stack and ensure all pages have been committed. This is currently broken in LLVM
   1006 			// JIT, but we can prevent emitting the stack probe call:
   1007 			::function->addFnAttr("stack-probe-size", "1048576");
   1008 		#endif
   1009 
   1010 		::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::function));
   1011 	}
   1012 
   1013 	Value *Nucleus::getArgument(unsigned int index)
   1014 	{
   1015 		llvm::Function::arg_iterator args = ::function->arg_begin();
   1016 
   1017 		while(index)
   1018 		{
   1019 			args++;
   1020 			index--;
   1021 		}
   1022 
   1023 		return V(&*args);
   1024 	}
   1025 
   1026 	void Nucleus::createRetVoid()
   1027 	{
   1028 		::builder->CreateRetVoid();
   1029 	}
   1030 
   1031 	void Nucleus::createRet(Value *v)
   1032 	{
   1033 		::builder->CreateRet(V(v));
   1034 	}
   1035 
   1036 	void Nucleus::createBr(BasicBlock *dest)
   1037 	{
   1038 		::builder->CreateBr(B(dest));
   1039 	}
   1040 
   1041 	void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
   1042 	{
   1043 		::builder->CreateCondBr(V(cond), B(ifTrue), B(ifFalse));
   1044 	}
   1045 
   1046 	Value *Nucleus::createAdd(Value *lhs, Value *rhs)
   1047 	{
   1048 		return V(::builder->CreateAdd(V(lhs), V(rhs)));
   1049 	}
   1050 
   1051 	Value *Nucleus::createSub(Value *lhs, Value *rhs)
   1052 	{
   1053 		return V(::builder->CreateSub(V(lhs), V(rhs)));
   1054 	}
   1055 
   1056 	Value *Nucleus::createMul(Value *lhs, Value *rhs)
   1057 	{
   1058 		return V(::builder->CreateMul(V(lhs), V(rhs)));
   1059 	}
   1060 
   1061 	Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
   1062 	{
   1063 		return V(::builder->CreateUDiv(V(lhs), V(rhs)));
   1064 	}
   1065 
   1066 	Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
   1067 	{
   1068 		return V(::builder->CreateSDiv(V(lhs), V(rhs)));
   1069 	}
   1070 
   1071 	Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
   1072 	{
   1073 		return V(::builder->CreateFAdd(V(lhs), V(rhs)));
   1074 	}
   1075 
   1076 	Value *Nucleus::createFSub(Value *lhs, Value *rhs)
   1077 	{
   1078 		return V(::builder->CreateFSub(V(lhs), V(rhs)));
   1079 	}
   1080 
   1081 	Value *Nucleus::createFMul(Value *lhs, Value *rhs)
   1082 	{
   1083 		return V(::builder->CreateFMul(V(lhs), V(rhs)));
   1084 	}
   1085 
   1086 	Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
   1087 	{
   1088 		return V(::builder->CreateFDiv(V(lhs), V(rhs)));
   1089 	}
   1090 
   1091 	Value *Nucleus::createURem(Value *lhs, Value *rhs)
   1092 	{
   1093 		return V(::builder->CreateURem(V(lhs), V(rhs)));
   1094 	}
   1095 
   1096 	Value *Nucleus::createSRem(Value *lhs, Value *rhs)
   1097 	{
   1098 		return V(::builder->CreateSRem(V(lhs), V(rhs)));
   1099 	}
   1100 
   1101 	Value *Nucleus::createFRem(Value *lhs, Value *rhs)
   1102 	{
   1103 		return V(::builder->CreateFRem(V(lhs), V(rhs)));
   1104 	}
   1105 
   1106 	Value *Nucleus::createShl(Value *lhs, Value *rhs)
   1107 	{
   1108 		return V(::builder->CreateShl(V(lhs), V(rhs)));
   1109 	}
   1110 
   1111 	Value *Nucleus::createLShr(Value *lhs, Value *rhs)
   1112 	{
   1113 		return V(::builder->CreateLShr(V(lhs), V(rhs)));
   1114 	}
   1115 
   1116 	Value *Nucleus::createAShr(Value *lhs, Value *rhs)
   1117 	{
   1118 		return V(::builder->CreateAShr(V(lhs), V(rhs)));
   1119 	}
   1120 
   1121 	Value *Nucleus::createAnd(Value *lhs, Value *rhs)
   1122 	{
   1123 		return V(::builder->CreateAnd(V(lhs), V(rhs)));
   1124 	}
   1125 
   1126 	Value *Nucleus::createOr(Value *lhs, Value *rhs)
   1127 	{
   1128 		return V(::builder->CreateOr(V(lhs), V(rhs)));
   1129 	}
   1130 
   1131 	Value *Nucleus::createXor(Value *lhs, Value *rhs)
   1132 	{
   1133 		return V(::builder->CreateXor(V(lhs), V(rhs)));
   1134 	}
   1135 
   1136 	Value *Nucleus::createNeg(Value *v)
   1137 	{
   1138 		return V(::builder->CreateNeg(V(v)));
   1139 	}
   1140 
   1141 	Value *Nucleus::createFNeg(Value *v)
   1142 	{
   1143 		return V(::builder->CreateFNeg(V(v)));
   1144 	}
   1145 
   1146 	Value *Nucleus::createNot(Value *v)
   1147 	{
   1148 		return V(::builder->CreateNot(V(v)));
   1149 	}
   1150 
   1151 	Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
   1152 	{
   1153 		uintptr_t t = reinterpret_cast<uintptr_t>(type);
   1154 		if(t < EmulatedTypeCount)
   1155 		{
   1156 			switch(t)
   1157 			{
   1158 			case Type_v2i32:
   1159 			case Type_v4i16:
   1160 			case Type_v8i8:
   1161 			case Type_v2f32:
   1162 				return createBitCast(
   1163 					createInsertElement(
   1164 						V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))),
   1165 						createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment),
   1166 						0),
   1167 					type);
   1168 			case Type_v2i16:
   1169 			case Type_v4i8:
   1170 				if(alignment != 0)   // Not a local variable (all vectors are 128-bit).
   1171 				{
   1172 					Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
   1173 					Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment);
   1174 					i = createZExt(i, Long::getType());
   1175 					Value *v = createInsertElement(u, i, 0);
   1176 					return createBitCast(v, type);
   1177 				}
   1178 				break;
   1179 			default:
   1180 				assert(false);
   1181 			}
   1182 		}
   1183 
   1184 		assert(V(ptr)->getType()->getContainedType(0) == T(type));
   1185 		return V(::builder->Insert(new llvm::LoadInst(V(ptr), "", isVolatile, alignment)));
   1186 	}
   1187 
   1188 	Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
   1189 	{
   1190 		uintptr_t t = reinterpret_cast<uintptr_t>(type);
   1191 		if(t < EmulatedTypeCount)
   1192 		{
   1193 			switch(t)
   1194 			{
   1195 			case Type_v2i32:
   1196 			case Type_v4i16:
   1197 			case Type_v8i8:
   1198 			case Type_v2f32:
   1199 				createStore(
   1200 					createExtractElement(
   1201 						createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0),
   1202 					createBitCast(ptr, Pointer<Long>::getType()),
   1203 					Long::getType(), isVolatile, alignment);
   1204 				return value;
   1205 			case Type_v2i16:
   1206 			case Type_v4i8:
   1207 				if(alignment != 0)   // Not a local variable (all vectors are 128-bit).
   1208 				{
   1209 					createStore(
   1210 						createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0),
   1211 						createBitCast(ptr, Pointer<Int>::getType()),
   1212 						Int::getType(), isVolatile, alignment);
   1213 					return value;
   1214 				}
   1215 				break;
   1216 			default:
   1217 				assert(false);
   1218 			}
   1219 		}
   1220 
   1221 		assert(V(ptr)->getType()->getContainedType(0) == T(type));
   1222 		::builder->Insert(new llvm::StoreInst(V(value), V(ptr), isVolatile, alignment));
   1223 		return value;
   1224 	}
   1225 
   1226 	Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
   1227 	{
   1228 		if(sizeof(void*) == 8)
   1229 		{
   1230 			if(unsignedIndex)
   1231 			{
   1232 				index = createZExt(index, Long::getType());
   1233 			}
   1234 			else
   1235 			{
   1236 				index = createSExt(index, Long::getType());
   1237 			}
   1238 
   1239 			index = createMul(index, createConstantLong((int64_t)typeSize(type)));
   1240 		}
   1241 		else
   1242 		{
   1243 			index = createMul(index, createConstantInt((int)typeSize(type)));
   1244 		}
   1245 
   1246 		assert(V(ptr)->getType()->getContainedType(0) == T(type));
   1247 		return createBitCast(
   1248 			V(::builder->CreateGEP(V(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0)))), V(index))),
   1249 			T(llvm::PointerType::get(T(type), 0)));
   1250 	}
   1251 
   1252 	Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
   1253 	{
   1254 		return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), llvm::AtomicOrdering::SequentiallyConsistent));
   1255 	}
   1256 
   1257 	Value *Nucleus::createTrunc(Value *v, Type *destType)
   1258 	{
   1259 		return V(::builder->CreateTrunc(V(v), T(destType)));
   1260 	}
   1261 
   1262 	Value *Nucleus::createZExt(Value *v, Type *destType)
   1263 	{
   1264 		return V(::builder->CreateZExt(V(v), T(destType)));
   1265 	}
   1266 
   1267 	Value *Nucleus::createSExt(Value *v, Type *destType)
   1268 	{
   1269 		return V(::builder->CreateSExt(V(v), T(destType)));
   1270 	}
   1271 
   1272 	Value *Nucleus::createFPToSI(Value *v, Type *destType)
   1273 	{
   1274 		return V(::builder->CreateFPToSI(V(v), T(destType)));
   1275 	}
   1276 
   1277 	Value *Nucleus::createSIToFP(Value *v, Type *destType)
   1278 	{
   1279 		return V(::builder->CreateSIToFP(V(v), T(destType)));
   1280 	}
   1281 
   1282 	Value *Nucleus::createFPTrunc(Value *v, Type *destType)
   1283 	{
   1284 		return V(::builder->CreateFPTrunc(V(v), T(destType)));
   1285 	}
   1286 
   1287 	Value *Nucleus::createFPExt(Value *v, Type *destType)
   1288 	{
   1289 		return V(::builder->CreateFPExt(V(v), T(destType)));
   1290 	}
   1291 
   1292 	Value *Nucleus::createBitCast(Value *v, Type *destType)
   1293 	{
   1294 		// Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
   1295 		// support for casting between scalars and wide vectors. Emulate them by writing to the stack and
   1296 		// reading back as the destination type.
   1297 		if(!V(v)->getType()->isVectorTy() && T(destType)->isVectorTy())
   1298 		{
   1299 			Value *readAddress = allocateStackVariable(destType);
   1300 			Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(V(v)->getType(), 0)));
   1301 			createStore(v, writeAddress, T(V(v)->getType()));
   1302 			return createLoad(readAddress, destType);
   1303 		}
   1304 		else if(V(v)->getType()->isVectorTy() && !T(destType)->isVectorTy())
   1305 		{
   1306 			Value *writeAddress = allocateStackVariable(T(V(v)->getType()));
   1307 			createStore(v, writeAddress, T(V(v)->getType()));
   1308 			Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
   1309 			return createLoad(readAddress, destType);
   1310 		}
   1311 
   1312 		return V(::builder->CreateBitCast(V(v), T(destType)));
   1313 	}
   1314 
   1315 	Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
   1316 	{
   1317 		return V(::builder->CreateICmpEQ(V(lhs), V(rhs)));
   1318 	}
   1319 
   1320 	Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
   1321 	{
   1322 		return V(::builder->CreateICmpNE(V(lhs), V(rhs)));
   1323 	}
   1324 
   1325 	Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
   1326 	{
   1327 		return V(::builder->CreateICmpUGT(V(lhs), V(rhs)));
   1328 	}
   1329 
   1330 	Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
   1331 	{
   1332 		return V(::builder->CreateICmpUGE(V(lhs), V(rhs)));
   1333 	}
   1334 
   1335 	Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
   1336 	{
   1337 		return V(::builder->CreateICmpULT(V(lhs), V(rhs)));
   1338 	}
   1339 
   1340 	Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
   1341 	{
   1342 		return V(::builder->CreateICmpULE(V(lhs), V(rhs)));
   1343 	}
   1344 
   1345 	Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
   1346 	{
   1347 		return V(::builder->CreateICmpSGT(V(lhs), V(rhs)));
   1348 	}
   1349 
   1350 	Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
   1351 	{
   1352 		return V(::builder->CreateICmpSGE(V(lhs), V(rhs)));
   1353 	}
   1354 
   1355 	Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
   1356 	{
   1357 		return V(::builder->CreateICmpSLT(V(lhs), V(rhs)));
   1358 	}
   1359 
   1360 	Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
   1361 	{
   1362 		return V(::builder->CreateICmpSLE(V(lhs), V(rhs)));
   1363 	}
   1364 
   1365 	Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
   1366 	{
   1367 		return V(::builder->CreateFCmpOEQ(V(lhs), V(rhs)));
   1368 	}
   1369 
   1370 	Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
   1371 	{
   1372 		return V(::builder->CreateFCmpOGT(V(lhs), V(rhs)));
   1373 	}
   1374 
   1375 	Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
   1376 	{
   1377 		return V(::builder->CreateFCmpOGE(V(lhs), V(rhs)));
   1378 	}
   1379 
   1380 	Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
   1381 	{
   1382 		return V(::builder->CreateFCmpOLT(V(lhs), V(rhs)));
   1383 	}
   1384 
   1385 	Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
   1386 	{
   1387 		return V(::builder->CreateFCmpOLE(V(lhs), V(rhs)));
   1388 	}
   1389 
   1390 	Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
   1391 	{
   1392 		return V(::builder->CreateFCmpONE(V(lhs), V(rhs)));
   1393 	}
   1394 
   1395 	Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
   1396 	{
   1397 		return V(::builder->CreateFCmpORD(V(lhs), V(rhs)));
   1398 	}
   1399 
   1400 	Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
   1401 	{
   1402 		return V(::builder->CreateFCmpUNO(V(lhs), V(rhs)));
   1403 	}
   1404 
   1405 	Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
   1406 	{
   1407 		return V(::builder->CreateFCmpUEQ(V(lhs), V(rhs)));
   1408 	}
   1409 
   1410 	Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
   1411 	{
   1412 		return V(::builder->CreateFCmpUGT(V(lhs), V(rhs)));
   1413 	}
   1414 
   1415 	Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
   1416 	{
   1417 		return V(::builder->CreateFCmpUGE(V(lhs), V(rhs)));
   1418 	}
   1419 
   1420 	Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
   1421 	{
   1422 		return V(::builder->CreateFCmpULT(V(lhs), V(rhs)));
   1423 	}
   1424 
   1425 	Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
   1426 	{
   1427 		return V(::builder->CreateFCmpULE(V(lhs), V(rhs)));
   1428 	}
   1429 
   1430 	Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
   1431 	{
   1432 		return V(::builder->CreateFCmpULE(V(lhs), V(rhs)));
   1433 	}
   1434 
   1435 	Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
   1436 	{
   1437 		assert(V(vector)->getType()->getContainedType(0) == T(type));
   1438 		return V(::builder->CreateExtractElement(V(vector), V(createConstantInt(index))));
   1439 	}
   1440 
   1441 	Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
   1442 	{
   1443 		return V(::builder->CreateInsertElement(V(vector), V(element), V(createConstantInt(index))));
   1444 	}
   1445 
   1446 	Value *Nucleus::createShuffleVector(Value *v1, Value *v2, const int *select)
   1447 	{
   1448 		int size = llvm::cast<llvm::VectorType>(V(v1)->getType())->getNumElements();
   1449 		const int maxSize = 16;
   1450 		llvm::Constant *swizzle[maxSize];
   1451 		assert(size <= maxSize);
   1452 
   1453 		for(int i = 0; i < size; i++)
   1454 		{
   1455 			swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]);
   1456 		}
   1457 
   1458 		llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
   1459 
   1460 		return V(::builder->CreateShuffleVector(V(v1), V(v2), shuffle));
   1461 	}
   1462 
   1463 	Value *Nucleus::createSelect(Value *c, Value *ifTrue, Value *ifFalse)
   1464 	{
   1465 		return V(::builder->CreateSelect(V(c), V(ifTrue), V(ifFalse)));
   1466 	}
   1467 
   1468 	SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
   1469 	{
   1470 		return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(V(control), B(defaultBranch), numCases));
   1471 	}
   1472 
   1473 	void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
   1474 	{
   1475 		llvm::SwitchInst *sw = reinterpret_cast<llvm::SwitchInst *>(switchCases);
   1476 		sw->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), B(branch));
   1477 	}
   1478 
   1479 	void Nucleus::createUnreachable()
   1480 	{
   1481 		::builder->CreateUnreachable();
   1482 	}
   1483 
   1484 	static Value *createSwizzle4(Value *val, unsigned char select)
   1485 	{
   1486 		int swizzle[4] =
   1487 		{
   1488 			(select >> 0) & 0x03,
   1489 			(select >> 2) & 0x03,
   1490 			(select >> 4) & 0x03,
   1491 			(select >> 6) & 0x03,
   1492 		};
   1493 
   1494 		return Nucleus::createShuffleVector(val, val, swizzle);
   1495 	}
   1496 
   1497 	static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
   1498 	{
   1499 		bool mask[4] = {false, false, false, false};
   1500 
   1501 		mask[(select >> 0) & 0x03] = true;
   1502 		mask[(select >> 2) & 0x03] = true;
   1503 		mask[(select >> 4) & 0x03] = true;
   1504 		mask[(select >> 6) & 0x03] = true;
   1505 
   1506 		int swizzle[4] =
   1507 		{
   1508 			mask[0] ? 4 : 0,
   1509 			mask[1] ? 5 : 1,
   1510 			mask[2] ? 6 : 2,
   1511 			mask[3] ? 7 : 3,
   1512 		};
   1513 
   1514 		return Nucleus::createShuffleVector(lhs, rhs, swizzle);
   1515 	}
   1516 
   1517 	Type *Nucleus::getPointerType(Type *ElementType)
   1518 	{
   1519 		return T(llvm::PointerType::get(T(ElementType), 0));
   1520 	}
   1521 
   1522 	Value *Nucleus::createNullValue(Type *Ty)
   1523 	{
   1524 		return V(llvm::Constant::getNullValue(T(Ty)));
   1525 	}
   1526 
   1527 	Value *Nucleus::createConstantLong(int64_t i)
   1528 	{
   1529 		return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true));
   1530 	}
   1531 
   1532 	Value *Nucleus::createConstantInt(int i)
   1533 	{
   1534 		return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true));
   1535 	}
   1536 
   1537 	Value *Nucleus::createConstantInt(unsigned int i)
   1538 	{
   1539 		return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false));
   1540 	}
   1541 
   1542 	Value *Nucleus::createConstantBool(bool b)
   1543 	{
   1544 		return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b));
   1545 	}
   1546 
   1547 	Value *Nucleus::createConstantByte(signed char i)
   1548 	{
   1549 		return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true));
   1550 	}
   1551 
   1552 	Value *Nucleus::createConstantByte(unsigned char i)
   1553 	{
   1554 		return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false));
   1555 	}
   1556 
   1557 	Value *Nucleus::createConstantShort(short i)
   1558 	{
   1559 		return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true));
   1560 	}
   1561 
   1562 	Value *Nucleus::createConstantShort(unsigned short i)
   1563 	{
   1564 		return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false));
   1565 	}
   1566 
   1567 	Value *Nucleus::createConstantFloat(float x)
   1568 	{
   1569 		return V(llvm::ConstantFP::get(T(Float::getType()), x));
   1570 	}
   1571 
   1572 	Value *Nucleus::createNullPointer(Type *Ty)
   1573 	{
   1574 		return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
   1575 	}
   1576 
   1577 	Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
   1578 	{
   1579 		assert(llvm::isa<llvm::VectorType>(T(type)));
   1580 		const int numConstants = elementCount(type);                                       // Number of provided constants for the (emulated) type.
   1581 		const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements();   // Number of elements of the underlying vector type.
   1582 		assert(numElements <= 16 && numConstants <= numElements);
   1583 		llvm::Constant *constantVector[16];
   1584 
   1585 		for(int i = 0; i < numElements; i++)
   1586 		{
   1587 			constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]);
   1588 		}
   1589 
   1590 		return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
   1591 	}
   1592 
   1593 	Value *Nucleus::createConstantVector(const double *constants, Type *type)
   1594 	{
   1595 		assert(llvm::isa<llvm::VectorType>(T(type)));
   1596 		const int numConstants = elementCount(type);                                       // Number of provided constants for the (emulated) type.
   1597 		const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements();   // Number of elements of the underlying vector type.
   1598 		assert(numElements <= 8 && numConstants <= numElements);
   1599 		llvm::Constant *constantVector[8];
   1600 
   1601 		for(int i = 0; i < numElements; i++)
   1602 		{
   1603 			constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]);
   1604 		}
   1605 
   1606 		return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
   1607 	}
   1608 
   1609 	Type *Void::getType()
   1610 	{
   1611 		return T(llvm::Type::getVoidTy(*::context));
   1612 	}
   1613 
   1614 	Bool::Bool(Argument<Bool> argument)
   1615 	{
   1616 		storeValue(argument.value);
   1617 	}
   1618 
   1619 	Bool::Bool(bool x)
   1620 	{
   1621 		storeValue(Nucleus::createConstantBool(x));
   1622 	}
   1623 
   1624 	Bool::Bool(RValue<Bool> rhs)
   1625 	{
   1626 		storeValue(rhs.value);
   1627 	}
   1628 
   1629 	Bool::Bool(const Bool &rhs)
   1630 	{
   1631 		Value *value = rhs.loadValue();
   1632 		storeValue(value);
   1633 	}
   1634 
   1635 	Bool::Bool(const Reference<Bool> &rhs)
   1636 	{
   1637 		Value *value = rhs.loadValue();
   1638 		storeValue(value);
   1639 	}
   1640 
   1641 	RValue<Bool> Bool::operator=(RValue<Bool> rhs)
   1642 	{
   1643 		storeValue(rhs.value);
   1644 
   1645 		return rhs;
   1646 	}
   1647 
   1648 	RValue<Bool> Bool::operator=(const Bool &rhs)
   1649 	{
   1650 		Value *value = rhs.loadValue();
   1651 		storeValue(value);
   1652 
   1653 		return RValue<Bool>(value);
   1654 	}
   1655 
   1656 	RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
   1657 	{
   1658 		Value *value = rhs.loadValue();
   1659 		storeValue(value);
   1660 
   1661 		return RValue<Bool>(value);
   1662 	}
   1663 
   1664 	RValue<Bool> operator!(RValue<Bool> val)
   1665 	{
   1666 		return RValue<Bool>(Nucleus::createNot(val.value));
   1667 	}
   1668 
   1669 	RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
   1670 	{
   1671 		return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
   1672 	}
   1673 
   1674 	RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
   1675 	{
   1676 		return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
   1677 	}
   1678 
   1679 	Type *Bool::getType()
   1680 	{
   1681 		return T(llvm::Type::getInt1Ty(*::context));
   1682 	}
   1683 
   1684 	Byte::Byte(Argument<Byte> argument)
   1685 	{
   1686 		storeValue(argument.value);
   1687 	}
   1688 
   1689 	Byte::Byte(RValue<Int> cast)
   1690 	{
   1691 		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
   1692 
   1693 		storeValue(integer);
   1694 	}
   1695 
   1696 	Byte::Byte(RValue<UInt> cast)
   1697 	{
   1698 		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
   1699 
   1700 		storeValue(integer);
   1701 	}
   1702 
   1703 	Byte::Byte(RValue<UShort> cast)
   1704 	{
   1705 		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
   1706 
   1707 		storeValue(integer);
   1708 	}
   1709 
   1710 	Byte::Byte(int x)
   1711 	{
   1712 		storeValue(Nucleus::createConstantByte((unsigned char)x));
   1713 	}
   1714 
   1715 	Byte::Byte(unsigned char x)
   1716 	{
   1717 		storeValue(Nucleus::createConstantByte(x));
   1718 	}
   1719 
   1720 	Byte::Byte(RValue<Byte> rhs)
   1721 	{
   1722 		storeValue(rhs.value);
   1723 	}
   1724 
   1725 	Byte::Byte(const Byte &rhs)
   1726 	{
   1727 		Value *value = rhs.loadValue();
   1728 		storeValue(value);
   1729 	}
   1730 
   1731 	Byte::Byte(const Reference<Byte> &rhs)
   1732 	{
   1733 		Value *value = rhs.loadValue();
   1734 		storeValue(value);
   1735 	}
   1736 
   1737 	RValue<Byte> Byte::operator=(RValue<Byte> rhs)
   1738 	{
   1739 		storeValue(rhs.value);
   1740 
   1741 		return rhs;
   1742 	}
   1743 
   1744 	RValue<Byte> Byte::operator=(const Byte &rhs)
   1745 	{
   1746 		Value *value = rhs.loadValue();
   1747 		storeValue(value);
   1748 
   1749 		return RValue<Byte>(value);
   1750 	}
   1751 
   1752 	RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
   1753 	{
   1754 		Value *value = rhs.loadValue();
   1755 		storeValue(value);
   1756 
   1757 		return RValue<Byte>(value);
   1758 	}
   1759 
   1760 	RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
   1761 	{
   1762 		return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
   1763 	}
   1764 
   1765 	RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
   1766 	{
   1767 		return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
   1768 	}
   1769 
   1770 	RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
   1771 	{
   1772 		return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
   1773 	}
   1774 
   1775 	RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
   1776 	{
   1777 		return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
   1778 	}
   1779 
   1780 	RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
   1781 	{
   1782 		return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
   1783 	}
   1784 
   1785 	RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
   1786 	{
   1787 		return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
   1788 	}
   1789 
   1790 	RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
   1791 	{
   1792 		return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
   1793 	}
   1794 
   1795 	RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
   1796 	{
   1797 		return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
   1798 	}
   1799 
   1800 	RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
   1801 	{
   1802 		return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
   1803 	}
   1804 
   1805 	RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
   1806 	{
   1807 		return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
   1808 	}
   1809 
   1810 	RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
   1811 	{
   1812 		return lhs = lhs + rhs;
   1813 	}
   1814 
   1815 	RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
   1816 	{
   1817 		return lhs = lhs - rhs;
   1818 	}
   1819 
   1820 	RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
   1821 	{
   1822 		return lhs = lhs * rhs;
   1823 	}
   1824 
   1825 	RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
   1826 	{
   1827 		return lhs = lhs / rhs;
   1828 	}
   1829 
   1830 	RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
   1831 	{
   1832 		return lhs = lhs % rhs;
   1833 	}
   1834 
   1835 	RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
   1836 	{
   1837 		return lhs = lhs & rhs;
   1838 	}
   1839 
   1840 	RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
   1841 	{
   1842 		return lhs = lhs | rhs;
   1843 	}
   1844 
   1845 	RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
   1846 	{
   1847 		return lhs = lhs ^ rhs;
   1848 	}
   1849 
   1850 	RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
   1851 	{
   1852 		return lhs = lhs << rhs;
   1853 	}
   1854 
   1855 	RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
   1856 	{
   1857 		return lhs = lhs >> rhs;
   1858 	}
   1859 
   1860 	RValue<Byte> operator+(RValue<Byte> val)
   1861 	{
   1862 		return val;
   1863 	}
   1864 
   1865 	RValue<Byte> operator-(RValue<Byte> val)
   1866 	{
   1867 		return RValue<Byte>(Nucleus::createNeg(val.value));
   1868 	}
   1869 
   1870 	RValue<Byte> operator~(RValue<Byte> val)
   1871 	{
   1872 		return RValue<Byte>(Nucleus::createNot(val.value));
   1873 	}
   1874 
   1875 	RValue<Byte> operator++(Byte &val, int)   // Post-increment
   1876 	{
   1877 		RValue<Byte> res = val;
   1878 
   1879 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((unsigned char)1));
   1880 		val.storeValue(inc);
   1881 
   1882 		return res;
   1883 	}
   1884 
   1885 	const Byte &operator++(Byte &val)   // Pre-increment
   1886 	{
   1887 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantByte((unsigned char)1));
   1888 		val.storeValue(inc);
   1889 
   1890 		return val;
   1891 	}
   1892 
   1893 	RValue<Byte> operator--(Byte &val, int)   // Post-decrement
   1894 	{
   1895 		RValue<Byte> res = val;
   1896 
   1897 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((unsigned char)1));
   1898 		val.storeValue(inc);
   1899 
   1900 		return res;
   1901 	}
   1902 
   1903 	const Byte &operator--(Byte &val)   // Pre-decrement
   1904 	{
   1905 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantByte((unsigned char)1));
   1906 		val.storeValue(inc);
   1907 
   1908 		return val;
   1909 	}
   1910 
   1911 	RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
   1912 	{
   1913 		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
   1914 	}
   1915 
   1916 	RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
   1917 	{
   1918 		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
   1919 	}
   1920 
   1921 	RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
   1922 	{
   1923 		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
   1924 	}
   1925 
   1926 	RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
   1927 	{
   1928 		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
   1929 	}
   1930 
   1931 	RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
   1932 	{
   1933 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   1934 	}
   1935 
   1936 	RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
   1937 	{
   1938 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   1939 	}
   1940 
   1941 	Type *Byte::getType()
   1942 	{
   1943 		return T(llvm::Type::getInt8Ty(*::context));
   1944 	}
   1945 
   1946 	SByte::SByte(Argument<SByte> argument)
   1947 	{
   1948 		storeValue(argument.value);
   1949 	}
   1950 
   1951 	SByte::SByte(RValue<Int> cast)
   1952 	{
   1953 		Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
   1954 
   1955 		storeValue(integer);
   1956 	}
   1957 
   1958 	SByte::SByte(RValue<Short> cast)
   1959 	{
   1960 		Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
   1961 
   1962 		storeValue(integer);
   1963 	}
   1964 
   1965 	SByte::SByte(signed char x)
   1966 	{
   1967 		storeValue(Nucleus::createConstantByte(x));
   1968 	}
   1969 
   1970 	SByte::SByte(RValue<SByte> rhs)
   1971 	{
   1972 		storeValue(rhs.value);
   1973 	}
   1974 
   1975 	SByte::SByte(const SByte &rhs)
   1976 	{
   1977 		Value *value = rhs.loadValue();
   1978 		storeValue(value);
   1979 	}
   1980 
   1981 	SByte::SByte(const Reference<SByte> &rhs)
   1982 	{
   1983 		Value *value = rhs.loadValue();
   1984 		storeValue(value);
   1985 	}
   1986 
   1987 	RValue<SByte> SByte::operator=(RValue<SByte> rhs)
   1988 	{
   1989 		storeValue(rhs.value);
   1990 
   1991 		return rhs;
   1992 	}
   1993 
   1994 	RValue<SByte> SByte::operator=(const SByte &rhs)
   1995 	{
   1996 		Value *value = rhs.loadValue();
   1997 		storeValue(value);
   1998 
   1999 		return RValue<SByte>(value);
   2000 	}
   2001 
   2002 	RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
   2003 	{
   2004 		Value *value = rhs.loadValue();
   2005 		storeValue(value);
   2006 
   2007 		return RValue<SByte>(value);
   2008 	}
   2009 
   2010 	RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
   2011 	{
   2012 		return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
   2013 	}
   2014 
   2015 	RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
   2016 	{
   2017 		return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
   2018 	}
   2019 
   2020 	RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
   2021 	{
   2022 		return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
   2023 	}
   2024 
   2025 	RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
   2026 	{
   2027 		return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
   2028 	}
   2029 
   2030 	RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
   2031 	{
   2032 		return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
   2033 	}
   2034 
   2035 	RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
   2036 	{
   2037 		return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
   2038 	}
   2039 
   2040 	RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
   2041 	{
   2042 		return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
   2043 	}
   2044 
   2045 	RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
   2046 	{
   2047 		return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
   2048 	}
   2049 
   2050 	RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
   2051 	{
   2052 		return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
   2053 	}
   2054 
   2055 	RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
   2056 	{
   2057 		return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
   2058 	}
   2059 
   2060 	RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
   2061 	{
   2062 		return lhs = lhs + rhs;
   2063 	}
   2064 
   2065 	RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
   2066 	{
   2067 		return lhs = lhs - rhs;
   2068 	}
   2069 
   2070 	RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
   2071 	{
   2072 		return lhs = lhs * rhs;
   2073 	}
   2074 
   2075 	RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
   2076 	{
   2077 		return lhs = lhs / rhs;
   2078 	}
   2079 
   2080 	RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
   2081 	{
   2082 		return lhs = lhs % rhs;
   2083 	}
   2084 
   2085 	RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
   2086 	{
   2087 		return lhs = lhs & rhs;
   2088 	}
   2089 
   2090 	RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
   2091 	{
   2092 		return lhs = lhs | rhs;
   2093 	}
   2094 
   2095 	RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
   2096 	{
   2097 		return lhs = lhs ^ rhs;
   2098 	}
   2099 
   2100 	RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
   2101 	{
   2102 		return lhs = lhs << rhs;
   2103 	}
   2104 
   2105 	RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
   2106 	{
   2107 		return lhs = lhs >> rhs;
   2108 	}
   2109 
   2110 	RValue<SByte> operator+(RValue<SByte> val)
   2111 	{
   2112 		return val;
   2113 	}
   2114 
   2115 	RValue<SByte> operator-(RValue<SByte> val)
   2116 	{
   2117 		return RValue<SByte>(Nucleus::createNeg(val.value));
   2118 	}
   2119 
   2120 	RValue<SByte> operator~(RValue<SByte> val)
   2121 	{
   2122 		return RValue<SByte>(Nucleus::createNot(val.value));
   2123 	}
   2124 
   2125 	RValue<SByte> operator++(SByte &val, int)   // Post-increment
   2126 	{
   2127 		RValue<SByte> res = val;
   2128 
   2129 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((signed char)1));
   2130 		val.storeValue(inc);
   2131 
   2132 		return res;
   2133 	}
   2134 
   2135 	const SByte &operator++(SByte &val)   // Pre-increment
   2136 	{
   2137 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantByte((signed char)1));
   2138 		val.storeValue(inc);
   2139 
   2140 		return val;
   2141 	}
   2142 
   2143 	RValue<SByte> operator--(SByte &val, int)   // Post-decrement
   2144 	{
   2145 		RValue<SByte> res = val;
   2146 
   2147 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((signed char)1));
   2148 		val.storeValue(inc);
   2149 
   2150 		return res;
   2151 	}
   2152 
   2153 	const SByte &operator--(SByte &val)   // Pre-decrement
   2154 	{
   2155 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantByte((signed char)1));
   2156 		val.storeValue(inc);
   2157 
   2158 		return val;
   2159 	}
   2160 
   2161 	RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
   2162 	{
   2163 		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
   2164 	}
   2165 
   2166 	RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
   2167 	{
   2168 		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
   2169 	}
   2170 
   2171 	RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
   2172 	{
   2173 		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
   2174 	}
   2175 
   2176 	RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
   2177 	{
   2178 		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
   2179 	}
   2180 
   2181 	RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
   2182 	{
   2183 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   2184 	}
   2185 
   2186 	RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
   2187 	{
   2188 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   2189 	}
   2190 
   2191 	Type *SByte::getType()
   2192 	{
   2193 		return T(llvm::Type::getInt8Ty(*::context));
   2194 	}
   2195 
   2196 	Short::Short(Argument<Short> argument)
   2197 	{
   2198 		storeValue(argument.value);
   2199 	}
   2200 
   2201 	Short::Short(RValue<Int> cast)
   2202 	{
   2203 		Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
   2204 
   2205 		storeValue(integer);
   2206 	}
   2207 
   2208 	Short::Short(short x)
   2209 	{
   2210 		storeValue(Nucleus::createConstantShort(x));
   2211 	}
   2212 
   2213 	Short::Short(RValue<Short> rhs)
   2214 	{
   2215 		storeValue(rhs.value);
   2216 	}
   2217 
   2218 	Short::Short(const Short &rhs)
   2219 	{
   2220 		Value *value = rhs.loadValue();
   2221 		storeValue(value);
   2222 	}
   2223 
   2224 	Short::Short(const Reference<Short> &rhs)
   2225 	{
   2226 		Value *value = rhs.loadValue();
   2227 		storeValue(value);
   2228 	}
   2229 
   2230 	RValue<Short> Short::operator=(RValue<Short> rhs)
   2231 	{
   2232 		storeValue(rhs.value);
   2233 
   2234 		return rhs;
   2235 	}
   2236 
   2237 	RValue<Short> Short::operator=(const Short &rhs)
   2238 	{
   2239 		Value *value = rhs.loadValue();
   2240 		storeValue(value);
   2241 
   2242 		return RValue<Short>(value);
   2243 	}
   2244 
   2245 	RValue<Short> Short::operator=(const Reference<Short> &rhs)
   2246 	{
   2247 		Value *value = rhs.loadValue();
   2248 		storeValue(value);
   2249 
   2250 		return RValue<Short>(value);
   2251 	}
   2252 
   2253 	RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
   2254 	{
   2255 		return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
   2256 	}
   2257 
   2258 	RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
   2259 	{
   2260 		return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
   2261 	}
   2262 
   2263 	RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
   2264 	{
   2265 		return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
   2266 	}
   2267 
   2268 	RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
   2269 	{
   2270 		return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
   2271 	}
   2272 
   2273 	RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
   2274 	{
   2275 		return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
   2276 	}
   2277 
   2278 	RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
   2279 	{
   2280 		return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
   2281 	}
   2282 
   2283 	RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
   2284 	{
   2285 		return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
   2286 	}
   2287 
   2288 	RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
   2289 	{
   2290 		return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
   2291 	}
   2292 
   2293 	RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
   2294 	{
   2295 		return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
   2296 	}
   2297 
   2298 	RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
   2299 	{
   2300 		return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
   2301 	}
   2302 
   2303 	RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
   2304 	{
   2305 		return lhs = lhs + rhs;
   2306 	}
   2307 
   2308 	RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
   2309 	{
   2310 		return lhs = lhs - rhs;
   2311 	}
   2312 
   2313 	RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
   2314 	{
   2315 		return lhs = lhs * rhs;
   2316 	}
   2317 
   2318 	RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
   2319 	{
   2320 		return lhs = lhs / rhs;
   2321 	}
   2322 
   2323 	RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
   2324 	{
   2325 		return lhs = lhs % rhs;
   2326 	}
   2327 
   2328 	RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
   2329 	{
   2330 		return lhs = lhs & rhs;
   2331 	}
   2332 
   2333 	RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
   2334 	{
   2335 		return lhs = lhs | rhs;
   2336 	}
   2337 
   2338 	RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
   2339 	{
   2340 		return lhs = lhs ^ rhs;
   2341 	}
   2342 
   2343 	RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
   2344 	{
   2345 		return lhs = lhs << rhs;
   2346 	}
   2347 
   2348 	RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
   2349 	{
   2350 		return lhs = lhs >> rhs;
   2351 	}
   2352 
   2353 	RValue<Short> operator+(RValue<Short> val)
   2354 	{
   2355 		return val;
   2356 	}
   2357 
   2358 	RValue<Short> operator-(RValue<Short> val)
   2359 	{
   2360 		return RValue<Short>(Nucleus::createNeg(val.value));
   2361 	}
   2362 
   2363 	RValue<Short> operator~(RValue<Short> val)
   2364 	{
   2365 		return RValue<Short>(Nucleus::createNot(val.value));
   2366 	}
   2367 
   2368 	RValue<Short> operator++(Short &val, int)   // Post-increment
   2369 	{
   2370 		RValue<Short> res = val;
   2371 
   2372 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((short)1));
   2373 		val.storeValue(inc);
   2374 
   2375 		return res;
   2376 	}
   2377 
   2378 	const Short &operator++(Short &val)   // Pre-increment
   2379 	{
   2380 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantShort((short)1));
   2381 		val.storeValue(inc);
   2382 
   2383 		return val;
   2384 	}
   2385 
   2386 	RValue<Short> operator--(Short &val, int)   // Post-decrement
   2387 	{
   2388 		RValue<Short> res = val;
   2389 
   2390 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((short)1));
   2391 		val.storeValue(inc);
   2392 
   2393 		return res;
   2394 	}
   2395 
   2396 	const Short &operator--(Short &val)   // Pre-decrement
   2397 	{
   2398 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantShort((short)1));
   2399 		val.storeValue(inc);
   2400 
   2401 		return val;
   2402 	}
   2403 
   2404 	RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
   2405 	{
   2406 		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
   2407 	}
   2408 
   2409 	RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
   2410 	{
   2411 		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
   2412 	}
   2413 
   2414 	RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
   2415 	{
   2416 		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
   2417 	}
   2418 
   2419 	RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
   2420 	{
   2421 		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
   2422 	}
   2423 
   2424 	RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
   2425 	{
   2426 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   2427 	}
   2428 
   2429 	RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
   2430 	{
   2431 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   2432 	}
   2433 
   2434 	Type *Short::getType()
   2435 	{
   2436 		return T(llvm::Type::getInt16Ty(*::context));
   2437 	}
   2438 
   2439 	UShort::UShort(Argument<UShort> argument)
   2440 	{
   2441 		storeValue(argument.value);
   2442 	}
   2443 
   2444 	UShort::UShort(RValue<UInt> cast)
   2445 	{
   2446 		Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
   2447 
   2448 		storeValue(integer);
   2449 	}
   2450 
   2451 	UShort::UShort(RValue<Int> cast)
   2452 	{
   2453 		Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
   2454 
   2455 		storeValue(integer);
   2456 	}
   2457 
   2458 	UShort::UShort(unsigned short x)
   2459 	{
   2460 		storeValue(Nucleus::createConstantShort(x));
   2461 	}
   2462 
   2463 	UShort::UShort(RValue<UShort> rhs)
   2464 	{
   2465 		storeValue(rhs.value);
   2466 	}
   2467 
   2468 	UShort::UShort(const UShort &rhs)
   2469 	{
   2470 		Value *value = rhs.loadValue();
   2471 		storeValue(value);
   2472 	}
   2473 
   2474 	UShort::UShort(const Reference<UShort> &rhs)
   2475 	{
   2476 		Value *value = rhs.loadValue();
   2477 		storeValue(value);
   2478 	}
   2479 
   2480 	RValue<UShort> UShort::operator=(RValue<UShort> rhs)
   2481 	{
   2482 		storeValue(rhs.value);
   2483 
   2484 		return rhs;
   2485 	}
   2486 
   2487 	RValue<UShort> UShort::operator=(const UShort &rhs)
   2488 	{
   2489 		Value *value = rhs.loadValue();
   2490 		storeValue(value);
   2491 
   2492 		return RValue<UShort>(value);
   2493 	}
   2494 
   2495 	RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
   2496 	{
   2497 		Value *value = rhs.loadValue();
   2498 		storeValue(value);
   2499 
   2500 		return RValue<UShort>(value);
   2501 	}
   2502 
   2503 	RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
   2504 	{
   2505 		return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
   2506 	}
   2507 
   2508 	RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
   2509 	{
   2510 		return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
   2511 	}
   2512 
   2513 	RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
   2514 	{
   2515 		return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
   2516 	}
   2517 
   2518 	RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
   2519 	{
   2520 		return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
   2521 	}
   2522 
   2523 	RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
   2524 	{
   2525 		return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
   2526 	}
   2527 
   2528 	RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
   2529 	{
   2530 		return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
   2531 	}
   2532 
   2533 	RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
   2534 	{
   2535 		return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
   2536 	}
   2537 
   2538 	RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
   2539 	{
   2540 		return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
   2541 	}
   2542 
   2543 	RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
   2544 	{
   2545 		return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
   2546 	}
   2547 
   2548 	RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
   2549 	{
   2550 		return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
   2551 	}
   2552 
   2553 	RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
   2554 	{
   2555 		return lhs = lhs + rhs;
   2556 	}
   2557 
   2558 	RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
   2559 	{
   2560 		return lhs = lhs - rhs;
   2561 	}
   2562 
   2563 	RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
   2564 	{
   2565 		return lhs = lhs * rhs;
   2566 	}
   2567 
   2568 	RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
   2569 	{
   2570 		return lhs = lhs / rhs;
   2571 	}
   2572 
   2573 	RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
   2574 	{
   2575 		return lhs = lhs % rhs;
   2576 	}
   2577 
   2578 	RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
   2579 	{
   2580 		return lhs = lhs & rhs;
   2581 	}
   2582 
   2583 	RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
   2584 	{
   2585 		return lhs = lhs | rhs;
   2586 	}
   2587 
   2588 	RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
   2589 	{
   2590 		return lhs = lhs ^ rhs;
   2591 	}
   2592 
   2593 	RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
   2594 	{
   2595 		return lhs = lhs << rhs;
   2596 	}
   2597 
   2598 	RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
   2599 	{
   2600 		return lhs = lhs >> rhs;
   2601 	}
   2602 
   2603 	RValue<UShort> operator+(RValue<UShort> val)
   2604 	{
   2605 		return val;
   2606 	}
   2607 
   2608 	RValue<UShort> operator-(RValue<UShort> val)
   2609 	{
   2610 		return RValue<UShort>(Nucleus::createNeg(val.value));
   2611 	}
   2612 
   2613 	RValue<UShort> operator~(RValue<UShort> val)
   2614 	{
   2615 		return RValue<UShort>(Nucleus::createNot(val.value));
   2616 	}
   2617 
   2618 	RValue<UShort> operator++(UShort &val, int)   // Post-increment
   2619 	{
   2620 		RValue<UShort> res = val;
   2621 
   2622 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((unsigned short)1));
   2623 		val.storeValue(inc);
   2624 
   2625 		return res;
   2626 	}
   2627 
   2628 	const UShort &operator++(UShort &val)   // Pre-increment
   2629 	{
   2630 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantShort((unsigned short)1));
   2631 		val.storeValue(inc);
   2632 
   2633 		return val;
   2634 	}
   2635 
   2636 	RValue<UShort> operator--(UShort &val, int)   // Post-decrement
   2637 	{
   2638 		RValue<UShort> res = val;
   2639 
   2640 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((unsigned short)1));
   2641 		val.storeValue(inc);
   2642 
   2643 		return res;
   2644 	}
   2645 
   2646 	const UShort &operator--(UShort &val)   // Pre-decrement
   2647 	{
   2648 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantShort((unsigned short)1));
   2649 		val.storeValue(inc);
   2650 
   2651 		return val;
   2652 	}
   2653 
   2654 	RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
   2655 	{
   2656 		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
   2657 	}
   2658 
   2659 	RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
   2660 	{
   2661 		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
   2662 	}
   2663 
   2664 	RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
   2665 	{
   2666 		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
   2667 	}
   2668 
   2669 	RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
   2670 	{
   2671 		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
   2672 	}
   2673 
   2674 	RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
   2675 	{
   2676 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   2677 	}
   2678 
   2679 	RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
   2680 	{
   2681 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   2682 	}
   2683 
   2684 	Type *UShort::getType()
   2685 	{
   2686 		return T(llvm::Type::getInt16Ty(*::context));
   2687 	}
   2688 
   2689 	Byte4::Byte4(RValue<Byte8> cast)
   2690 	{
   2691 		storeValue(Nucleus::createBitCast(cast.value, getType()));
   2692 	}
   2693 
   2694 	Byte4::Byte4(const Reference<Byte4> &rhs)
   2695 	{
   2696 		Value *value = rhs.loadValue();
   2697 		storeValue(value);
   2698 	}
   2699 
   2700 	Type *Byte4::getType()
   2701 	{
   2702 		return T(Type_v4i8);
   2703 	}
   2704 
   2705 	Type *SByte4::getType()
   2706 	{
   2707 		return T(Type_v4i8);
   2708 	}
   2709 
   2710 	Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
   2711 	{
   2712 		int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
   2713 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   2714 	}
   2715 
   2716 	Byte8::Byte8(RValue<Byte8> rhs)
   2717 	{
   2718 		storeValue(rhs.value);
   2719 	}
   2720 
   2721 	Byte8::Byte8(const Byte8 &rhs)
   2722 	{
   2723 		Value *value = rhs.loadValue();
   2724 		storeValue(value);
   2725 	}
   2726 
   2727 	Byte8::Byte8(const Reference<Byte8> &rhs)
   2728 	{
   2729 		Value *value = rhs.loadValue();
   2730 		storeValue(value);
   2731 	}
   2732 
   2733 	RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
   2734 	{
   2735 		storeValue(rhs.value);
   2736 
   2737 		return rhs;
   2738 	}
   2739 
   2740 	RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
   2741 	{
   2742 		Value *value = rhs.loadValue();
   2743 		storeValue(value);
   2744 
   2745 		return RValue<Byte8>(value);
   2746 	}
   2747 
   2748 	RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
   2749 	{
   2750 		Value *value = rhs.loadValue();
   2751 		storeValue(value);
   2752 
   2753 		return RValue<Byte8>(value);
   2754 	}
   2755 
   2756 	RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2757 	{
   2758 		return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
   2759 	}
   2760 
   2761 	RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2762 	{
   2763 		return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
   2764 	}
   2765 
   2766 //	RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2767 //	{
   2768 //		return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
   2769 //	}
   2770 
   2771 //	RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2772 //	{
   2773 //		return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
   2774 //	}
   2775 
   2776 //	RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2777 //	{
   2778 //		return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
   2779 //	}
   2780 
   2781 	RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2782 	{
   2783 		return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
   2784 	}
   2785 
   2786 	RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2787 	{
   2788 		return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
   2789 	}
   2790 
   2791 	RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2792 	{
   2793 		return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
   2794 	}
   2795 
   2796 //	RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
   2797 //	{
   2798 //		return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value));
   2799 //	}
   2800 
   2801 //	RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
   2802 //	{
   2803 //		return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value));
   2804 //	}
   2805 
   2806 	RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
   2807 	{
   2808 		return lhs = lhs + rhs;
   2809 	}
   2810 
   2811 	RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
   2812 	{
   2813 		return lhs = lhs - rhs;
   2814 	}
   2815 
   2816 //	RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
   2817 //	{
   2818 //		return lhs = lhs * rhs;
   2819 //	}
   2820 
   2821 //	RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
   2822 //	{
   2823 //		return lhs = lhs / rhs;
   2824 //	}
   2825 
   2826 //	RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
   2827 //	{
   2828 //		return lhs = lhs % rhs;
   2829 //	}
   2830 
   2831 	RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)
   2832 	{
   2833 		return lhs = lhs & rhs;
   2834 	}
   2835 
   2836 	RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)
   2837 	{
   2838 		return lhs = lhs | rhs;
   2839 	}
   2840 
   2841 	RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)
   2842 	{
   2843 		return lhs = lhs ^ rhs;
   2844 	}
   2845 
   2846 //	RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
   2847 //	{
   2848 //		return lhs = lhs << rhs;
   2849 //	}
   2850 
   2851 //	RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
   2852 //	{
   2853 //		return lhs = lhs >> rhs;
   2854 //	}
   2855 
   2856 //	RValue<Byte8> operator+(RValue<Byte8> val)
   2857 //	{
   2858 //		return val;
   2859 //	}
   2860 
   2861 //	RValue<Byte8> operator-(RValue<Byte8> val)
   2862 //	{
   2863 //		return RValue<Byte8>(Nucleus::createNeg(val.value));
   2864 //	}
   2865 
   2866 	RValue<Byte8> operator~(RValue<Byte8> val)
   2867 	{
   2868 		return RValue<Byte8>(Nucleus::createNot(val.value));
   2869 	}
   2870 
   2871 	RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
   2872 	{
   2873 #if defined(__i386__) || defined(__x86_64__)
   2874 		return x86::paddusb(x, y);
   2875 #else
   2876 		return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value))));
   2877 #endif
   2878 	}
   2879 
   2880 	RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
   2881 	{
   2882 #if defined(__i386__) || defined(__x86_64__)
   2883 		return x86::psubusb(x, y);
   2884 #else
   2885 		return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
   2886 #endif
   2887 	}
   2888 
   2889 	RValue<Short4> Unpack(RValue<Byte4> x)
   2890 	{
   2891 		int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};   // Real type is v16i8
   2892 		return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
   2893 	}
   2894 
   2895 	RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
   2896 	{
   2897 		return UnpackLow(As<Byte8>(x), As<Byte8>(y));
   2898 	}
   2899 
   2900 	RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
   2901 	{
   2902 		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
   2903 		return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   2904 	}
   2905 
   2906 	RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
   2907 	{
   2908 		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
   2909 		auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   2910 		return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
   2911 	}
   2912 
   2913 	RValue<Int> SignMask(RValue<Byte8> x)
   2914 	{
   2915 #if defined(__i386__) || defined(__x86_64__)
   2916 		return x86::pmovmskb(x);
   2917 #else
   2918 		return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
   2919 #endif
   2920 	}
   2921 
   2922 //	RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
   2923 //	{
   2924 //#if defined(__i386__) || defined(__x86_64__)
   2925 //		return x86::pcmpgtb(x, y);   // FIXME: Signedness
   2926 //#else
   2927 //		return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
   2928 //#endif
   2929 //	}
   2930 
   2931 	RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
   2932 	{
   2933 #if defined(__i386__) || defined(__x86_64__)
   2934 		return x86::pcmpeqb(x, y);
   2935 #else
   2936 		return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
   2937 #endif
   2938 	}
   2939 
   2940 	Type *Byte8::getType()
   2941 	{
   2942 		return T(Type_v8i8);
   2943 	}
   2944 
   2945 	SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
   2946 	{
   2947 		int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
   2948 		Value *vector = Nucleus::createConstantVector(constantVector, getType());
   2949 
   2950 		storeValue(Nucleus::createBitCast(vector, getType()));
   2951 	}
   2952 
   2953 	SByte8::SByte8(RValue<SByte8> rhs)
   2954 	{
   2955 		storeValue(rhs.value);
   2956 	}
   2957 
   2958 	SByte8::SByte8(const SByte8 &rhs)
   2959 	{
   2960 		Value *value = rhs.loadValue();
   2961 		storeValue(value);
   2962 	}
   2963 
   2964 	SByte8::SByte8(const Reference<SByte8> &rhs)
   2965 	{
   2966 		Value *value = rhs.loadValue();
   2967 		storeValue(value);
   2968 	}
   2969 
   2970 	RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)
   2971 	{
   2972 		storeValue(rhs.value);
   2973 
   2974 		return rhs;
   2975 	}
   2976 
   2977 	RValue<SByte8> SByte8::operator=(const SByte8 &rhs)
   2978 	{
   2979 		Value *value = rhs.loadValue();
   2980 		storeValue(value);
   2981 
   2982 		return RValue<SByte8>(value);
   2983 	}
   2984 
   2985 	RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)
   2986 	{
   2987 		Value *value = rhs.loadValue();
   2988 		storeValue(value);
   2989 
   2990 		return RValue<SByte8>(value);
   2991 	}
   2992 
   2993 	RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2994 	{
   2995 		return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
   2996 	}
   2997 
   2998 	RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2999 	{
   3000 		return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
   3001 	}
   3002 
   3003 //	RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
   3004 //	{
   3005 //		return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
   3006 //	}
   3007 
   3008 //	RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
   3009 //	{
   3010 //		return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
   3011 //	}
   3012 
   3013 //	RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
   3014 //	{
   3015 //		return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
   3016 //	}
   3017 
   3018 	RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
   3019 	{
   3020 		return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
   3021 	}
   3022 
   3023 	RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
   3024 	{
   3025 		return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
   3026 	}
   3027 
   3028 	RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
   3029 	{
   3030 		return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
   3031 	}
   3032 
   3033 //	RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
   3034 //	{
   3035 //		return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value));
   3036 //	}
   3037 
   3038 //	RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
   3039 //	{
   3040 //		return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value));
   3041 //	}
   3042 
   3043 	RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
   3044 	{
   3045 		return lhs = lhs + rhs;
   3046 	}
   3047 
   3048 	RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
   3049 	{
   3050 		return lhs = lhs - rhs;
   3051 	}
   3052 
   3053 //	RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
   3054 //	{
   3055 //		return lhs = lhs * rhs;
   3056 //	}
   3057 
   3058 //	RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
   3059 //	{
   3060 //		return lhs = lhs / rhs;
   3061 //	}
   3062 
   3063 //	RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
   3064 //	{
   3065 //		return lhs = lhs % rhs;
   3066 //	}
   3067 
   3068 	RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
   3069 	{
   3070 		return lhs = lhs & rhs;
   3071 	}
   3072 
   3073 	RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
   3074 	{
   3075 		return lhs = lhs | rhs;
   3076 	}
   3077 
   3078 	RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
   3079 	{
   3080 		return lhs = lhs ^ rhs;
   3081 	}
   3082 
   3083 //	RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
   3084 //	{
   3085 //		return lhs = lhs << rhs;
   3086 //	}
   3087 
   3088 //	RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
   3089 //	{
   3090 //		return lhs = lhs >> rhs;
   3091 //	}
   3092 
   3093 //	RValue<SByte8> operator+(RValue<SByte8> val)
   3094 //	{
   3095 //		return val;
   3096 //	}
   3097 
   3098 //	RValue<SByte8> operator-(RValue<SByte8> val)
   3099 //	{
   3100 //		return RValue<SByte8>(Nucleus::createNeg(val.value));
   3101 //	}
   3102 
   3103 	RValue<SByte8> operator~(RValue<SByte8> val)
   3104 	{
   3105 		return RValue<SByte8>(Nucleus::createNot(val.value));
   3106 	}
   3107 
   3108 	RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
   3109 	{
   3110 #if defined(__i386__) || defined(__x86_64__)
   3111 		return x86::paddsb(x, y);
   3112 #else
   3113 		return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
   3114 #endif
   3115 	}
   3116 
   3117 	RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
   3118 	{
   3119 #if defined(__i386__) || defined(__x86_64__)
   3120 		return x86::psubsb(x, y);
   3121 #else
   3122 		return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
   3123 #endif
   3124 	}
   3125 
   3126 	RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
   3127 	{
   3128 		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
   3129 		return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   3130 	}
   3131 
   3132 	RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
   3133 	{
   3134 		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
   3135 		auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   3136 		return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
   3137 	}
   3138 
   3139 	RValue<Int> SignMask(RValue<SByte8> x)
   3140 	{
   3141 #if defined(__i386__) || defined(__x86_64__)
   3142 		return x86::pmovmskb(As<Byte8>(x));
   3143 #else
   3144 		return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
   3145 #endif
   3146 	}
   3147 
   3148 	RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
   3149 	{
   3150 #if defined(__i386__) || defined(__x86_64__)
   3151 		return x86::pcmpgtb(x, y);
   3152 #else
   3153 		return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
   3154 #endif
   3155 	}
   3156 
   3157 	RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
   3158 	{
   3159 #if defined(__i386__) || defined(__x86_64__)
   3160 		return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
   3161 #else
   3162 		return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
   3163 #endif
   3164 	}
   3165 
   3166 	Type *SByte8::getType()
   3167 	{
   3168 		return T(Type_v8i8);
   3169 	}
   3170 
   3171 	Byte16::Byte16(RValue<Byte16> rhs)
   3172 	{
   3173 		storeValue(rhs.value);
   3174 	}
   3175 
   3176 	Byte16::Byte16(const Byte16 &rhs)
   3177 	{
   3178 		Value *value = rhs.loadValue();
   3179 		storeValue(value);
   3180 	}
   3181 
   3182 	Byte16::Byte16(const Reference<Byte16> &rhs)
   3183 	{
   3184 		Value *value = rhs.loadValue();
   3185 		storeValue(value);
   3186 	}
   3187 
   3188 	RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)
   3189 	{
   3190 		storeValue(rhs.value);
   3191 
   3192 		return rhs;
   3193 	}
   3194 
   3195 	RValue<Byte16> Byte16::operator=(const Byte16 &rhs)
   3196 	{
   3197 		Value *value = rhs.loadValue();
   3198 		storeValue(value);
   3199 
   3200 		return RValue<Byte16>(value);
   3201 	}
   3202 
   3203 	RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
   3204 	{
   3205 		Value *value = rhs.loadValue();
   3206 		storeValue(value);
   3207 
   3208 		return RValue<Byte16>(value);
   3209 	}
   3210 
   3211 	Type *Byte16::getType()
   3212 	{
   3213 		return T(llvm::VectorType::get(T(Byte::getType()), 16));
   3214 	}
   3215 
   3216 	Type *SByte16::getType()
   3217 	{
   3218 		return T(llvm::VectorType::get(T(SByte::getType()), 16));
   3219 	}
   3220 
   3221 	Short2::Short2(RValue<Short4> cast)
   3222 	{
   3223 		storeValue(Nucleus::createBitCast(cast.value, getType()));
   3224 	}
   3225 
   3226 	Type *Short2::getType()
   3227 	{
   3228 		return T(Type_v2i16);
   3229 	}
   3230 
   3231 	UShort2::UShort2(RValue<UShort4> cast)
   3232 	{
   3233 		storeValue(Nucleus::createBitCast(cast.value, getType()));
   3234 	}
   3235 
   3236 	Type *UShort2::getType()
   3237 	{
   3238 		return T(Type_v2i16);
   3239 	}
   3240 
   3241 	Short4::Short4(RValue<Int> cast)
   3242 	{
   3243 		Value *vector = loadValue();
   3244 		Value *element = Nucleus::createTrunc(cast.value, Short::getType());
   3245 		Value *insert = Nucleus::createInsertElement(vector, element, 0);
   3246 		Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
   3247 
   3248 		storeValue(swizzle);
   3249 	}
   3250 
   3251 	Short4::Short4(RValue<Int4> cast)
   3252 	{
   3253 		int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
   3254 		Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
   3255 
   3256 		Value *packed = Nucleus::createShuffleVector(short8, short8, select);
   3257 		Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
   3258 
   3259 		storeValue(short4);
   3260 	}
   3261 
   3262 //	Short4::Short4(RValue<Float> cast)
   3263 //	{
   3264 //	}
   3265 
   3266 	Short4::Short4(RValue<Float4> cast)
   3267 	{
   3268 		Int4 v4i32 = Int4(cast);
   3269 #if defined(__i386__) || defined(__x86_64__)
   3270 		v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
   3271 #else
   3272 		Value *v = v4i32.loadValue();
   3273 		v4i32 = As<Int4>(V(lowerPack(V(v), V(v), true)));
   3274 #endif
   3275 
   3276 		storeValue(As<Short4>(Int2(v4i32)).value);
   3277 	}
   3278 
   3279 	Short4::Short4(short xyzw)
   3280 	{
   3281 		int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
   3282 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   3283 	}
   3284 
   3285 	Short4::Short4(short x, short y, short z, short w)
   3286 	{
   3287 		int64_t constantVector[4] = {x, y, z, w};
   3288 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   3289 	}
   3290 
   3291 	Short4::Short4(RValue<Short4> rhs)
   3292 	{
   3293 		storeValue(rhs.value);
   3294 	}
   3295 
   3296 	Short4::Short4(const Short4 &rhs)
   3297 	{
   3298 		Value *value = rhs.loadValue();
   3299 		storeValue(value);
   3300 	}
   3301 
   3302 	Short4::Short4(const Reference<Short4> &rhs)
   3303 	{
   3304 		Value *value = rhs.loadValue();
   3305 		storeValue(value);
   3306 	}
   3307 
   3308 	Short4::Short4(RValue<UShort4> rhs)
   3309 	{
   3310 		storeValue(rhs.value);
   3311 	}
   3312 
   3313 	Short4::Short4(const UShort4 &rhs)
   3314 	{
   3315 		storeValue(rhs.loadValue());
   3316 	}
   3317 
   3318 	Short4::Short4(const Reference<UShort4> &rhs)
   3319 	{
   3320 		storeValue(rhs.loadValue());
   3321 	}
   3322 
   3323 	RValue<Short4> Short4::operator=(RValue<Short4> rhs)
   3324 	{
   3325 		storeValue(rhs.value);
   3326 
   3327 		return rhs;
   3328 	}
   3329 
   3330 	RValue<Short4> Short4::operator=(const Short4 &rhs)
   3331 	{
   3332 		Value *value = rhs.loadValue();
   3333 		storeValue(value);
   3334 
   3335 		return RValue<Short4>(value);
   3336 	}
   3337 
   3338 	RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)
   3339 	{
   3340 		Value *value = rhs.loadValue();
   3341 		storeValue(value);
   3342 
   3343 		return RValue<Short4>(value);
   3344 	}
   3345 
   3346 	RValue<Short4> Short4::operator=(RValue<UShort4> rhs)
   3347 	{
   3348 		storeValue(rhs.value);
   3349 
   3350 		return RValue<Short4>(rhs);
   3351 	}
   3352 
   3353 	RValue<Short4> Short4::operator=(const UShort4 &rhs)
   3354 	{
   3355 		Value *value = rhs.loadValue();
   3356 		storeValue(value);
   3357 
   3358 		return RValue<Short4>(value);
   3359 	}
   3360 
   3361 	RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)
   3362 	{
   3363 		Value *value = rhs.loadValue();
   3364 		storeValue(value);
   3365 
   3366 		return RValue<Short4>(value);
   3367 	}
   3368 
   3369 	RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
   3370 	{
   3371 		return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
   3372 	}
   3373 
   3374 	RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
   3375 	{
   3376 		return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
   3377 	}
   3378 
   3379 	RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
   3380 	{
   3381 		return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
   3382 	}
   3383 
   3384 //	RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
   3385 //	{
   3386 //		return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
   3387 //	}
   3388 
   3389 //	RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
   3390 //	{
   3391 //		return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
   3392 //	}
   3393 
   3394 	RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
   3395 	{
   3396 		return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
   3397 	}
   3398 
   3399 	RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
   3400 	{
   3401 		return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
   3402 	}
   3403 
   3404 	RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
   3405 	{
   3406 		return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
   3407 	}
   3408 
   3409 	RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
   3410 	{
   3411 #if defined(__i386__) || defined(__x86_64__)
   3412 	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
   3413 
   3414 		return x86::psllw(lhs, rhs);
   3415 #else
   3416 		return As<Short4>(V(lowerVectorShl(V(lhs.value), rhs)));
   3417 #endif
   3418 	}
   3419 
   3420 	RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
   3421 	{
   3422 #if defined(__i386__) || defined(__x86_64__)
   3423 		return x86::psraw(lhs, rhs);
   3424 #else
   3425 		return As<Short4>(V(lowerVectorAShr(V(lhs.value), rhs)));
   3426 #endif
   3427 	}
   3428 
   3429 	RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
   3430 	{
   3431 		return lhs = lhs + rhs;
   3432 	}
   3433 
   3434 	RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)
   3435 	{
   3436 		return lhs = lhs - rhs;
   3437 	}
   3438 
   3439 	RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)
   3440 	{
   3441 		return lhs = lhs * rhs;
   3442 	}
   3443 
   3444 //	RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
   3445 //	{
   3446 //		return lhs = lhs / rhs;
   3447 //	}
   3448 
   3449 //	RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
   3450 //	{
   3451 //		return lhs = lhs % rhs;
   3452 //	}
   3453 
   3454 	RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)
   3455 	{
   3456 		return lhs = lhs & rhs;
   3457 	}
   3458 
   3459 	RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)
   3460 	{
   3461 		return lhs = lhs | rhs;
   3462 	}
   3463 
   3464 	RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)
   3465 	{
   3466 		return lhs = lhs ^ rhs;
   3467 	}
   3468 
   3469 	RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)
   3470 	{
   3471 		return lhs = lhs << rhs;
   3472 	}
   3473 
   3474 	RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)
   3475 	{
   3476 		return lhs = lhs >> rhs;
   3477 	}
   3478 
   3479 //	RValue<Short4> operator+(RValue<Short4> val)
   3480 //	{
   3481 //		return val;
   3482 //	}
   3483 
   3484 	RValue<Short4> operator-(RValue<Short4> val)
   3485 	{
   3486 		return RValue<Short4>(Nucleus::createNeg(val.value));
   3487 	}
   3488 
   3489 	RValue<Short4> operator~(RValue<Short4> val)
   3490 	{
   3491 		return RValue<Short4>(Nucleus::createNot(val.value));
   3492 	}
   3493 
   3494 	RValue<Short4> RoundShort4(RValue<Float4> cast)
   3495 	{
   3496 		RValue<Int4> int4 = RoundInt(cast);
   3497 		return As<Short4>(PackSigned(int4, int4));
   3498 	}
   3499 
   3500 	RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
   3501 	{
   3502 #if defined(__i386__) || defined(__x86_64__)
   3503 		return x86::pmaxsw(x, y);
   3504 #else
   3505 		return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
   3506 #endif
   3507 	}
   3508 
   3509 	RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
   3510 	{
   3511 #if defined(__i386__) || defined(__x86_64__)
   3512 		return x86::pminsw(x, y);
   3513 #else
   3514 		return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
   3515 #endif
   3516 	}
   3517 
   3518 	RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
   3519 	{
   3520 #if defined(__i386__) || defined(__x86_64__)
   3521 		return x86::paddsw(x, y);
   3522 #else
   3523 		return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value))));
   3524 #endif
   3525 	}
   3526 
   3527 	RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
   3528 	{
   3529 #if defined(__i386__) || defined(__x86_64__)
   3530 		return x86::psubsw(x, y);
   3531 #else
   3532 		return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
   3533 #endif
   3534 	}
   3535 
   3536 	RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
   3537 	{
   3538 #if defined(__i386__) || defined(__x86_64__)
   3539 		return x86::pmulhw(x, y);
   3540 #else
   3541 		return As<Short4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
   3542 #endif
   3543 	}
   3544 
   3545 	RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
   3546 	{
   3547 #if defined(__i386__) || defined(__x86_64__)
   3548 		return x86::pmaddwd(x, y);
   3549 #else
   3550 		return As<Int2>(V(lowerMulAdd(V(x.value), V(y.value))));
   3551 #endif
   3552 	}
   3553 
   3554 	RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
   3555 	{
   3556 #if defined(__i386__) || defined(__x86_64__)
   3557 		auto result = x86::packsswb(x, y);
   3558 #else
   3559 		auto result = V(lowerPack(V(x.value), V(y.value), true));
   3560 #endif
   3561 		return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
   3562 	}
   3563 
   3564 	RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
   3565 	{
   3566 #if defined(__i386__) || defined(__x86_64__)
   3567 		auto result = x86::packuswb(x, y);
   3568 #else
   3569 		auto result = V(lowerPack(V(x.value), V(y.value), false));
   3570 #endif
   3571 		return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
   3572 	}
   3573 
   3574 	RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
   3575 	{
   3576 		int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
   3577 		return As<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   3578 	}
   3579 
   3580 	RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
   3581 	{
   3582 		int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
   3583 		auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   3584 		return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));
   3585 	}
   3586 
   3587 	RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
   3588 	{
   3589 		// Real type is v8i16
   3590 		int shuffle[8] =
   3591 		{
   3592 			(select >> 0) & 0x03,
   3593 			(select >> 2) & 0x03,
   3594 			(select >> 4) & 0x03,
   3595 			(select >> 6) & 0x03,
   3596 			(select >> 0) & 0x03,
   3597 			(select >> 2) & 0x03,
   3598 			(select >> 4) & 0x03,
   3599 			(select >> 6) & 0x03,
   3600 		};
   3601 
   3602 		return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
   3603 	}
   3604 
   3605 	RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
   3606 	{
   3607 		return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
   3608 	}
   3609 
   3610 	RValue<Short> Extract(RValue<Short4> val, int i)
   3611 	{
   3612 		return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
   3613 	}
   3614 
   3615 	RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
   3616 	{
   3617 #if defined(__i386__) || defined(__x86_64__)
   3618 		return x86::pcmpgtw(x, y);
   3619 #else
   3620 		return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
   3621 #endif
   3622 	}
   3623 
   3624 	RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
   3625 	{
   3626 #if defined(__i386__) || defined(__x86_64__)
   3627 		return x86::pcmpeqw(x, y);
   3628 #else
   3629 		return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
   3630 #endif
   3631 	}
   3632 
   3633 	Type *Short4::getType()
   3634 	{
   3635 		return T(Type_v4i16);
   3636 	}
   3637 
   3638 	UShort4::UShort4(RValue<Int4> cast)
   3639 	{
   3640 		*this = Short4(cast);
   3641 	}
   3642 
   3643 	UShort4::UShort4(RValue<Float4> cast, bool saturate)
   3644 	{
   3645 		if(saturate)
   3646 		{
   3647 #if defined(__i386__) || defined(__x86_64__)
   3648 			if(CPUID::supportsSSE4_1())
   3649 			{
   3650 				Int4 int4(Min(cast, Float4(0xFFFF)));   // packusdw takes care of 0x0000 saturation
   3651 				*this = As<Short4>(PackUnsigned(int4, int4));
   3652 			}
   3653 			else
   3654 #endif
   3655 			{
   3656 				*this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
   3657 			}
   3658 		}
   3659 		else
   3660 		{
   3661 			*this = Short4(Int4(cast));
   3662 		}
   3663 	}
   3664 
   3665 	UShort4::UShort4(unsigned short xyzw)
   3666 	{
   3667 		int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
   3668 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   3669 	}
   3670 
   3671 	UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
   3672 	{
   3673 		int64_t constantVector[4] = {x, y, z, w};
   3674 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   3675 	}
   3676 
   3677 	UShort4::UShort4(RValue<UShort4> rhs)
   3678 	{
   3679 		storeValue(rhs.value);
   3680 	}
   3681 
   3682 	UShort4::UShort4(const UShort4 &rhs)
   3683 	{
   3684 		Value *value = rhs.loadValue();
   3685 		storeValue(value);
   3686 	}
   3687 
   3688 	UShort4::UShort4(const Reference<UShort4> &rhs)
   3689 	{
   3690 		Value *value = rhs.loadValue();
   3691 		storeValue(value);
   3692 	}
   3693 
   3694 	UShort4::UShort4(RValue<Short4> rhs)
   3695 	{
   3696 		storeValue(rhs.value);
   3697 	}
   3698 
   3699 	UShort4::UShort4(const Short4 &rhs)
   3700 	{
   3701 		Value *value = rhs.loadValue();
   3702 		storeValue(value);
   3703 	}
   3704 
   3705 	UShort4::UShort4(const Reference<Short4> &rhs)
   3706 	{
   3707 		Value *value = rhs.loadValue();
   3708 		storeValue(value);
   3709 	}
   3710 
   3711 	RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
   3712 	{
   3713 		storeValue(rhs.value);
   3714 
   3715 		return rhs;
   3716 	}
   3717 
   3718 	RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
   3719 	{
   3720 		Value *value = rhs.loadValue();
   3721 		storeValue(value);
   3722 
   3723 		return RValue<UShort4>(value);
   3724 	}
   3725 
   3726 	RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
   3727 	{
   3728 		Value *value = rhs.loadValue();
   3729 		storeValue(value);
   3730 
   3731 		return RValue<UShort4>(value);
   3732 	}
   3733 
   3734 	RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
   3735 	{
   3736 		storeValue(rhs.value);
   3737 
   3738 		return RValue<UShort4>(rhs);
   3739 	}
   3740 
   3741 	RValue<UShort4> UShort4::operator=(const Short4 &rhs)
   3742 	{
   3743 		Value *value = rhs.loadValue();
   3744 		storeValue(value);
   3745 
   3746 		return RValue<UShort4>(value);
   3747 	}
   3748 
   3749 	RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
   3750 	{
   3751 		Value *value = rhs.loadValue();
   3752 		storeValue(value);
   3753 
   3754 		return RValue<UShort4>(value);
   3755 	}
   3756 
   3757 	RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
   3758 	{
   3759 		return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
   3760 	}
   3761 
   3762 	RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
   3763 	{
   3764 		return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
   3765 	}
   3766 
   3767 	RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
   3768 	{
   3769 		return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
   3770 	}
   3771 
   3772 	RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
   3773 	{
   3774 		return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
   3775 	}
   3776 
   3777 	RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
   3778 	{
   3779 		return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
   3780 	}
   3781 
   3782 	RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
   3783 	{
   3784 		return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
   3785 	}
   3786 
   3787 	RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
   3788 	{
   3789 #if defined(__i386__) || defined(__x86_64__)
   3790 	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
   3791 
   3792 		return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
   3793 #else
   3794 		return As<UShort4>(V(lowerVectorShl(V(lhs.value), rhs)));
   3795 #endif
   3796 	}
   3797 
   3798 	RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
   3799 	{
   3800 #if defined(__i386__) || defined(__x86_64__)
   3801 	//	return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
   3802 
   3803 		return x86::psrlw(lhs, rhs);
   3804 #else
   3805 		return As<UShort4>(V(lowerVectorLShr(V(lhs.value), rhs)));
   3806 #endif
   3807 	}
   3808 
   3809 	RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
   3810 	{
   3811 		return lhs = lhs << rhs;
   3812 	}
   3813 
   3814 	RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
   3815 	{
   3816 		return lhs = lhs >> rhs;
   3817 	}
   3818 
   3819 	RValue<UShort4> operator~(RValue<UShort4> val)
   3820 	{
   3821 		return RValue<UShort4>(Nucleus::createNot(val.value));
   3822 	}
   3823 
   3824 	RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
   3825 	{
   3826 		return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
   3827 	}
   3828 
   3829 	RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
   3830 	{
   3831 		return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
   3832 	}
   3833 
   3834 	RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
   3835 	{
   3836 #if defined(__i386__) || defined(__x86_64__)
   3837 		return x86::paddusw(x, y);
   3838 #else
   3839 		return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value))));
   3840 #endif
   3841 	}
   3842 
   3843 	RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
   3844 	{
   3845 #if defined(__i386__) || defined(__x86_64__)
   3846 		return x86::psubusw(x, y);
   3847 #else
   3848 		return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
   3849 #endif
   3850 	}
   3851 
   3852 	RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
   3853 	{
   3854 #if defined(__i386__) || defined(__x86_64__)
   3855 		return x86::pmulhuw(x, y);
   3856 #else
   3857 		return As<UShort4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
   3858 #endif
   3859 	}
   3860 
   3861 	RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
   3862 	{
   3863 #if defined(__i386__) || defined(__x86_64__)
   3864 		return x86::pavgw(x, y);
   3865 #else
   3866 		return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
   3867 #endif
   3868 	}
   3869 
   3870 	Type *UShort4::getType()
   3871 	{
   3872 		return T(Type_v4i16);
   3873 	}
   3874 
   3875 	Short8::Short8(short c)
   3876 	{
   3877 		int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
   3878 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   3879 	}
   3880 
   3881 	Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
   3882 	{
   3883 		int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
   3884 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   3885 	}
   3886 
   3887 	Short8::Short8(RValue<Short8> rhs)
   3888 	{
   3889 		storeValue(rhs.value);
   3890 	}
   3891 
   3892 	Short8::Short8(const Reference<Short8> &rhs)
   3893 	{
   3894 		Value *value = rhs.loadValue();
   3895 		storeValue(value);
   3896 	}
   3897 
   3898 	Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
   3899 	{
   3900 		int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
   3901 		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
   3902 
   3903 		storeValue(packed);
   3904 	}
   3905 
   3906 	RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
   3907 	{
   3908 		return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
   3909 	}
   3910 
   3911 	RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
   3912 	{
   3913 		return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
   3914 	}
   3915 
   3916 	RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
   3917 	{
   3918 #if defined(__i386__) || defined(__x86_64__)
   3919 		return x86::psllw(lhs, rhs);
   3920 #else
   3921 		return As<Short8>(V(lowerVectorShl(V(lhs.value), rhs)));
   3922 #endif
   3923 	}
   3924 
   3925 	RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
   3926 	{
   3927 #if defined(__i386__) || defined(__x86_64__)
   3928 		return x86::psraw(lhs, rhs);
   3929 #else
   3930 		return As<Short8>(V(lowerVectorAShr(V(lhs.value), rhs)));
   3931 #endif
   3932 	}
   3933 
   3934 	RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
   3935 	{
   3936 #if defined(__i386__) || defined(__x86_64__)
   3937 		return x86::pmaddwd(x, y);
   3938 #else
   3939 		return As<Int4>(V(lowerMulAdd(V(x.value), V(y.value))));
   3940 #endif
   3941 	}
   3942 
   3943 	RValue<Int4> Abs(RValue<Int4> x)
   3944 	{
   3945 		auto negative = x >> 31;
   3946 		return (x ^ negative) - negative;
   3947 	}
   3948 
   3949 	RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
   3950 	{
   3951 #if defined(__i386__) || defined(__x86_64__)
   3952 		return x86::pmulhw(x, y);
   3953 #else
   3954 		return As<Short8>(V(lowerMulHigh(V(x.value), V(y.value), true)));
   3955 #endif
   3956 	}
   3957 
   3958 	Type *Short8::getType()
   3959 	{
   3960 		return T(llvm::VectorType::get(T(Short::getType()), 8));
   3961 	}
   3962 
   3963 	UShort8::UShort8(unsigned short c)
   3964 	{
   3965 		int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
   3966 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   3967 	}
   3968 
   3969 	UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
   3970 	{
   3971 		int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
   3972 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   3973 	}
   3974 
   3975 	UShort8::UShort8(RValue<UShort8> rhs)
   3976 	{
   3977 		storeValue(rhs.value);
   3978 	}
   3979 
   3980 	UShort8::UShort8(const Reference<UShort8> &rhs)
   3981 	{
   3982 		Value *value = rhs.loadValue();
   3983 		storeValue(value);
   3984 	}
   3985 
   3986 	UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
   3987 	{
   3988 		int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
   3989 		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
   3990 
   3991 		storeValue(packed);
   3992 	}
   3993 
   3994 	RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
   3995 	{
   3996 		storeValue(rhs.value);
   3997 
   3998 		return rhs;
   3999 	}
   4000 
   4001 	RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
   4002 	{
   4003 		Value *value = rhs.loadValue();
   4004 		storeValue(value);
   4005 
   4006 		return RValue<UShort8>(value);
   4007 	}
   4008 
   4009 	RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
   4010 	{
   4011 		Value *value = rhs.loadValue();
   4012 		storeValue(value);
   4013 
   4014 		return RValue<UShort8>(value);
   4015 	}
   4016 
   4017 	RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
   4018 	{
   4019 		return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
   4020 	}
   4021 
   4022 	RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
   4023 	{
   4024 #if defined(__i386__) || defined(__x86_64__)
   4025 		return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));
   4026 #else
   4027 		return As<UShort8>(V(lowerVectorShl(V(lhs.value), rhs)));
   4028 #endif
   4029 	}
   4030 
   4031 	RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
   4032 	{
   4033 #if defined(__i386__) || defined(__x86_64__)
   4034 		return x86::psrlw(lhs, rhs);   // FIXME: Fallback required
   4035 #else
   4036 		return As<UShort8>(V(lowerVectorLShr(V(lhs.value), rhs)));
   4037 #endif
   4038 	}
   4039 
   4040 	RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
   4041 	{
   4042 		return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
   4043 	}
   4044 
   4045 	RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
   4046 	{
   4047 		return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
   4048 	}
   4049 
   4050 	RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
   4051 	{
   4052 		return lhs = lhs + rhs;
   4053 	}
   4054 
   4055 	RValue<UShort8> operator~(RValue<UShort8> val)
   4056 	{
   4057 		return RValue<UShort8>(Nucleus::createNot(val.value));
   4058 	}
   4059 
   4060 	RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
   4061 	{
   4062 		int pshufb[16] =
   4063 		{
   4064 			select0 + 0,
   4065 			select0 + 1,
   4066 			select1 + 0,
   4067 			select1 + 1,
   4068 			select2 + 0,
   4069 			select2 + 1,
   4070 			select3 + 0,
   4071 			select3 + 1,
   4072 			select4 + 0,
   4073 			select4 + 1,
   4074 			select5 + 0,
   4075 			select5 + 1,
   4076 			select6 + 0,
   4077 			select6 + 1,
   4078 			select7 + 0,
   4079 			select7 + 1,
   4080 		};
   4081 
   4082 		Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
   4083 		Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
   4084 		Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
   4085 
   4086 		return RValue<UShort8>(short8);
   4087 	}
   4088 
   4089 	RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
   4090 	{
   4091 #if defined(__i386__) || defined(__x86_64__)
   4092 		return x86::pmulhuw(x, y);
   4093 #else
   4094 		return As<UShort8>(V(lowerMulHigh(V(x.value), V(y.value), false)));
   4095 #endif
   4096 	}
   4097 
   4098 	Type *UShort8::getType()
   4099 	{
   4100 		return T(llvm::VectorType::get(T(UShort::getType()), 8));
   4101 	}
   4102 
   4103 	Int::Int(Argument<Int> argument)
   4104 	{
   4105 		storeValue(argument.value);
   4106 	}
   4107 
   4108 	Int::Int(RValue<Byte> cast)
   4109 	{
   4110 		Value *integer = Nucleus::createZExt(cast.value, Int::getType());
   4111 
   4112 		storeValue(integer);
   4113 	}
   4114 
   4115 	Int::Int(RValue<SByte> cast)
   4116 	{
   4117 		Value *integer = Nucleus::createSExt(cast.value, Int::getType());
   4118 
   4119 		storeValue(integer);
   4120 	}
   4121 
   4122 	Int::Int(RValue<Short> cast)
   4123 	{
   4124 		Value *integer = Nucleus::createSExt(cast.value, Int::getType());
   4125 
   4126 		storeValue(integer);
   4127 	}
   4128 
   4129 	Int::Int(RValue<UShort> cast)
   4130 	{
   4131 		Value *integer = Nucleus::createZExt(cast.value, Int::getType());
   4132 
   4133 		storeValue(integer);
   4134 	}
   4135 
   4136 	Int::Int(RValue<Int2> cast)
   4137 	{
   4138 		*this = Extract(cast, 0);
   4139 	}
   4140 
   4141 	Int::Int(RValue<Long> cast)
   4142 	{
   4143 		Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
   4144 
   4145 		storeValue(integer);
   4146 	}
   4147 
   4148 	Int::Int(RValue<Float> cast)
   4149 	{
   4150 		Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
   4151 
   4152 		storeValue(integer);
   4153 	}
   4154 
   4155 	Int::Int(int x)
   4156 	{
   4157 		storeValue(Nucleus::createConstantInt(x));
   4158 	}
   4159 
   4160 	Int::Int(RValue<Int> rhs)
   4161 	{
   4162 		storeValue(rhs.value);
   4163 	}
   4164 
   4165 	Int::Int(RValue<UInt> rhs)
   4166 	{
   4167 		storeValue(rhs.value);
   4168 	}
   4169 
   4170 	Int::Int(const Int &rhs)
   4171 	{
   4172 		Value *value = rhs.loadValue();
   4173 		storeValue(value);
   4174 	}
   4175 
   4176 	Int::Int(const Reference<Int> &rhs)
   4177 	{
   4178 		Value *value = rhs.loadValue();
   4179 		storeValue(value);
   4180 	}
   4181 
   4182 	Int::Int(const UInt &rhs)
   4183 	{
   4184 		Value *value = rhs.loadValue();
   4185 		storeValue(value);
   4186 	}
   4187 
   4188 	Int::Int(const Reference<UInt> &rhs)
   4189 	{
   4190 		Value *value = rhs.loadValue();
   4191 		storeValue(value);
   4192 	}
   4193 
   4194 	RValue<Int> Int::operator=(int rhs)
   4195 	{
   4196 		return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
   4197 	}
   4198 
   4199 	RValue<Int> Int::operator=(RValue<Int> rhs)
   4200 	{
   4201 		storeValue(rhs.value);
   4202 
   4203 		return rhs;
   4204 	}
   4205 
   4206 	RValue<Int> Int::operator=(RValue<UInt> rhs)
   4207 	{
   4208 		storeValue(rhs.value);
   4209 
   4210 		return RValue<Int>(rhs);
   4211 	}
   4212 
   4213 	RValue<Int> Int::operator=(const Int &rhs)
   4214 	{
   4215 		Value *value = rhs.loadValue();
   4216 		storeValue(value);
   4217 
   4218 		return RValue<Int>(value);
   4219 	}
   4220 
   4221 	RValue<Int> Int::operator=(const Reference<Int> &rhs)
   4222 	{
   4223 		Value *value = rhs.loadValue();
   4224 		storeValue(value);
   4225 
   4226 		return RValue<Int>(value);
   4227 	}
   4228 
   4229 	RValue<Int> Int::operator=(const UInt &rhs)
   4230 	{
   4231 		Value *value = rhs.loadValue();
   4232 		storeValue(value);
   4233 
   4234 		return RValue<Int>(value);
   4235 	}
   4236 
   4237 	RValue<Int> Int::operator=(const Reference<UInt> &rhs)
   4238 	{
   4239 		Value *value = rhs.loadValue();
   4240 		storeValue(value);
   4241 
   4242 		return RValue<Int>(value);
   4243 	}
   4244 
   4245 	RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
   4246 	{
   4247 		return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
   4248 	}
   4249 
   4250 	RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
   4251 	{
   4252 		return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
   4253 	}
   4254 
   4255 	RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
   4256 	{
   4257 		return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
   4258 	}
   4259 
   4260 	RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
   4261 	{
   4262 		return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
   4263 	}
   4264 
   4265 	RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
   4266 	{
   4267 		return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
   4268 	}
   4269 
   4270 	RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
   4271 	{
   4272 		return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
   4273 	}
   4274 
   4275 	RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
   4276 	{
   4277 		return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
   4278 	}
   4279 
   4280 	RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
   4281 	{
   4282 		return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
   4283 	}
   4284 
   4285 	RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
   4286 	{
   4287 		return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
   4288 	}
   4289 
   4290 	RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
   4291 	{
   4292 		return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
   4293 	}
   4294 
   4295 	RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
   4296 	{
   4297 		return lhs = lhs + rhs;
   4298 	}
   4299 
   4300 	RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
   4301 	{
   4302 		return lhs = lhs - rhs;
   4303 	}
   4304 
   4305 	RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
   4306 	{
   4307 		return lhs = lhs * rhs;
   4308 	}
   4309 
   4310 	RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
   4311 	{
   4312 		return lhs = lhs / rhs;
   4313 	}
   4314 
   4315 	RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
   4316 	{
   4317 		return lhs = lhs % rhs;
   4318 	}
   4319 
   4320 	RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
   4321 	{
   4322 		return lhs = lhs & rhs;
   4323 	}
   4324 
   4325 	RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
   4326 	{
   4327 		return lhs = lhs | rhs;
   4328 	}
   4329 
   4330 	RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
   4331 	{
   4332 		return lhs = lhs ^ rhs;
   4333 	}
   4334 
   4335 	RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
   4336 	{
   4337 		return lhs = lhs << rhs;
   4338 	}
   4339 
   4340 	RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
   4341 	{
   4342 		return lhs = lhs >> rhs;
   4343 	}
   4344 
   4345 	RValue<Int> operator+(RValue<Int> val)
   4346 	{
   4347 		return val;
   4348 	}
   4349 
   4350 	RValue<Int> operator-(RValue<Int> val)
   4351 	{
   4352 		return RValue<Int>(Nucleus::createNeg(val.value));
   4353 	}
   4354 
   4355 	RValue<Int> operator~(RValue<Int> val)
   4356 	{
   4357 		return RValue<Int>(Nucleus::createNot(val.value));
   4358 	}
   4359 
   4360 	RValue<Int> operator++(Int &val, int)   // Post-increment
   4361 	{
   4362 		RValue<Int> res = val;
   4363 
   4364 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
   4365 		val.storeValue(inc);
   4366 
   4367 		return res;
   4368 	}
   4369 
   4370 	const Int &operator++(Int &val)   // Pre-increment
   4371 	{
   4372 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
   4373 		val.storeValue(inc);
   4374 
   4375 		return val;
   4376 	}
   4377 
   4378 	RValue<Int> operator--(Int &val, int)   // Post-decrement
   4379 	{
   4380 		RValue<Int> res = val;
   4381 
   4382 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
   4383 		val.storeValue(inc);
   4384 
   4385 		return res;
   4386 	}
   4387 
   4388 	const Int &operator--(Int &val)   // Pre-decrement
   4389 	{
   4390 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
   4391 		val.storeValue(inc);
   4392 
   4393 		return val;
   4394 	}
   4395 
   4396 	RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
   4397 	{
   4398 		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
   4399 	}
   4400 
   4401 	RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
   4402 	{
   4403 		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
   4404 	}
   4405 
   4406 	RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
   4407 	{
   4408 		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
   4409 	}
   4410 
   4411 	RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
   4412 	{
   4413 		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
   4414 	}
   4415 
   4416 	RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
   4417 	{
   4418 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   4419 	}
   4420 
   4421 	RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
   4422 	{
   4423 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   4424 	}
   4425 
   4426 	RValue<Int> Max(RValue<Int> x, RValue<Int> y)
   4427 	{
   4428 		return IfThenElse(x > y, x, y);
   4429 	}
   4430 
   4431 	RValue<Int> Min(RValue<Int> x, RValue<Int> y)
   4432 	{
   4433 		return IfThenElse(x < y, x, y);
   4434 	}
   4435 
   4436 	RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
   4437 	{
   4438 		return Min(Max(x, min), max);
   4439 	}
   4440 
   4441 	RValue<Int> RoundInt(RValue<Float> cast)
   4442 	{
   4443 #if defined(__i386__) || defined(__x86_64__)
   4444 		return x86::cvtss2si(cast);
   4445 #else
   4446 		return RValue<Int>(V(lowerRoundInt(V(cast.value), T(Int::getType()))));
   4447 #endif
   4448 	}
   4449 
   4450 	Type *Int::getType()
   4451 	{
   4452 		return T(llvm::Type::getInt32Ty(*::context));
   4453 	}
   4454 
   4455 	Long::Long(RValue<Int> cast)
   4456 	{
   4457 		Value *integer = Nucleus::createSExt(cast.value, Long::getType());
   4458 
   4459 		storeValue(integer);
   4460 	}
   4461 
   4462 	Long::Long(RValue<UInt> cast)
   4463 	{
   4464 		Value *integer = Nucleus::createZExt(cast.value, Long::getType());
   4465 
   4466 		storeValue(integer);
   4467 	}
   4468 
   4469 	Long::Long(RValue<Long> rhs)
   4470 	{
   4471 		storeValue(rhs.value);
   4472 	}
   4473 
   4474 	RValue<Long> Long::operator=(int64_t rhs)
   4475 	{
   4476 		return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
   4477 	}
   4478 
   4479 	RValue<Long> Long::operator=(RValue<Long> rhs)
   4480 	{
   4481 		storeValue(rhs.value);
   4482 
   4483 		return rhs;
   4484 	}
   4485 
   4486 	RValue<Long> Long::operator=(const Long &rhs)
   4487 	{
   4488 		Value *value = rhs.loadValue();
   4489 		storeValue(value);
   4490 
   4491 		return RValue<Long>(value);
   4492 	}
   4493 
   4494 	RValue<Long> Long::operator=(const Reference<Long> &rhs)
   4495 	{
   4496 		Value *value = rhs.loadValue();
   4497 		storeValue(value);
   4498 
   4499 		return RValue<Long>(value);
   4500 	}
   4501 
   4502 	RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
   4503 	{
   4504 		return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
   4505 	}
   4506 
   4507 	RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
   4508 	{
   4509 		return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
   4510 	}
   4511 
   4512 	RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
   4513 	{
   4514 		return lhs = lhs + rhs;
   4515 	}
   4516 
   4517 	RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)
   4518 	{
   4519 		return lhs = lhs - rhs;
   4520 	}
   4521 
   4522 	RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
   4523 	{
   4524 		return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
   4525 	}
   4526 
   4527 	Type *Long::getType()
   4528 	{
   4529 		return T(llvm::Type::getInt64Ty(*::context));
   4530 	}
   4531 
   4532 	UInt::UInt(Argument<UInt> argument)
   4533 	{
   4534 		storeValue(argument.value);
   4535 	}
   4536 
   4537 	UInt::UInt(RValue<UShort> cast)
   4538 	{
   4539 		Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
   4540 
   4541 		storeValue(integer);
   4542 	}
   4543 
   4544 	UInt::UInt(RValue<Long> cast)
   4545 	{
   4546 		Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
   4547 
   4548 		storeValue(integer);
   4549 	}
   4550 
   4551 	UInt::UInt(RValue<Float> cast)
   4552 	{
   4553 		// Note: createFPToUI is broken, must perform conversion using createFPtoSI
   4554 		// Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
   4555 
   4556 		// Smallest positive value representable in UInt, but not in Int
   4557 		const unsigned int ustart = 0x80000000u;
   4558 		const float ustartf = float(ustart);
   4559 
   4560 		// If the value is negative, store 0, otherwise store the result of the conversion
   4561 		storeValue((~(As<Int>(cast) >> 31) &
   4562 		// Check if the value can be represented as an Int
   4563 			IfThenElse(cast >= ustartf,
   4564 		// If the value is too large, subtract ustart and re-add it after conversion.
   4565 				As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
   4566 		// Otherwise, just convert normally
   4567 				Int(cast))).value);
   4568 	}
   4569 
   4570 	UInt::UInt(int x)
   4571 	{
   4572 		storeValue(Nucleus::createConstantInt(x));
   4573 	}
   4574 
   4575 	UInt::UInt(unsigned int x)
   4576 	{
   4577 		storeValue(Nucleus::createConstantInt(x));
   4578 	}
   4579 
   4580 	UInt::UInt(RValue<UInt> rhs)
   4581 	{
   4582 		storeValue(rhs.value);
   4583 	}
   4584 
   4585 	UInt::UInt(RValue<Int> rhs)
   4586 	{
   4587 		storeValue(rhs.value);
   4588 	}
   4589 
   4590 	UInt::UInt(const UInt &rhs)
   4591 	{
   4592 		Value *value = rhs.loadValue();
   4593 		storeValue(value);
   4594 	}
   4595 
   4596 	UInt::UInt(const Reference<UInt> &rhs)
   4597 	{
   4598 		Value *value = rhs.loadValue();
   4599 		storeValue(value);
   4600 	}
   4601 
   4602 	UInt::UInt(const Int &rhs)
   4603 	{
   4604 		Value *value = rhs.loadValue();
   4605 		storeValue(value);
   4606 	}
   4607 
   4608 	UInt::UInt(const Reference<Int> &rhs)
   4609 	{
   4610 		Value *value = rhs.loadValue();
   4611 		storeValue(value);
   4612 	}
   4613 
   4614 	RValue<UInt> UInt::operator=(unsigned int rhs)
   4615 	{
   4616 		return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
   4617 	}
   4618 
   4619 	RValue<UInt> UInt::operator=(RValue<UInt> rhs)
   4620 	{
   4621 		storeValue(rhs.value);
   4622 
   4623 		return rhs;
   4624 	}
   4625 
   4626 	RValue<UInt> UInt::operator=(RValue<Int> rhs)
   4627 	{
   4628 		storeValue(rhs.value);
   4629 
   4630 		return RValue<UInt>(rhs);
   4631 	}
   4632 
   4633 	RValue<UInt> UInt::operator=(const UInt &rhs)
   4634 	{
   4635 		Value *value = rhs.loadValue();
   4636 		storeValue(value);
   4637 
   4638 		return RValue<UInt>(value);
   4639 	}
   4640 
   4641 	RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
   4642 	{
   4643 		Value *value = rhs.loadValue();
   4644 		storeValue(value);
   4645 
   4646 		return RValue<UInt>(value);
   4647 	}
   4648 
   4649 	RValue<UInt> UInt::operator=(const Int &rhs)
   4650 	{
   4651 		Value *value = rhs.loadValue();
   4652 		storeValue(value);
   4653 
   4654 		return RValue<UInt>(value);
   4655 	}
   4656 
   4657 	RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
   4658 	{
   4659 		Value *value = rhs.loadValue();
   4660 		storeValue(value);
   4661 
   4662 		return RValue<UInt>(value);
   4663 	}
   4664 
   4665 	RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
   4666 	{
   4667 		return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
   4668 	}
   4669 
   4670 	RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
   4671 	{
   4672 		return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
   4673 	}
   4674 
   4675 	RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
   4676 	{
   4677 		return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
   4678 	}
   4679 
   4680 	RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
   4681 	{
   4682 		return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
   4683 	}
   4684 
   4685 	RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
   4686 	{
   4687 		return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
   4688 	}
   4689 
   4690 	RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
   4691 	{
   4692 		return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
   4693 	}
   4694 
   4695 	RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
   4696 	{
   4697 		return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
   4698 	}
   4699 
   4700 	RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
   4701 	{
   4702 		return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
   4703 	}
   4704 
   4705 	RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
   4706 	{
   4707 		return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
   4708 	}
   4709 
   4710 	RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
   4711 	{
   4712 		return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
   4713 	}
   4714 
   4715 	RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)
   4716 	{
   4717 		return lhs = lhs + rhs;
   4718 	}
   4719 
   4720 	RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)
   4721 	{
   4722 		return lhs = lhs - rhs;
   4723 	}
   4724 
   4725 	RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)
   4726 	{
   4727 		return lhs = lhs * rhs;
   4728 	}
   4729 
   4730 	RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)
   4731 	{
   4732 		return lhs = lhs / rhs;
   4733 	}
   4734 
   4735 	RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)
   4736 	{
   4737 		return lhs = lhs % rhs;
   4738 	}
   4739 
   4740 	RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)
   4741 	{
   4742 		return lhs = lhs & rhs;
   4743 	}
   4744 
   4745 	RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)
   4746 	{
   4747 		return lhs = lhs | rhs;
   4748 	}
   4749 
   4750 	RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)
   4751 	{
   4752 		return lhs = lhs ^ rhs;
   4753 	}
   4754 
   4755 	RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)
   4756 	{
   4757 		return lhs = lhs << rhs;
   4758 	}
   4759 
   4760 	RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)
   4761 	{
   4762 		return lhs = lhs >> rhs;
   4763 	}
   4764 
   4765 	RValue<UInt> operator+(RValue<UInt> val)
   4766 	{
   4767 		return val;
   4768 	}
   4769 
   4770 	RValue<UInt> operator-(RValue<UInt> val)
   4771 	{
   4772 		return RValue<UInt>(Nucleus::createNeg(val.value));
   4773 	}
   4774 
   4775 	RValue<UInt> operator~(RValue<UInt> val)
   4776 	{
   4777 		return RValue<UInt>(Nucleus::createNot(val.value));
   4778 	}
   4779 
   4780 	RValue<UInt> operator++(UInt &val, int)   // Post-increment
   4781 	{
   4782 		RValue<UInt> res = val;
   4783 
   4784 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
   4785 		val.storeValue(inc);
   4786 
   4787 		return res;
   4788 	}
   4789 
   4790 	const UInt &operator++(UInt &val)   // Pre-increment
   4791 	{
   4792 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
   4793 		val.storeValue(inc);
   4794 
   4795 		return val;
   4796 	}
   4797 
   4798 	RValue<UInt> operator--(UInt &val, int)   // Post-decrement
   4799 	{
   4800 		RValue<UInt> res = val;
   4801 
   4802 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
   4803 		val.storeValue(inc);
   4804 
   4805 		return res;
   4806 	}
   4807 
   4808 	const UInt &operator--(UInt &val)   // Pre-decrement
   4809 	{
   4810 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
   4811 		val.storeValue(inc);
   4812 
   4813 		return val;
   4814 	}
   4815 
   4816 	RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
   4817 	{
   4818 		return IfThenElse(x > y, x, y);
   4819 	}
   4820 
   4821 	RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
   4822 	{
   4823 		return IfThenElse(x < y, x, y);
   4824 	}
   4825 
   4826 	RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
   4827 	{
   4828 		return Min(Max(x, min), max);
   4829 	}
   4830 
   4831 	RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
   4832 	{
   4833 		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
   4834 	}
   4835 
   4836 	RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
   4837 	{
   4838 		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
   4839 	}
   4840 
   4841 	RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
   4842 	{
   4843 		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
   4844 	}
   4845 
   4846 	RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
   4847 	{
   4848 		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
   4849 	}
   4850 
   4851 	RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
   4852 	{
   4853 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   4854 	}
   4855 
   4856 	RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
   4857 	{
   4858 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   4859 	}
   4860 
   4861 //	RValue<UInt> RoundUInt(RValue<Float> cast)
   4862 //	{
   4863 //#if defined(__i386__) || defined(__x86_64__)
   4864 //		return x86::cvtss2si(val);   // FIXME: Unsigned
   4865 //#else
   4866 //		return IfThenElse(cast > 0.0f, Int(cast + 0.5f), Int(cast - 0.5f));
   4867 //#endif
   4868 //	}
   4869 
   4870 	Type *UInt::getType()
   4871 	{
   4872 		return T(llvm::Type::getInt32Ty(*::context));
   4873 	}
   4874 
   4875 //	Int2::Int2(RValue<Int> cast)
   4876 //	{
   4877 //		Value *extend = Nucleus::createZExt(cast.value, Long::getType());
   4878 //		Value *vector = Nucleus::createBitCast(extend, Int2::getType());
   4879 //
   4880 //		int shuffle[2] = {0, 0};
   4881 //		Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
   4882 //
   4883 //		storeValue(replicate);
   4884 //	}
   4885 
   4886 	Int2::Int2(RValue<Int4> cast)
   4887 	{
   4888 		storeValue(Nucleus::createBitCast(cast.value, getType()));
   4889 	}
   4890 
   4891 	Int2::Int2(int x, int y)
   4892 	{
   4893 		int64_t constantVector[2] = {x, y};
   4894 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   4895 	}
   4896 
   4897 	Int2::Int2(RValue<Int2> rhs)
   4898 	{
   4899 		storeValue(rhs.value);
   4900 	}
   4901 
   4902 	Int2::Int2(const Int2 &rhs)
   4903 	{
   4904 		Value *value = rhs.loadValue();
   4905 		storeValue(value);
   4906 	}
   4907 
   4908 	Int2::Int2(const Reference<Int2> &rhs)
   4909 	{
   4910 		Value *value = rhs.loadValue();
   4911 		storeValue(value);
   4912 	}
   4913 
   4914 	Int2::Int2(RValue<Int> lo, RValue<Int> hi)
   4915 	{
   4916 		int shuffle[4] = {0, 4, 1, 5};
   4917 		Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);
   4918 
   4919 		storeValue(Nucleus::createBitCast(packed, Int2::getType()));
   4920 	}
   4921 
   4922 	RValue<Int2> Int2::operator=(RValue<Int2> rhs)
   4923 	{
   4924 		storeValue(rhs.value);
   4925 
   4926 		return rhs;
   4927 	}
   4928 
   4929 	RValue<Int2> Int2::operator=(const Int2 &rhs)
   4930 	{
   4931 		Value *value = rhs.loadValue();
   4932 		storeValue(value);
   4933 
   4934 		return RValue<Int2>(value);
   4935 	}
   4936 
   4937 	RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
   4938 	{
   4939 		Value *value = rhs.loadValue();
   4940 		storeValue(value);
   4941 
   4942 		return RValue<Int2>(value);
   4943 	}
   4944 
   4945 	RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
   4946 	{
   4947 		return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
   4948 	}
   4949 
   4950 	RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
   4951 	{
   4952 		return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
   4953 	}
   4954 
   4955 //	RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
   4956 //	{
   4957 //		return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
   4958 //	}
   4959 
   4960 //	RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
   4961 //	{
   4962 //		return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
   4963 //	}
   4964 
   4965 //	RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
   4966 //	{
   4967 //		return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
   4968 //	}
   4969 
   4970 	RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
   4971 	{
   4972 		return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
   4973 	}
   4974 
   4975 	RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
   4976 	{
   4977 		return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
   4978 	}
   4979 
   4980 	RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
   4981 	{
   4982 		return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
   4983 	}
   4984 
   4985 	RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
   4986 	{
   4987 #if defined(__i386__) || defined(__x86_64__)
   4988 	//	return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
   4989 
   4990 		return x86::pslld(lhs, rhs);
   4991 #else
   4992 		return As<Int2>(V(lowerVectorShl(V(lhs.value), rhs)));
   4993 #endif
   4994 	}
   4995 
   4996 	RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
   4997 	{
   4998 #if defined(__i386__) || defined(__x86_64__)
   4999 	//	return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
   5000 
   5001 		return x86::psrad(lhs, rhs);
   5002 #else
   5003 		return As<Int2>(V(lowerVectorAShr(V(lhs.value), rhs)));
   5004 #endif
   5005 	}
   5006 
   5007 	RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
   5008 	{
   5009 		return lhs = lhs + rhs;
   5010 	}
   5011 
   5012 	RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)
   5013 	{
   5014 		return lhs = lhs - rhs;
   5015 	}
   5016 
   5017 //	RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
   5018 //	{
   5019 //		return lhs = lhs * rhs;
   5020 //	}
   5021 
   5022 //	RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
   5023 //	{
   5024 //		return lhs = lhs / rhs;
   5025 //	}
   5026 
   5027 //	RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
   5028 //	{
   5029 //		return lhs = lhs % rhs;
   5030 //	}
   5031 
   5032 	RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)
   5033 	{
   5034 		return lhs = lhs & rhs;
   5035 	}
   5036 
   5037 	RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)
   5038 	{
   5039 		return lhs = lhs | rhs;
   5040 	}
   5041 
   5042 	RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)
   5043 	{
   5044 		return lhs = lhs ^ rhs;
   5045 	}
   5046 
   5047 	RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)
   5048 	{
   5049 		return lhs = lhs << rhs;
   5050 	}
   5051 
   5052 	RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)
   5053 	{
   5054 		return lhs = lhs >> rhs;
   5055 	}
   5056 
   5057 //	RValue<Int2> operator+(RValue<Int2> val)
   5058 //	{
   5059 //		return val;
   5060 //	}
   5061 
   5062 //	RValue<Int2> operator-(RValue<Int2> val)
   5063 //	{
   5064 //		return RValue<Int2>(Nucleus::createNeg(val.value));
   5065 //	}
   5066 
   5067 	RValue<Int2> operator~(RValue<Int2> val)
   5068 	{
   5069 		return RValue<Int2>(Nucleus::createNot(val.value));
   5070 	}
   5071 
   5072 	RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
   5073 	{
   5074 		int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
   5075 		return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   5076 	}
   5077 
   5078 	RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
   5079 	{
   5080 		int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
   5081 		auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   5082 		return As<Short4>(Swizzle(lowHigh, 0xEE));
   5083 	}
   5084 
   5085 	RValue<Int> Extract(RValue<Int2> val, int i)
   5086 	{
   5087 		return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
   5088 	}
   5089 
   5090 	RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
   5091 	{
   5092 		return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
   5093 	}
   5094 
   5095 	Type *Int2::getType()
   5096 	{
   5097 		return T(Type_v2i32);
   5098 	}
   5099 
   5100 	UInt2::UInt2(unsigned int x, unsigned int y)
   5101 	{
   5102 		int64_t constantVector[2] = {x, y};
   5103 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   5104 	}
   5105 
   5106 	UInt2::UInt2(RValue<UInt2> rhs)
   5107 	{
   5108 		storeValue(rhs.value);
   5109 	}
   5110 
   5111 	UInt2::UInt2(const UInt2 &rhs)
   5112 	{
   5113 		Value *value = rhs.loadValue();
   5114 		storeValue(value);
   5115 	}
   5116 
   5117 	UInt2::UInt2(const Reference<UInt2> &rhs)
   5118 	{
   5119 		Value *value = rhs.loadValue();
   5120 		storeValue(value);
   5121 	}
   5122 
   5123 	RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)
   5124 	{
   5125 		storeValue(rhs.value);
   5126 
   5127 		return rhs;
   5128 	}
   5129 
   5130 	RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
   5131 	{
   5132 		Value *value = rhs.loadValue();
   5133 		storeValue(value);
   5134 
   5135 		return RValue<UInt2>(value);
   5136 	}
   5137 
   5138 	RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
   5139 	{
   5140 		Value *value = rhs.loadValue();
   5141 		storeValue(value);
   5142 
   5143 		return RValue<UInt2>(value);
   5144 	}
   5145 
   5146 	RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5147 	{
   5148 		return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
   5149 	}
   5150 
   5151 	RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5152 	{
   5153 		return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
   5154 	}
   5155 
   5156 //	RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5157 //	{
   5158 //		return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
   5159 //	}
   5160 
   5161 //	RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5162 //	{
   5163 //		return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
   5164 //	}
   5165 
   5166 //	RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5167 //	{
   5168 //		return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
   5169 //	}
   5170 
   5171 	RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5172 	{
   5173 		return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
   5174 	}
   5175 
   5176 	RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5177 	{
   5178 		return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
   5179 	}
   5180 
   5181 	RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5182 	{
   5183 		return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
   5184 	}
   5185 
   5186 	RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
   5187 	{
   5188 #if defined(__i386__) || defined(__x86_64__)
   5189 	//	return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
   5190 
   5191 		return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
   5192 #else
   5193 		return As<UInt2>(V(lowerVectorShl(V(lhs.value), rhs)));
   5194 #endif
   5195 	}
   5196 
   5197 	RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
   5198 	{
   5199 #if defined(__i386__) || defined(__x86_64__)
   5200 	//	return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
   5201 
   5202 		return x86::psrld(lhs, rhs);
   5203 #else
   5204 		return As<UInt2>(V(lowerVectorLShr(V(lhs.value), rhs)));
   5205 #endif
   5206 	}
   5207 
   5208 	RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
   5209 	{
   5210 		return lhs = lhs + rhs;
   5211 	}
   5212 
   5213 	RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)
   5214 	{
   5215 		return lhs = lhs - rhs;
   5216 	}
   5217 
   5218 //	RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
   5219 //	{
   5220 //		return lhs = lhs * rhs;
   5221 //	}
   5222 
   5223 //	RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
   5224 //	{
   5225 //		return lhs = lhs / rhs;
   5226 //	}
   5227 
   5228 //	RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
   5229 //	{
   5230 //		return lhs = lhs % rhs;
   5231 //	}
   5232 
   5233 	RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)
   5234 	{
   5235 		return lhs = lhs & rhs;
   5236 	}
   5237 
   5238 	RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)
   5239 	{
   5240 		return lhs = lhs | rhs;
   5241 	}
   5242 
   5243 	RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)
   5244 	{
   5245 		return lhs = lhs ^ rhs;
   5246 	}
   5247 
   5248 	RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)
   5249 	{
   5250 		return lhs = lhs << rhs;
   5251 	}
   5252 
   5253 	RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)
   5254 	{
   5255 		return lhs = lhs >> rhs;
   5256 	}
   5257 
   5258 //	RValue<UInt2> operator+(RValue<UInt2> val)
   5259 //	{
   5260 //		return val;
   5261 //	}
   5262 
   5263 //	RValue<UInt2> operator-(RValue<UInt2> val)
   5264 //	{
   5265 //		return RValue<UInt2>(Nucleus::createNeg(val.value));
   5266 //	}
   5267 
   5268 	RValue<UInt2> operator~(RValue<UInt2> val)
   5269 	{
   5270 		return RValue<UInt2>(Nucleus::createNot(val.value));
   5271 	}
   5272 
   5273 	Type *UInt2::getType()
   5274 	{
   5275 		return T(Type_v2i32);
   5276 	}
   5277 
   5278 	Int4::Int4() : XYZW(this)
   5279 	{
   5280 	}
   5281 
   5282 	Int4::Int4(RValue<Byte4> cast) : XYZW(this)
   5283 	{
   5284 #if defined(__i386__) || defined(__x86_64__)
   5285 		if(CPUID::supportsSSE4_1())
   5286 		{
   5287 			*this = x86::pmovzxbd(As<Byte16>(cast));
   5288 		}
   5289 		else
   5290 #endif
   5291 		{
   5292 			int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
   5293 			Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
   5294 			Value *b = Nucleus::createShuffleVector(a, Nucleus::createNullValue(Byte16::getType()), swizzle);
   5295 
   5296 			int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
   5297 			Value *c = Nucleus::createBitCast(b, Short8::getType());
   5298 			Value *d = Nucleus::createShuffleVector(c, Nucleus::createNullValue(Short8::getType()), swizzle2);
   5299 
   5300 			*this = As<Int4>(d);
   5301 		}
   5302 	}
   5303 
   5304 	Int4::Int4(RValue<SByte4> cast) : XYZW(this)
   5305 	{
   5306 #if defined(__i386__) || defined(__x86_64__)
   5307 		if(CPUID::supportsSSE4_1())
   5308 		{
   5309 			*this = x86::pmovsxbd(As<SByte16>(cast));
   5310 		}
   5311 		else
   5312 #endif
   5313 		{
   5314 			int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
   5315 			Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
   5316 			Value *b = Nucleus::createShuffleVector(a, a, swizzle);
   5317 
   5318 			int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
   5319 			Value *c = Nucleus::createBitCast(b, Short8::getType());
   5320 			Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
   5321 
   5322 			*this = As<Int4>(d) >> 24;
   5323 		}
   5324 	}
   5325 
   5326 	Int4::Int4(RValue<Float4> cast) : XYZW(this)
   5327 	{
   5328 		Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
   5329 
   5330 		storeValue(xyzw);
   5331 	}
   5332 
   5333 	Int4::Int4(RValue<Short4> cast) : XYZW(this)
   5334 	{
   5335 #if defined(__i386__) || defined(__x86_64__)
   5336 		if(CPUID::supportsSSE4_1())
   5337 		{
   5338 			*this = x86::pmovsxwd(As<Short8>(cast));
   5339 		}
   5340 		else
   5341 #endif
   5342 		{
   5343 			int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
   5344 			Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
   5345 			*this = As<Int4>(c) >> 16;
   5346 		}
   5347 	}
   5348 
   5349 	Int4::Int4(RValue<UShort4> cast) : XYZW(this)
   5350 	{
   5351 #if defined(__i386__) || defined(__x86_64__)
   5352 		if(CPUID::supportsSSE4_1())
   5353 		{
   5354 			*this = x86::pmovzxwd(As<UShort8>(cast));
   5355 		}
   5356 		else
   5357 #endif
   5358 		{
   5359 			int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
   5360 			Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
   5361 			*this = As<Int4>(c);
   5362 		}
   5363 	}
   5364 
   5365 	Int4::Int4(int xyzw) : XYZW(this)
   5366 	{
   5367 		constant(xyzw, xyzw, xyzw, xyzw);
   5368 	}
   5369 
   5370 	Int4::Int4(int x, int yzw) : XYZW(this)
   5371 	{
   5372 		constant(x, yzw, yzw, yzw);
   5373 	}
   5374 
   5375 	Int4::Int4(int x, int y, int zw) : XYZW(this)
   5376 	{
   5377 		constant(x, y, zw, zw);
   5378 	}
   5379 
   5380 	Int4::Int4(int x, int y, int z, int w) : XYZW(this)
   5381 	{
   5382 		constant(x, y, z, w);
   5383 	}
   5384 
   5385 	void Int4::constant(int x, int y, int z, int w)
   5386 	{
   5387 		int64_t constantVector[4] = {x, y, z, w};
   5388 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   5389 	}
   5390 
   5391 	Int4::Int4(RValue<Int4> rhs) : XYZW(this)
   5392 	{
   5393 		storeValue(rhs.value);
   5394 	}
   5395 
   5396 	Int4::Int4(const Int4 &rhs) : XYZW(this)
   5397 	{
   5398 		Value *value = rhs.loadValue();
   5399 		storeValue(value);
   5400 	}
   5401 
   5402 	Int4::Int4(const Reference<Int4> &rhs) : XYZW(this)
   5403 	{
   5404 		Value *value = rhs.loadValue();
   5405 		storeValue(value);
   5406 	}
   5407 
   5408 	Int4::Int4(RValue<UInt4> rhs) : XYZW(this)
   5409 	{
   5410 		storeValue(rhs.value);
   5411 	}
   5412 
   5413 	Int4::Int4(const UInt4 &rhs) : XYZW(this)
   5414 	{
   5415 		Value *value = rhs.loadValue();
   5416 		storeValue(value);
   5417 	}
   5418 
   5419 	Int4::Int4(const Reference<UInt4> &rhs) : XYZW(this)
   5420 	{
   5421 		Value *value = rhs.loadValue();
   5422 		storeValue(value);
   5423 	}
   5424 
   5425 	Int4::Int4(RValue<Int2> lo, RValue<Int2> hi) : XYZW(this)
   5426 	{
   5427 		int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
   5428 		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
   5429 
   5430 		storeValue(packed);
   5431 	}
   5432 
   5433 	Int4::Int4(RValue<Int> rhs) : XYZW(this)
   5434 	{
   5435 		Value *vector = loadValue();
   5436 		Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
   5437 
   5438 		int swizzle[4] = {0, 0, 0, 0};
   5439 		Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
   5440 
   5441 		storeValue(replicate);
   5442 	}
   5443 
   5444 	Int4::Int4(const Int &rhs) : XYZW(this)
   5445 	{
   5446 		*this = RValue<Int>(rhs.loadValue());
   5447 	}
   5448 
   5449 	Int4::Int4(const Reference<Int> &rhs) : XYZW(this)
   5450 	{
   5451 		*this = RValue<Int>(rhs.loadValue());
   5452 	}
   5453 
   5454 	RValue<Int4> Int4::operator=(RValue<Int4> rhs)
   5455 	{
   5456 		storeValue(rhs.value);
   5457 
   5458 		return rhs;
   5459 	}
   5460 
   5461 	RValue<Int4> Int4::operator=(const Int4 &rhs)
   5462 	{
   5463 		Value *value = rhs.loadValue();
   5464 		storeValue(value);
   5465 
   5466 		return RValue<Int4>(value);
   5467 	}
   5468 
   5469 	RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
   5470 	{
   5471 		Value *value = rhs.loadValue();
   5472 		storeValue(value);
   5473 
   5474 		return RValue<Int4>(value);
   5475 	}
   5476 
   5477 	RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
   5478 	{
   5479 		return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
   5480 	}
   5481 
   5482 	RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
   5483 	{
   5484 		return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
   5485 	}
   5486 
   5487 	RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
   5488 	{
   5489 		return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
   5490 	}
   5491 
   5492 	RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
   5493 	{
   5494 		return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
   5495 	}
   5496 
   5497 	RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
   5498 	{
   5499 		return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
   5500 	}
   5501 
   5502 	RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
   5503 	{
   5504 		return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
   5505 	}
   5506 
   5507 	RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
   5508 	{
   5509 		return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
   5510 	}
   5511 
   5512 	RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
   5513 	{
   5514 		return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
   5515 	}
   5516 
   5517 	RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
   5518 	{
   5519 #if defined(__i386__) || defined(__x86_64__)
   5520 		return x86::pslld(lhs, rhs);
   5521 #else
   5522 		return As<Int4>(V(lowerVectorShl(V(lhs.value), rhs)));
   5523 #endif
   5524 	}
   5525 
   5526 	RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
   5527 	{
   5528 #if defined(__i386__) || defined(__x86_64__)
   5529 		return x86::psrad(lhs, rhs);
   5530 #else
   5531 		return As<Int4>(V(lowerVectorAShr(V(lhs.value), rhs)));
   5532 #endif
   5533 	}
   5534 
   5535 	RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
   5536 	{
   5537 		return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
   5538 	}
   5539 
   5540 	RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
   5541 	{
   5542 		return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
   5543 	}
   5544 
   5545 	RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
   5546 	{
   5547 		return lhs = lhs + rhs;
   5548 	}
   5549 
   5550 	RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
   5551 	{
   5552 		return lhs = lhs - rhs;
   5553 	}
   5554 
   5555 	RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
   5556 	{
   5557 		return lhs = lhs * rhs;
   5558 	}
   5559 
   5560 //	RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
   5561 //	{
   5562 //		return lhs = lhs / rhs;
   5563 //	}
   5564 
   5565 //	RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
   5566 //	{
   5567 //		return lhs = lhs % rhs;
   5568 //	}
   5569 
   5570 	RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
   5571 	{
   5572 		return lhs = lhs & rhs;
   5573 	}
   5574 
   5575 	RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
   5576 	{
   5577 		return lhs = lhs | rhs;
   5578 	}
   5579 
   5580 	RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
   5581 	{
   5582 		return lhs = lhs ^ rhs;
   5583 	}
   5584 
   5585 	RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
   5586 	{
   5587 		return lhs = lhs << rhs;
   5588 	}
   5589 
   5590 	RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
   5591 	{
   5592 		return lhs = lhs >> rhs;
   5593 	}
   5594 
   5595 	RValue<Int4> operator+(RValue<Int4> val)
   5596 	{
   5597 		return val;
   5598 	}
   5599 
   5600 	RValue<Int4> operator-(RValue<Int4> val)
   5601 	{
   5602 		return RValue<Int4>(Nucleus::createNeg(val.value));
   5603 	}
   5604 
   5605 	RValue<Int4> operator~(RValue<Int4> val)
   5606 	{
   5607 		return RValue<Int4>(Nucleus::createNot(val.value));
   5608 	}
   5609 
   5610 	RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
   5611 	{
   5612 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5613 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5614 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
   5615 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
   5616 	}
   5617 
   5618 	RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
   5619 	{
   5620 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5621 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5622 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
   5623 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
   5624 	}
   5625 
   5626 	RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
   5627 	{
   5628 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5629 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5630 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
   5631 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
   5632 	}
   5633 
   5634 	RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
   5635 	{
   5636 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5637 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5638 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
   5639 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
   5640 	}
   5641 
   5642 	RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
   5643 	{
   5644 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5645 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5646 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
   5647 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
   5648 	}
   5649 
   5650 	RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
   5651 	{
   5652 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5653 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5654 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
   5655 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
   5656 	}
   5657 
   5658 	RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
   5659 	{
   5660 #if defined(__i386__) || defined(__x86_64__)
   5661 		if(CPUID::supportsSSE4_1())
   5662 		{
   5663 			return x86::pmaxsd(x, y);
   5664 		}
   5665 		else
   5666 #endif
   5667 		{
   5668 			RValue<Int4> greater = CmpNLE(x, y);
   5669 			return (x & greater) | (y & ~greater);
   5670 		}
   5671 	}
   5672 
   5673 	RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
   5674 	{
   5675 #if defined(__i386__) || defined(__x86_64__)
   5676 		if(CPUID::supportsSSE4_1())
   5677 		{
   5678 			return x86::pminsd(x, y);
   5679 		}
   5680 		else
   5681 #endif
   5682 		{
   5683 			RValue<Int4> less = CmpLT(x, y);
   5684 			return (x & less) | (y & ~less);
   5685 		}
   5686 	}
   5687 
   5688 	RValue<Int4> RoundInt(RValue<Float4> cast)
   5689 	{
   5690 #if defined(__i386__) || defined(__x86_64__)
   5691 		return x86::cvtps2dq(cast);
   5692 #else
   5693 		return As<Int4>(V(lowerRoundInt(V(cast.value), T(Int4::getType()))));
   5694 #endif
   5695 	}
   5696 
   5697 	RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
   5698 	{
   5699 #if defined(__i386__) || defined(__x86_64__)
   5700 		return x86::packssdw(x, y);
   5701 #else
   5702 		return As<Short8>(V(lowerPack(V(x.value), V(y.value), true)));
   5703 #endif
   5704 	}
   5705 
   5706 	RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
   5707 	{
   5708 #if defined(__i386__) || defined(__x86_64__)
   5709 		return x86::packusdw(x, y);
   5710 #else
   5711 		return As<UShort8>(V(lowerPack(V(x.value), V(y.value), false)));
   5712 #endif
   5713 	}
   5714 
   5715 	RValue<Int> Extract(RValue<Int4> x, int i)
   5716 	{
   5717 		return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
   5718 	}
   5719 
   5720 	RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
   5721 	{
   5722 		return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
   5723 	}
   5724 
   5725 	RValue<Int> SignMask(RValue<Int4> x)
   5726 	{
   5727 #if defined(__i386__) || defined(__x86_64__)
   5728 		return x86::movmskps(As<Float4>(x));
   5729 #else
   5730 		return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
   5731 #endif
   5732 	}
   5733 
   5734 	RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
   5735 	{
   5736 		return RValue<Int4>(createSwizzle4(x.value, select));
   5737 	}
   5738 
   5739 	Type *Int4::getType()
   5740 	{
   5741 		return T(llvm::VectorType::get(T(Int::getType()), 4));
   5742 	}
   5743 
   5744 	UInt4::UInt4() : XYZW(this)
   5745 	{
   5746 	}
   5747 
	// Constructs a UInt4 by converting a Float4 to unsigned integers using only
	// the signed conversion (Int4(Float4)). Values of 2^31 and above are biased
	// down into signed range before converting and biased back up afterwards;
	// inputs whose sign bit is set produce 0.
	UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
	{
		// Note: createFPToUI is broken, must perform conversion using createFPtoSI
		// Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());

		// Smallest positive value representable in UInt, but not in Int
		const unsigned int ustart = 0x80000000u;
		const float ustartf = float(ustart);

		// Mask of the elements that are NOT less than ustart, i.e. those that are
		// too large to be represented as an Int
		Int4 uiValue = CmpNLT(cast, Float4(ustartf));
		// If the value is too large, subtract ustart and re-add it after conversion.
		uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
		// Otherwise, just convert normally
		          (~uiValue & Int4(cast));
		// If the value is negative, store 0, otherwise store the result of the conversion.
		// (As<Int4>(cast) >> 31 spreads the float's sign bit across the whole element.)
		storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
	}
   5766 
   5767 	UInt4::UInt4(int xyzw) : XYZW(this)
   5768 	{
   5769 		constant(xyzw, xyzw, xyzw, xyzw);
   5770 	}
   5771 
   5772 	UInt4::UInt4(int x, int yzw) : XYZW(this)
   5773 	{
   5774 		constant(x, yzw, yzw, yzw);
   5775 	}
   5776 
   5777 	UInt4::UInt4(int x, int y, int zw) : XYZW(this)
   5778 	{
   5779 		constant(x, y, zw, zw);
   5780 	}
   5781 
   5782 	UInt4::UInt4(int x, int y, int z, int w) : XYZW(this)
   5783 	{
   5784 		constant(x, y, z, w);
   5785 	}
   5786 
   5787 	void UInt4::constant(int x, int y, int z, int w)
   5788 	{
   5789 		int64_t constantVector[4] = {x, y, z, w};
   5790 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   5791 	}
   5792 
   5793 	UInt4::UInt4(RValue<UInt4> rhs) : XYZW(this)
   5794 	{
   5795 		storeValue(rhs.value);
   5796 	}
   5797 
   5798 	UInt4::UInt4(const UInt4 &rhs) : XYZW(this)
   5799 	{
   5800 		Value *value = rhs.loadValue();
   5801 		storeValue(value);
   5802 	}
   5803 
   5804 	UInt4::UInt4(const Reference<UInt4> &rhs) : XYZW(this)
   5805 	{
   5806 		Value *value = rhs.loadValue();
   5807 		storeValue(value);
   5808 	}
   5809 
   5810 	UInt4::UInt4(RValue<Int4> rhs) : XYZW(this)
   5811 	{
   5812 		storeValue(rhs.value);
   5813 	}
   5814 
   5815 	UInt4::UInt4(const Int4 &rhs) : XYZW(this)
   5816 	{
   5817 		Value *value = rhs.loadValue();
   5818 		storeValue(value);
   5819 	}
   5820 
   5821 	UInt4::UInt4(const Reference<Int4> &rhs) : XYZW(this)
   5822 	{
   5823 		Value *value = rhs.loadValue();
   5824 		storeValue(value);
   5825 	}
   5826 
   5827 	UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi) : XYZW(this)
   5828 	{
   5829 		int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
   5830 		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
   5831 
   5832 		storeValue(packed);
   5833 	}
   5834 
   5835 	RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
   5836 	{
   5837 		storeValue(rhs.value);
   5838 
   5839 		return rhs;
   5840 	}
   5841 
   5842 	RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
   5843 	{
   5844 		Value *value = rhs.loadValue();
   5845 		storeValue(value);
   5846 
   5847 		return RValue<UInt4>(value);
   5848 	}
   5849 
   5850 	RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
   5851 	{
   5852 		Value *value = rhs.loadValue();
   5853 		storeValue(value);
   5854 
   5855 		return RValue<UInt4>(value);
   5856 	}
   5857 
   5858 	RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5859 	{
   5860 		return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
   5861 	}
   5862 
   5863 	RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5864 	{
   5865 		return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
   5866 	}
   5867 
   5868 	RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5869 	{
   5870 		return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
   5871 	}
   5872 
   5873 	RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5874 	{
   5875 		return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
   5876 	}
   5877 
   5878 	RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5879 	{
   5880 		return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
   5881 	}
   5882 
   5883 	RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5884 	{
   5885 		return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
   5886 	}
   5887 
   5888 	RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5889 	{
   5890 		return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
   5891 	}
   5892 
   5893 	RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5894 	{
   5895 		return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
   5896 	}
   5897 
   5898 	RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
   5899 	{
   5900 #if defined(__i386__) || defined(__x86_64__)
   5901 		return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
   5902 #else
   5903 		return As<UInt4>(V(lowerVectorShl(V(lhs.value), rhs)));
   5904 #endif
   5905 	}
   5906 
   5907 	RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
   5908 	{
   5909 #if defined(__i386__) || defined(__x86_64__)
   5910 		return x86::psrld(lhs, rhs);
   5911 #else
   5912 		return As<UInt4>(V(lowerVectorLShr(V(lhs.value), rhs)));
   5913 #endif
   5914 	}
   5915 
   5916 	RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5917 	{
   5918 		return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
   5919 	}
   5920 
   5921 	RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5922 	{
   5923 		return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
   5924 	}
   5925 
   5926 	RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
   5927 	{
   5928 		return lhs = lhs + rhs;
   5929 	}
   5930 
   5931 	RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
   5932 	{
   5933 		return lhs = lhs - rhs;
   5934 	}
   5935 
   5936 	RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
   5937 	{
   5938 		return lhs = lhs * rhs;
   5939 	}
   5940 
   5941 //	RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
   5942 //	{
   5943 //		return lhs = lhs / rhs;
   5944 //	}
   5945 
   5946 //	RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
   5947 //	{
   5948 //		return lhs = lhs % rhs;
   5949 //	}
   5950 
   5951 	RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
   5952 	{
   5953 		return lhs = lhs & rhs;
   5954 	}
   5955 
   5956 	RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
   5957 	{
   5958 		return lhs = lhs | rhs;
   5959 	}
   5960 
   5961 	RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
   5962 	{
   5963 		return lhs = lhs ^ rhs;
   5964 	}
   5965 
   5966 	RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
   5967 	{
   5968 		return lhs = lhs << rhs;
   5969 	}
   5970 
   5971 	RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
   5972 	{
   5973 		return lhs = lhs >> rhs;
   5974 	}
   5975 
   5976 	RValue<UInt4> operator+(RValue<UInt4> val)
   5977 	{
   5978 		return val;
   5979 	}
   5980 
   5981 	RValue<UInt4> operator-(RValue<UInt4> val)
   5982 	{
   5983 		return RValue<UInt4>(Nucleus::createNeg(val.value));
   5984 	}
   5985 
   5986 	RValue<UInt4> operator~(RValue<UInt4> val)
   5987 	{
   5988 		return RValue<UInt4>(Nucleus::createNot(val.value));
   5989 	}
   5990 
   5991 	RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
   5992 	{
   5993 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5994 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5995 		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
   5996 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
   5997 	}
   5998 
   5999 	RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
   6000 	{
   6001 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
   6002 	}
   6003 
   6004 	RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
   6005 	{
   6006 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   6007 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   6008 		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
   6009 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
   6010 	}
   6011 
   6012 	RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
   6013 	{
   6014 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
   6015 	}
   6016 
   6017 	RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
   6018 	{
   6019 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   6020 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   6021 		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
   6022 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
   6023 	}
   6024 
   6025 	RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
   6026 	{
   6027 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
   6028 	}
   6029 
   6030 	RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
   6031 	{
   6032 #if defined(__i386__) || defined(__x86_64__)
   6033 		if(CPUID::supportsSSE4_1())
   6034 		{
   6035 			return x86::pmaxud(x, y);
   6036 		}
   6037 		else
   6038 #endif
   6039 		{
   6040 			RValue<UInt4> greater = CmpNLE(x, y);
   6041 			return (x & greater) | (y & ~greater);
   6042 		}
   6043 	}
   6044 
   6045 	RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
   6046 	{
   6047 #if defined(__i386__) || defined(__x86_64__)
   6048 		if(CPUID::supportsSSE4_1())
   6049 		{
   6050 			return x86::pminud(x, y);
   6051 		}
   6052 		else
   6053 #endif
   6054 		{
   6055 			RValue<UInt4> less = CmpLT(x, y);
   6056 			return (x & less) | (y & ~less);
   6057 		}
   6058 	}
   6059 
   6060 	Type *UInt4::getType()
   6061 	{
   6062 		return T(llvm::VectorType::get(T(UInt::getType()), 4));
   6063 	}
   6064 
   6065 	Half::Half(RValue<Float> cast)
   6066 	{
   6067 		UInt fp32i = As<UInt>(cast);
   6068 		UInt abs = fp32i & 0x7FFFFFFF;
   6069 		UShort fp16i((fp32i & 0x80000000) >> 16); // sign
   6070 
   6071 		If(abs > 0x47FFEFFF) // Infinity
   6072 		{
   6073 			fp16i |= UShort(0x7FFF);
   6074 		}
   6075 		Else
   6076 		{
   6077 			If(abs < 0x38800000) // Denormal
   6078 			{
   6079 				Int mantissa = (abs & 0x007FFFFF) | 0x00800000;
   6080 				Int e = 113 - (abs >> 23);
   6081 				abs = IfThenElse(e < 24, mantissa >> e, Int(0));
   6082 				fp16i |= UShort((abs + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
   6083 			}
   6084 			Else
   6085 			{
   6086 				fp16i |= UShort((abs + 0xC8000000 + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
   6087 			}
   6088 		}
   6089 
   6090 		storeValue(fp16i.loadValue());
   6091 	}
   6092 
   6093 	Type *Half::getType()
   6094 	{
   6095 		return T(llvm::Type::getInt16Ty(*::context));
   6096 	}
   6097 
   6098 	Float::Float(RValue<Int> cast)
   6099 	{
   6100 		Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
   6101 
   6102 		storeValue(integer);
   6103 	}
   6104 
   6105 	Float::Float(RValue<UInt> cast)
   6106 	{
   6107 		RValue<Float> result = Float(Int(cast & UInt(0x7FFFFFFF))) +
   6108 		                       As<Float>((As<Int>(cast) >> 31) & As<Int>(Float(0x80000000u)));
   6109 
   6110 		storeValue(result.value);
   6111 	}
   6112 
   6113 	Float::Float(RValue<Half> cast)
   6114 	{
   6115 		Int fp16i(As<UShort>(cast));
   6116 
   6117 		Int s = (fp16i >> 15) & 0x00000001;
   6118 		Int e = (fp16i >> 10) & 0x0000001F;
   6119 		Int m = fp16i & 0x000003FF;
   6120 
   6121 		UInt fp32i(s << 31);
   6122 		If(e == 0)
   6123 		{
   6124 			If(m != 0)
   6125 			{
   6126 				While((m & 0x00000400) == 0)
   6127 				{
   6128 					m <<= 1;
   6129 					e -= 1;
   6130 				}
   6131 
   6132 				fp32i |= As<UInt>(((e + (127 - 15) + 1) << 23) | ((m & ~0x00000400) << 13));
   6133 			}
   6134 		}
   6135 		Else
   6136 		{
   6137 			fp32i |= As<UInt>(((e + (127 - 15)) << 23) | (m << 13));
   6138 		}
   6139 
   6140 		storeValue(As<Float>(fp32i).value);
   6141 	}
   6142 
   6143 	Float::Float(float x)
   6144 	{
   6145 		storeValue(Nucleus::createConstantFloat(x));
   6146 	}
   6147 
   6148 	Float::Float(RValue<Float> rhs)
   6149 	{
   6150 		storeValue(rhs.value);
   6151 	}
   6152 
   6153 	Float::Float(const Float &rhs)
   6154 	{
   6155 		Value *value = rhs.loadValue();
   6156 		storeValue(value);
   6157 	}
   6158 
   6159 	Float::Float(const Reference<Float> &rhs)
   6160 	{
   6161 		Value *value = rhs.loadValue();
   6162 		storeValue(value);
   6163 	}
   6164 
   6165 	RValue<Float> Float::operator=(RValue<Float> rhs)
   6166 	{
   6167 		storeValue(rhs.value);
   6168 
   6169 		return rhs;
   6170 	}
   6171 
   6172 	RValue<Float> Float::operator=(const Float &rhs)
   6173 	{
   6174 		Value *value = rhs.loadValue();
   6175 		storeValue(value);
   6176 
   6177 		return RValue<Float>(value);
   6178 	}
   6179 
   6180 	RValue<Float> Float::operator=(const Reference<Float> &rhs)
   6181 	{
   6182 		Value *value = rhs.loadValue();
   6183 		storeValue(value);
   6184 
   6185 		return RValue<Float>(value);
   6186 	}
   6187 
   6188 	RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
   6189 	{
   6190 		return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
   6191 	}
   6192 
   6193 	RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
   6194 	{
   6195 		return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
   6196 	}
   6197 
   6198 	RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
   6199 	{
   6200 		return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
   6201 	}
   6202 
   6203 	RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
   6204 	{
   6205 		return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
   6206 	}
   6207 
   6208 	RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
   6209 	{
   6210 		return lhs = lhs + rhs;
   6211 	}
   6212 
   6213 	RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
   6214 	{
   6215 		return lhs = lhs - rhs;
   6216 	}
   6217 
   6218 	RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
   6219 	{
   6220 		return lhs = lhs * rhs;
   6221 	}
   6222 
   6223 	RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
   6224 	{
   6225 		return lhs = lhs / rhs;
   6226 	}
   6227 
   6228 	RValue<Float> operator+(RValue<Float> val)
   6229 	{
   6230 		return val;
   6231 	}
   6232 
   6233 	RValue<Float> operator-(RValue<Float> val)
   6234 	{
   6235 		return RValue<Float>(Nucleus::createFNeg(val.value));
   6236 	}
   6237 
   6238 	RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
   6239 	{
   6240 		return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
   6241 	}
   6242 
   6243 	RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
   6244 	{
   6245 		return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
   6246 	}
   6247 
   6248 	RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
   6249 	{
   6250 		return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
   6251 	}
   6252 
   6253 	RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
   6254 	{
   6255 		return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
   6256 	}
   6257 
   6258 	RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
   6259 	{
   6260 		return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
   6261 	}
   6262 
   6263 	RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
   6264 	{
   6265 		return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
   6266 	}
   6267 
   6268 	RValue<Float> Abs(RValue<Float> x)
   6269 	{
   6270 		return IfThenElse(x > 0.0f, x, -x);
   6271 	}
   6272 
   6273 	RValue<Float> Max(RValue<Float> x, RValue<Float> y)
   6274 	{
   6275 		return IfThenElse(x > y, x, y);
   6276 	}
   6277 
   6278 	RValue<Float> Min(RValue<Float> x, RValue<Float> y)
   6279 	{
   6280 		return IfThenElse(x < y, x, y);
   6281 	}
   6282 
   6283 	RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
   6284 	{
   6285 #if defined(__i386__) || defined(__x86_64__)
   6286 		if(exactAtPow2)
   6287 		{
   6288 			// rcpss uses a piecewise-linear approximation which minimizes the relative error
   6289 			// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
   6290 			return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
   6291 		}
   6292 		return x86::rcpss(x);
   6293 #else
   6294 		return As<Float>(V(lowerRCP(V(x.value))));
   6295 #endif
   6296 	}
   6297 
   6298 	RValue<Float> RcpSqrt_pp(RValue<Float> x)
   6299 	{
   6300 #if defined(__i386__) || defined(__x86_64__)
   6301 		return x86::rsqrtss(x);
   6302 #else
   6303 		return As<Float>(V(lowerRSQRT(V(x.value))));
   6304 #endif
   6305 	}
   6306 
   6307 	RValue<Float> Sqrt(RValue<Float> x)
   6308 	{
   6309 #if defined(__i386__) || defined(__x86_64__)
   6310 		return x86::sqrtss(x);
   6311 #else
   6312 		return As<Float>(V(lowerSQRT(V(x.value))));
   6313 #endif
   6314 	}
   6315 
   6316 	RValue<Float> Round(RValue<Float> x)
   6317 	{
   6318 #if defined(__i386__) || defined(__x86_64__)
   6319 		if(CPUID::supportsSSE4_1())
   6320 		{
   6321 			return x86::roundss(x, 0);
   6322 		}
   6323 		else
   6324 		{
   6325 			return Float4(Round(Float4(x))).x;
   6326 		}
   6327 #else
   6328 		return RValue<Float>(V(lowerRound(V(x.value))));
   6329 #endif
   6330 	}
   6331 
   6332 	RValue<Float> Trunc(RValue<Float> x)
   6333 	{
   6334 #if defined(__i386__) || defined(__x86_64__)
   6335 		if(CPUID::supportsSSE4_1())
   6336 		{
   6337 			return x86::roundss(x, 3);
   6338 		}
   6339 		else
   6340 		{
   6341 			return Float(Int(x));   // Rounded toward zero
   6342 		}
   6343 #else
   6344 		return RValue<Float>(V(lowerTrunc(V(x.value))));
   6345 #endif
   6346 	}
   6347 
   6348 	RValue<Float> Frac(RValue<Float> x)
   6349 	{
   6350 #if defined(__i386__) || defined(__x86_64__)
   6351 		if(CPUID::supportsSSE4_1())
   6352 		{
   6353 			return x - x86::floorss(x);
   6354 		}
   6355 		else
   6356 		{
   6357 			return Float4(Frac(Float4(x))).x;
   6358 		}
   6359 #else
   6360 		// x - floor(x) can be 1.0 for very small negative x.
   6361 		// Clamp against the value just below 1.0.
   6362 		return Min(x - Floor(x), As<Float>(Int(0x3F7FFFFF)));
   6363 #endif
   6364 	}
   6365 
   6366 	RValue<Float> Floor(RValue<Float> x)
   6367 	{
   6368 #if defined(__i386__) || defined(__x86_64__)
   6369 		if(CPUID::supportsSSE4_1())
   6370 		{
   6371 			return x86::floorss(x);
   6372 		}
   6373 		else
   6374 		{
   6375 			return Float4(Floor(Float4(x))).x;
   6376 		}
   6377 #else
   6378 		return RValue<Float>(V(lowerFloor(V(x.value))));
   6379 #endif
   6380 	}
   6381 
   6382 	RValue<Float> Ceil(RValue<Float> x)
   6383 	{
   6384 #if defined(__i386__) || defined(__x86_64__)
   6385 		if(CPUID::supportsSSE4_1())
   6386 		{
   6387 			return x86::ceilss(x);
   6388 		}
   6389 		else
   6390 #endif
   6391 		{
   6392 			return Float4(Ceil(Float4(x))).x;
   6393 		}
   6394 	}
   6395 
   6396 	Type *Float::getType()
   6397 	{
   6398 		return T(llvm::Type::getFloatTy(*::context));
   6399 	}
   6400 
   6401 	Float2::Float2(RValue<Float4> cast)
   6402 	{
   6403 		storeValue(Nucleus::createBitCast(cast.value, getType()));
   6404 	}
   6405 
   6406 	Type *Float2::getType()
   6407 	{
   6408 		return T(Type_v2f32);
   6409 	}
   6410 
   6411 	Float4::Float4(RValue<Byte4> cast) : XYZW(this)
   6412 	{
   6413 		Value *a = Int4(cast).loadValue();
   6414 		Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
   6415 
   6416 		storeValue(xyzw);
   6417 	}
   6418 
   6419 	Float4::Float4(RValue<SByte4> cast) : XYZW(this)
   6420 	{
   6421 		Value *a = Int4(cast).loadValue();
   6422 		Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
   6423 
   6424 		storeValue(xyzw);
   6425 	}
   6426 
   6427 	Float4::Float4(RValue<Short4> cast) : XYZW(this)
   6428 	{
   6429 		Int4 c(cast);
   6430 		storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
   6431 	}
   6432 
   6433 	Float4::Float4(RValue<UShort4> cast) : XYZW(this)
   6434 	{
   6435 		Int4 c(cast);
   6436 		storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
   6437 	}
   6438 
   6439 	Float4::Float4(RValue<Int4> cast) : XYZW(this)
   6440 	{
   6441 		Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
   6442 
   6443 		storeValue(xyzw);
   6444 	}
   6445 
   6446 	Float4::Float4(RValue<UInt4> cast) : XYZW(this)
   6447 	{
   6448 		RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
   6449 		                        As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));
   6450 
   6451 		storeValue(result.value);
   6452 	}
   6453 
   6454 	Float4::Float4() : XYZW(this)
   6455 	{
   6456 	}
   6457 
   6458 	Float4::Float4(float xyzw) : XYZW(this)
   6459 	{
   6460 		constant(xyzw, xyzw, xyzw, xyzw);
   6461 	}
   6462 
   6463 	Float4::Float4(float x, float yzw) : XYZW(this)
   6464 	{
   6465 		constant(x, yzw, yzw, yzw);
   6466 	}
   6467 
   6468 	Float4::Float4(float x, float y, float zw) : XYZW(this)
   6469 	{
   6470 		constant(x, y, zw, zw);
   6471 	}
   6472 
   6473 	Float4::Float4(float x, float y, float z, float w) : XYZW(this)
   6474 	{
   6475 		constant(x, y, z, w);
   6476 	}
   6477 
   6478 	void Float4::constant(float x, float y, float z, float w)
   6479 	{
   6480 		double constantVector[4] = {x, y, z, w};
   6481 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   6482 	}
   6483 
   6484 	Float4::Float4(RValue<Float4> rhs) : XYZW(this)
   6485 	{
   6486 		storeValue(rhs.value);
   6487 	}
   6488 
   6489 	Float4::Float4(const Float4 &rhs) : XYZW(this)
   6490 	{
   6491 		Value *value = rhs.loadValue();
   6492 		storeValue(value);
   6493 	}
   6494 
   6495 	Float4::Float4(const Reference<Float4> &rhs) : XYZW(this)
   6496 	{
   6497 		Value *value = rhs.loadValue();
   6498 		storeValue(value);
   6499 	}
   6500 
   6501 	Float4::Float4(RValue<Float> rhs) : XYZW(this)
   6502 	{
   6503 		Value *vector = loadValue();
   6504 		Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
   6505 
   6506 		int swizzle[4] = {0, 0, 0, 0};
   6507 		Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
   6508 
   6509 		storeValue(replicate);
   6510 	}
   6511 
   6512 	Float4::Float4(const Float &rhs) : XYZW(this)
   6513 	{
   6514 		*this = RValue<Float>(rhs.loadValue());
   6515 	}
   6516 
   6517 	Float4::Float4(const Reference<Float> &rhs) : XYZW(this)
   6518 	{
   6519 		*this = RValue<Float>(rhs.loadValue());
   6520 	}
   6521 
   6522 	RValue<Float4> Float4::operator=(float x)
   6523 	{
   6524 		return *this = Float4(x, x, x, x);
   6525 	}
   6526 
   6527 	RValue<Float4> Float4::operator=(RValue<Float4> rhs)
   6528 	{
   6529 		storeValue(rhs.value);
   6530 
   6531 		return rhs;
   6532 	}
   6533 
   6534 	RValue<Float4> Float4::operator=(const Float4 &rhs)
   6535 	{
   6536 		Value *value = rhs.loadValue();
   6537 		storeValue(value);
   6538 
   6539 		return RValue<Float4>(value);
   6540 	}
   6541 
   6542 	RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
   6543 	{
   6544 		Value *value = rhs.loadValue();
   6545 		storeValue(value);
   6546 
   6547 		return RValue<Float4>(value);
   6548 	}
   6549 
   6550 	RValue<Float4> Float4::operator=(RValue<Float> rhs)
   6551 	{
   6552 		return *this = Float4(rhs);
   6553 	}
   6554 
   6555 	RValue<Float4> Float4::operator=(const Float &rhs)
   6556 	{
   6557 		return *this = Float4(rhs);
   6558 	}
   6559 
   6560 	RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
   6561 	{
   6562 		return *this = Float4(rhs);
   6563 	}
   6564 
   6565 	RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
   6566 	{
   6567 		return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
   6568 	}
   6569 
   6570 	RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
   6571 	{
   6572 		return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
   6573 	}
   6574 
   6575 	RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
   6576 	{
   6577 		return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
   6578 	}
   6579 
   6580 	RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
   6581 	{
   6582 		return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
   6583 	}
   6584 
   6585 	RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
   6586 	{
   6587 		return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
   6588 	}
   6589 
   6590 	RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
   6591 	{
   6592 		return lhs = lhs + rhs;
   6593 	}
   6594 
   6595 	RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
   6596 	{
   6597 		return lhs = lhs - rhs;
   6598 	}
   6599 
   6600 	RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
   6601 	{
   6602 		return lhs = lhs * rhs;
   6603 	}
   6604 
   6605 	RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
   6606 	{
   6607 		return lhs = lhs / rhs;
   6608 	}
   6609 
   6610 	RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
   6611 	{
   6612 		return lhs = lhs % rhs;
   6613 	}
   6614 
   6615 	RValue<Float4> operator+(RValue<Float4> val)
   6616 	{
   6617 		return val;
   6618 	}
   6619 
   6620 	RValue<Float4> operator-(RValue<Float4> val)
   6621 	{
   6622 		return RValue<Float4>(Nucleus::createFNeg(val.value));
   6623 	}
   6624 
   6625 	RValue<Float4> Abs(RValue<Float4> x)
   6626 	{
   6627 		Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
   6628 		int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
   6629 		Value *result = Nucleus::createAnd(vector, Nucleus::createConstantVector(constantVector, Int4::getType()));
   6630 
   6631 		return As<Float4>(result);
   6632 	}
   6633 
   6634 	RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
   6635 	{
   6636 #if defined(__i386__) || defined(__x86_64__)
   6637 		return x86::maxps(x, y);
   6638 #else
   6639 		return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OGT)));
   6640 #endif
   6641 	}
   6642 
   6643 	RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
   6644 	{
   6645 #if defined(__i386__) || defined(__x86_64__)
   6646 		return x86::minps(x, y);
   6647 #else
   6648 		return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OLT)));
   6649 #endif
   6650 	}
   6651 
   6652 	RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
   6653 	{
   6654 #if defined(__i386__) || defined(__x86_64__)
   6655 		if(exactAtPow2)
   6656 		{
   6657 			// rcpps uses a piecewise-linear approximation which minimizes the relative error
   6658 			// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
   6659 			return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
   6660 		}
   6661 		return x86::rcpps(x);
   6662 #else
   6663 		return As<Float4>(V(lowerRCP(V(x.value))));
   6664 #endif
   6665 	}
   6666 
   6667 	RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
   6668 	{
   6669 #if defined(__i386__) || defined(__x86_64__)
   6670 		return x86::rsqrtps(x);
   6671 #else
   6672 		return As<Float4>(V(lowerRSQRT(V(x.value))));
   6673 #endif
   6674 	}
   6675 
   6676 	RValue<Float4> Sqrt(RValue<Float4> x)
   6677 	{
   6678 #if defined(__i386__) || defined(__x86_64__)
   6679 		return x86::sqrtps(x);
   6680 #else
   6681 		return As<Float4>(V(lowerSQRT(V(x.value))));
   6682 #endif
   6683 	}
   6684 
   6685 	RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
   6686 	{
   6687 		return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
   6688 	}
   6689 
   6690 	RValue<Float> Extract(RValue<Float4> x, int i)
   6691 	{
   6692 		return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
   6693 	}
   6694 
   6695 	RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
   6696 	{
   6697 		return RValue<Float4>(createSwizzle4(x.value, select));
   6698 	}
   6699 
   6700 	RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
   6701 	{
   6702 		int shuffle[4] =
   6703 		{
   6704 			((imm >> 0) & 0x03) + 0,
   6705 			((imm >> 2) & 0x03) + 0,
   6706 			((imm >> 4) & 0x03) + 4,
   6707 			((imm >> 6) & 0x03) + 4,
   6708 		};
   6709 
   6710 		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   6711 	}
   6712 
   6713 	RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
   6714 	{
   6715 		int shuffle[4] = {0, 4, 1, 5};
   6716 		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   6717 	}
   6718 
   6719 	RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
   6720 	{
   6721 		int shuffle[4] = {2, 6, 3, 7};
   6722 		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   6723 	}
   6724 
   6725 	RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
   6726 	{
   6727 		Value *vector = lhs.loadValue();
   6728 		Value *result = createMask4(vector, rhs.value, select);
   6729 		lhs.storeValue(result);
   6730 
   6731 		return RValue<Float4>(result);
   6732 	}
   6733 
   6734 	RValue<Int> SignMask(RValue<Float4> x)
   6735 	{
   6736 #if defined(__i386__) || defined(__x86_64__)
   6737 		return x86::movmskps(x);
   6738 #else
   6739 		return As<Int>(V(lowerFPSignMask(V(x.value), T(Int::getType()))));
   6740 #endif
   6741 	}
   6742 
   6743 	RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
   6744 	{
   6745 	//	return As<Int4>(x86::cmpeqps(x, y));
   6746 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
   6747 	}
   6748 
   6749 	RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
   6750 	{
   6751 	//	return As<Int4>(x86::cmpltps(x, y));
   6752 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
   6753 	}
   6754 
   6755 	RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
   6756 	{
   6757 	//	return As<Int4>(x86::cmpleps(x, y));
   6758 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
   6759 	}
   6760 
   6761 	RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
   6762 	{
   6763 	//	return As<Int4>(x86::cmpneqps(x, y));
   6764 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
   6765 	}
   6766 
   6767 	RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
   6768 	{
   6769 	//	return As<Int4>(x86::cmpnltps(x, y));
   6770 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
   6771 	}
   6772 
   6773 	RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
   6774 	{
   6775 	//	return As<Int4>(x86::cmpnleps(x, y));
   6776 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
   6777 	}
   6778 
   6779 	RValue<Int4> IsInf(RValue<Float4> x)
   6780 	{
   6781 		return CmpEQ(As<Int4>(x) & Int4(0x7FFFFFFF), Int4(0x7F800000));
   6782 	}
   6783 
   6784 	RValue<Int4> IsNan(RValue<Float4> x)
   6785 	{
   6786 		return ~CmpEQ(x, x);
   6787 	}
   6788 
   6789 	RValue<Float4> Round(RValue<Float4> x)
   6790 	{
   6791 #if defined(__i386__) || defined(__x86_64__)
   6792 		if(CPUID::supportsSSE4_1())
   6793 		{
   6794 			return x86::roundps(x, 0);
   6795 		}
   6796 		else
   6797 		{
   6798 			return Float4(RoundInt(x));
   6799 		}
   6800 #else
   6801 		return RValue<Float4>(V(lowerRound(V(x.value))));
   6802 #endif
   6803 	}
   6804 
   6805 	RValue<Float4> Trunc(RValue<Float4> x)
   6806 	{
   6807 #if defined(__i386__) || defined(__x86_64__)
   6808 		if(CPUID::supportsSSE4_1())
   6809 		{
   6810 			return x86::roundps(x, 3);
   6811 		}
   6812 		else
   6813 		{
   6814 			return Float4(Int4(x));
   6815 		}
   6816 #else
   6817 		return RValue<Float4>(V(lowerTrunc(V(x.value))));
   6818 #endif
   6819 	}
   6820 
   6821 	RValue<Float4> Frac(RValue<Float4> x)
   6822 	{
   6823 		Float4 frc;
   6824 
   6825 #if defined(__i386__) || defined(__x86_64__)
   6826 		if(CPUID::supportsSSE4_1())
   6827 		{
   6828 			frc = x - Floor(x);
   6829 		}
   6830 		else
   6831 		{
   6832 			frc = x - Float4(Int4(x));   // Signed fractional part.
   6833 
   6834 			frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f)));   // Add 1.0 if negative.
   6835 		}
   6836 #else
   6837 		frc = x - Floor(x);
   6838 #endif
   6839 
   6840 		// x - floor(x) can be 1.0 for very small negative x.
   6841 		// Clamp against the value just below 1.0.
   6842 		return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
   6843 	}
   6844 
   6845 	RValue<Float4> Floor(RValue<Float4> x)
   6846 	{
   6847 #if defined(__i386__) || defined(__x86_64__)
   6848 		if(CPUID::supportsSSE4_1())
   6849 		{
   6850 			return x86::floorps(x);
   6851 		}
   6852 		else
   6853 		{
   6854 			return x - Frac(x);
   6855 		}
   6856 #else
   6857 		return RValue<Float4>(V(lowerFloor(V(x.value))));
   6858 #endif
   6859 	}
   6860 
   6861 	RValue<Float4> Ceil(RValue<Float4> x)
   6862 	{
   6863 #if defined(__i386__) || defined(__x86_64__)
   6864 		if(CPUID::supportsSSE4_1())
   6865 		{
   6866 			return x86::ceilps(x);
   6867 		}
   6868 		else
   6869 #endif
   6870 		{
   6871 			return -Floor(-x);
   6872 		}
   6873 	}
   6874 
   6875 	Type *Float4::getType()
   6876 	{
   6877 		return T(llvm::VectorType::get(T(Float::getType()), 4));
   6878 	}
   6879 
   6880 	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
   6881 	{
   6882 		return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
   6883 	}
   6884 
   6885 	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
   6886 	{
   6887 		return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false));
   6888 	}
   6889 
   6890 	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
   6891 	{
   6892 		return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true));
   6893 	}
   6894 
   6895 	RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
   6896 	{
   6897 		return lhs = lhs + offset;
   6898 	}
   6899 
   6900 	RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
   6901 	{
   6902 		return lhs = lhs + offset;
   6903 	}
   6904 
   6905 	RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
   6906 	{
   6907 		return lhs = lhs + offset;
   6908 	}
   6909 
   6910 	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
   6911 	{
   6912 		return lhs + -offset;
   6913 	}
   6914 
   6915 	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
   6916 	{
   6917 		return lhs + -offset;
   6918 	}
   6919 
   6920 	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
   6921 	{
   6922 		return lhs + -offset;
   6923 	}
   6924 
   6925 	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
   6926 	{
   6927 		return lhs = lhs - offset;
   6928 	}
   6929 
   6930 	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
   6931 	{
   6932 		return lhs = lhs - offset;
   6933 	}
   6934 
   6935 	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
   6936 	{
   6937 		return lhs = lhs - offset;
   6938 	}
   6939 
   6940 	void Return()
   6941 	{
   6942 		Nucleus::createRetVoid();
   6943 		Nucleus::setInsertBlock(Nucleus::createBasicBlock());
   6944 		Nucleus::createUnreachable();
   6945 	}
   6946 
   6947 	void Return(RValue<Int> ret)
   6948 	{
   6949 		Nucleus::createRet(ret.value);
   6950 		Nucleus::setInsertBlock(Nucleus::createBasicBlock());
   6951 		Nucleus::createUnreachable();
   6952 	}
   6953 
   6954 	void branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
   6955 	{
   6956 		Nucleus::createCondBr(cmp.value, bodyBB, endBB);
   6957 		Nucleus::setInsertBlock(bodyBB);
   6958 	}
   6959 
   6960 	RValue<Long> Ticks()
   6961 	{
   6962 		llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter);
   6963 
   6964 		return RValue<Long>(V(::builder->CreateCall(rdtsc)));
   6965 	}
   6966 }
   6967 
   6968 namespace rr
   6969 {
   6970 #if defined(__i386__) || defined(__x86_64__)
   6971 	namespace x86
   6972 	{
   6973 		RValue<Int> cvtss2si(RValue<Float> val)
   6974 		{
   6975 			llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si);
   6976 
   6977 			Float4 vector;
   6978 			vector.x = val;
   6979 
   6980 			return RValue<Int>(V(::builder->CreateCall(cvtss2si, ARGS(V(RValue<Float4>(vector).value)))));
   6981 		}
   6982 
   6983 		RValue<Int4> cvtps2dq(RValue<Float4> val)
   6984 		{
   6985 			llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
   6986 
   6987 			return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, ARGS(V(val.value)))));
   6988 		}
   6989 
   6990 		RValue<Float> rcpss(RValue<Float> val)
   6991 		{
   6992 			llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss);
   6993 
   6994 			Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
   6995 
   6996 			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, ARGS(V(vector)))), Float::getType(), 0));
   6997 		}
   6998 
   6999 		RValue<Float> sqrtss(RValue<Float> val)
   7000 		{
   7001 #if REACTOR_LLVM_VERSION < 7
   7002 			llvm::Function *sqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ss);
   7003 			Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
   7004 
   7005 			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, ARGS(V(vector)))), Float::getType(), 0));
   7006 #else
   7007 			llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
   7008 			return RValue<Float>(V(::builder->CreateCall(sqrt, ARGS(V(val.value)))));
   7009 #endif
   7010 		}
   7011 
   7012 		RValue<Float> rsqrtss(RValue<Float> val)
   7013 		{
   7014 			llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss);
   7015 
   7016 			Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
   7017 
   7018 			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, ARGS(V(vector)))), Float::getType(), 0));
   7019 		}
   7020 
   7021 		RValue<Float4> rcpps(RValue<Float4> val)
   7022 		{
   7023 			llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps);
   7024 
   7025 			return RValue<Float4>(V(::builder->CreateCall(rcpps, ARGS(V(val.value)))));
   7026 		}
   7027 
   7028 		RValue<Float4> sqrtps(RValue<Float4> val)
   7029 		{
   7030 #if REACTOR_LLVM_VERSION < 7
   7031 			llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ps);
   7032 #else
   7033 			llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
   7034 #endif
   7035 
   7036 			return RValue<Float4>(V(::builder->CreateCall(sqrtps, ARGS(V(val.value)))));
   7037 		}
   7038 
   7039 		RValue<Float4> rsqrtps(RValue<Float4> val)
   7040 		{
   7041 			llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps);
   7042 
   7043 			return RValue<Float4>(V(::builder->CreateCall(rsqrtps, ARGS(V(val.value)))));
   7044 		}
   7045 
   7046 		RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
   7047 		{
   7048 			llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps);
   7049 
   7050 			return RValue<Float4>(V(::builder->CreateCall2(maxps, ARGS(V(x.value), V(y.value)))));
   7051 		}
   7052 
   7053 		RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
   7054 		{
   7055 			llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps);
   7056 
   7057 			return RValue<Float4>(V(::builder->CreateCall2(minps, ARGS(V(x.value), V(y.value)))));
   7058 		}
   7059 
   7060 		RValue<Float> roundss(RValue<Float> val, unsigned char imm)
   7061 		{
   7062 			llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss);
   7063 
   7064 			Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
   7065 			Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
   7066 
   7067 			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, ARGS(V(undef), V(vector), V(Nucleus::createConstantInt(imm))))), Float::getType(), 0));
   7068 		}
   7069 
   7070 		RValue<Float> floorss(RValue<Float> val)
   7071 		{
   7072 			return roundss(val, 1);
   7073 		}
   7074 
   7075 		RValue<Float> ceilss(RValue<Float> val)
   7076 		{
   7077 			return roundss(val, 2);
   7078 		}
   7079 
   7080 		RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
   7081 		{
   7082 			llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps);
   7083 
   7084 			return RValue<Float4>(V(::builder->CreateCall2(roundps, ARGS(V(val.value), V(Nucleus::createConstantInt(imm))))));
   7085 		}
   7086 
   7087 		RValue<Float4> floorps(RValue<Float4> val)
   7088 		{
   7089 			return roundps(val, 1);
   7090 		}
   7091 
   7092 		RValue<Float4> ceilps(RValue<Float4> val)
   7093 		{
   7094 			return roundps(val, 2);
   7095 		}
   7096 
   7097 		RValue<Int4> pabsd(RValue<Int4> x)
   7098 		{
   7099 #if REACTOR_LLVM_VERSION < 7
   7100 			llvm::Function *pabsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_ssse3_pabs_d_128);
   7101 
   7102 			return RValue<Int4>(V(::builder->CreateCall(pabsd, ARGS(V(x.value)))));
   7103 #else
   7104 			return RValue<Int4>(V(lowerPABS(V(x.value))));
   7105 #endif
   7106 		}
   7107 
   7108 		RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
   7109 		{
   7110 			llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
   7111 
   7112 			return As<Short4>(V(::builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
   7113 		}
   7114 
   7115 		RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
   7116 		{
   7117 			llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
   7118 
   7119 			return As<Short4>(V(::builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
   7120 		}
   7121 
   7122 		RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
   7123 		{
   7124 			llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
   7125 
   7126 			return As<UShort4>(V(::builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
   7127 		}
   7128 
   7129 		RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
   7130 		{
   7131 			llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
   7132 
   7133 			return As<UShort4>(V(::builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
   7134 		}
   7135 
   7136 		RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
   7137 		{
   7138 			llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
   7139 
   7140 			return As<SByte8>(V(::builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
   7141 		}
   7142 
   7143 		RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
   7144 		{
   7145 			llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
   7146 
   7147 			return As<SByte8>(V(::builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
   7148 		}
   7149 
   7150 		RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
   7151 		{
   7152 			llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
   7153 
   7154 			return As<Byte8>(V(::builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
   7155 		}
   7156 
   7157 		RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
   7158 		{
   7159 			llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
   7160 
   7161 			return As<Byte8>(V(::builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
   7162 		}
   7163 
   7164 		RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
   7165 		{
   7166 #if REACTOR_LLVM_VERSION < 7
   7167 			llvm::Function *pavgw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pavg_w);
   7168 
   7169 			return As<UShort4>(V(::builder->CreateCall2(pavgw, ARGS(V(x.value), V(y.value)))));
   7170 #else
   7171 			return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
   7172 #endif
   7173 		}
   7174 
   7175 		RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
   7176 		{
   7177 #if REACTOR_LLVM_VERSION < 7
   7178 			llvm::Function *pmaxsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmaxs_w);
   7179 
   7180 			return As<Short4>(V(::builder->CreateCall2(pmaxsw, ARGS(V(x.value), V(y.value)))));
   7181 #else
   7182 			return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
   7183 #endif
   7184 		}
   7185 
   7186 		RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
   7187 		{
   7188 #if REACTOR_LLVM_VERSION < 7
   7189 			llvm::Function *pminsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmins_w);
   7190 
   7191 			return As<Short4>(V(::builder->CreateCall2(pminsw, ARGS(V(x.value), V(y.value)))));
   7192 #else
   7193 			return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
   7194 #endif
   7195 		}
   7196 
   7197 		RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
   7198 		{
   7199 #if REACTOR_LLVM_VERSION < 7
   7200 			llvm::Function *pcmpgtw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_w);
   7201 
   7202 			return As<Short4>(V(::builder->CreateCall2(pcmpgtw, ARGS(V(x.value), V(y.value)))));
   7203 #else
   7204 			return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
   7205 #endif
   7206 		}
   7207 
   7208 		RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
   7209 		{
   7210 #if REACTOR_LLVM_VERSION < 7
   7211 			llvm::Function *pcmpeqw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_w);
   7212 
   7213 			return As<Short4>(V(::builder->CreateCall2(pcmpeqw, ARGS(V(x.value), V(y.value)))));
   7214 #else
   7215 			return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
   7216 #endif
   7217 		}
   7218 
   7219 		RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
   7220 		{
   7221 #if REACTOR_LLVM_VERSION < 7
   7222 			llvm::Function *pcmpgtb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_b);
   7223 
   7224 			return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, ARGS(V(x.value), V(y.value)))));
   7225 #else
   7226 			return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
   7227 #endif
   7228 		}
   7229 
   7230 		RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
   7231 		{
   7232 #if REACTOR_LLVM_VERSION < 7
   7233 			llvm::Function *pcmpeqb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_b);
   7234 
   7235 			return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, ARGS(V(x.value), V(y.value)))));
   7236 #else
   7237 			return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
   7238 #endif
   7239 		}
   7240 
   7241 		RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
   7242 		{
   7243 			llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
   7244 
   7245 			return As<Short4>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
   7246 		}
   7247 
   7248 		RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
   7249 		{
   7250 			llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
   7251 
   7252 			return RValue<Short8>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
   7253 		}
   7254 
   7255 		RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
   7256 		{
   7257 			llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
   7258 
   7259 			return As<SByte8>(V(::builder->CreateCall2(packsswb, ARGS(V(x.value), V(y.value)))));
   7260 		}
   7261 
   7262 		RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
   7263 		{
   7264 			llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
   7265 
   7266 			return As<Byte8>(V(::builder->CreateCall2(packuswb, ARGS(V(x.value), V(y.value)))));
   7267 		}
   7268 
   7269 		RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
   7270 		{
   7271 			if(CPUID::supportsSSE4_1())
   7272 			{
   7273 				llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
   7274 
   7275 				return RValue<UShort8>(V(::builder->CreateCall2(packusdw, ARGS(V(x.value), V(y.value)))));
   7276 			}
   7277 			else
   7278 			{
   7279 				RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
   7280 				RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
   7281 
   7282 				return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
   7283 			}
   7284 		}
   7285 
   7286 		RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
   7287 		{
   7288 			llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
   7289 
   7290 			return As<UShort4>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
   7291 		}
   7292 
   7293 		RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
   7294 		{
   7295 			llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
   7296 
   7297 			return RValue<UShort8>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
   7298 		}
   7299 
   7300 		RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
   7301 		{
   7302 			llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
   7303 
   7304 			return As<Short4>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
   7305 		}
   7306 
   7307 		RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
   7308 		{
   7309 			llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
   7310 
   7311 			return RValue<Short8>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
   7312 		}
   7313 
   7314 		RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
   7315 		{
   7316 			llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
   7317 
   7318 			return As<Short4>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
   7319 		}
   7320 
   7321 		RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
   7322 		{
   7323 			llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
   7324 
   7325 			return RValue<Short8>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
   7326 		}
   7327 
   7328 		RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
   7329 		{
   7330 			llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
   7331 
   7332 			return As<Int2>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
   7333 		}
   7334 
   7335 		RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
   7336 		{
   7337 			llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
   7338 
   7339 			return RValue<Int4>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
   7340 		}
   7341 
   7342 		RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
   7343 		{
   7344 			llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
   7345 
   7346 			return As<Int2>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
   7347 		}
   7348 
   7349 		RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
   7350 		{
   7351 			llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
   7352 
   7353 			return RValue<Int4>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
   7354 		}
   7355 
   7356 		RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
   7357 		{
   7358 			llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
   7359 
   7360 			return As<UInt2>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
   7361 		}
   7362 
   7363 		RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
   7364 		{
   7365 			llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
   7366 
   7367 			return RValue<UInt4>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
   7368 		}
   7369 
   7370 		RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
   7371 		{
   7372 #if REACTOR_LLVM_VERSION < 7
   7373 			llvm::Function *pmaxsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxsd);
   7374 
   7375 			return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, ARGS(V(x.value), V(y.value)))));
   7376 #else
   7377 			return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
   7378 #endif
   7379 		}
   7380 
   7381 		RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
   7382 		{
   7383 #if REACTOR_LLVM_VERSION < 7
   7384 			llvm::Function *pminsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminsd);
   7385 
   7386 			return RValue<Int4>(V(::builder->CreateCall2(pminsd, ARGS(V(x.value), V(y.value)))));
   7387 #else
   7388 			return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
   7389 #endif
   7390 		}
   7391 
   7392 		RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
   7393 		{
   7394 #if REACTOR_LLVM_VERSION < 7
   7395 			llvm::Function *pmaxud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxud);
   7396 
   7397 			return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, ARGS(V(x.value), V(y.value)))));
   7398 #else
   7399 			return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_UGT)));
   7400 #endif
   7401 		}
   7402 
   7403 		RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
   7404 		{
   7405 #if REACTOR_LLVM_VERSION < 7
   7406 			llvm::Function *pminud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminud);
   7407 
   7408 			return RValue<UInt4>(V(::builder->CreateCall2(pminud, ARGS(V(x.value), V(y.value)))));
   7409 #else
   7410 			return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_ULT)));
   7411 #endif
   7412 		}
   7413 
   7414 		RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
   7415 		{
   7416 			llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
   7417 
   7418 			return As<Short4>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
   7419 		}
   7420 
   7421 		RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
   7422 		{
   7423 			llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
   7424 
   7425 			return As<UShort4>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
   7426 		}
   7427 
   7428 		RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
   7429 		{
   7430 			llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
   7431 
   7432 			return As<Int2>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
   7433 		}
   7434 
   7435 		RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
   7436 		{
   7437 			llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
   7438 
   7439 			return RValue<Short8>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
   7440 		}
   7441 
   7442 		RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
   7443 		{
   7444 			llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
   7445 
   7446 			return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
   7447 		}
   7448 
   7449 		RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
   7450 		{
   7451 			llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
   7452 
   7453 			return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
   7454 		}
   7455 
   7456 		RValue<Int> movmskps(RValue<Float4> x)
   7457 		{
   7458 			llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps);
   7459 
   7460 			return RValue<Int>(V(::builder->CreateCall(movmskps, ARGS(V(x.value)))));
   7461 		}
   7462 
   7463 		RValue<Int> pmovmskb(RValue<Byte8> x)
   7464 		{
   7465 			llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
   7466 
   7467 			return RValue<Int>(V(::builder->CreateCall(pmovmskb, ARGS(V(x.value))))) & 0xFF;
   7468 		}
   7469 
   7470 		RValue<Int4> pmovzxbd(RValue<Byte16> x)
   7471 		{
   7472 #if REACTOR_LLVM_VERSION < 7
   7473 			llvm::Function *pmovzxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxbd);
   7474 
   7475 			return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, ARGS(V(x.value)))));
   7476 #else
   7477 			return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
   7478 #endif
   7479 		}
   7480 
   7481 		RValue<Int4> pmovsxbd(RValue<SByte16> x)
   7482 		{
   7483 #if REACTOR_LLVM_VERSION < 7
   7484 			llvm::Function *pmovsxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxbd);
   7485 
   7486 			return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, ARGS(V(x.value)))));
   7487 #else
   7488 			return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
   7489 #endif
   7490 		}
   7491 
   7492 		RValue<Int4> pmovzxwd(RValue<UShort8> x)
   7493 		{
   7494 #if REACTOR_LLVM_VERSION < 7
   7495 			llvm::Function *pmovzxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxwd);
   7496 
   7497 			return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, ARGS(V(x.value)))));
   7498 #else
   7499 			return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
   7500 #endif
   7501 		}
   7502 
   7503 		RValue<Int4> pmovsxwd(RValue<Short8> x)
   7504 		{
   7505 #if REACTOR_LLVM_VERSION < 7
   7506 			llvm::Function *pmovsxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxwd);
   7507 
   7508 			return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, ARGS(V(x.value)))));
   7509 #else
   7510 			return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
   7511 #endif
   7512 		}
   7513 	}
   7514 #endif  // defined(__i386__) || defined(__x86_64__)
   7515 }
   7516