Home | History | Annotate | Download | only in Reactor
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "Reactor.hpp"
     16 
     17 #include "llvm/Support/IRBuilder.h"
     18 #include "llvm/Function.h"
     19 #include "llvm/GlobalVariable.h"
     20 #include "llvm/Module.h"
     21 #include "llvm/LLVMContext.h"
     22 #include "llvm/Constants.h"
     23 #include "llvm/Intrinsics.h"
     24 #include "llvm/PassManager.h"
     25 #include "llvm/Analysis/LoopPass.h"
     26 #include "llvm/Transforms/Scalar.h"
     27 #include "llvm/Target/TargetData.h"
     28 #include "llvm/Target/TargetOptions.h"
     29 #include "llvm/Support/TargetSelect.h"
     30 #include "../lib/ExecutionEngine/JIT/JIT.h"
     31 
     32 #include "LLVMRoutine.hpp"
     33 #include "LLVMRoutineManager.hpp"
     34 #include "x86.hpp"
     35 #include "Common/CPUID.hpp"
     36 #include "Common/Thread.hpp"
     37 #include "Common/Memory.hpp"
     38 #include "Common/MutexLock.hpp"
     39 
     40 #include <fstream>
     41 
     42 #if defined(__i386__) || defined(__x86_64__)
     43 #include <xmmintrin.h>
     44 #endif
     45 
     46 #if defined(__x86_64__) && defined(_WIN32)
     47 extern "C" void X86CompilationCallback()
     48 {
     49 	assert(false);   // UNIMPLEMENTED
     50 }
     51 #endif
     52 
     53 extern "C"
     54 {
     55 	bool (*CodeAnalystInitialize)() = 0;
     56 	void (*CodeAnalystCompleteJITLog)() = 0;
     57 	bool (*CodeAnalystLogJITCode)(const void *jitCodeStartAddr, unsigned int jitCodeSize, const wchar_t *functionName) = 0;
     58 }
     59 
     60 namespace llvm
     61 {
     62 	extern bool JITEmitDebugInfo;
     63 }
     64 
     65 namespace
     66 {
     67 	sw::LLVMRoutineManager *routineManager = nullptr;
     68 	llvm::ExecutionEngine *executionEngine = nullptr;
     69 	llvm::IRBuilder<> *builder = nullptr;
     70 	llvm::LLVMContext *context = nullptr;
     71 	llvm::Module *module = nullptr;
     72 	llvm::Function *function = nullptr;
     73 
     74 	sw::MutexLock codegenMutex;
     75 }
     76 
     77 namespace sw
     78 {
     79 	Optimization optimization[10] = {InstructionCombining, Disabled};
     80 
     81 	enum EmulatedType
     82 	{
     83 		Type_v2i32,
     84 		Type_v4i16,
     85 		Type_v2i16,
     86 		Type_v8i8,
     87 		Type_v4i8,
     88 		Type_v2f32,
     89 		EmulatedTypeCount
     90 	};
     91 
     92 	class Value : public llvm::Value {};
     93 	class SwitchCases : public llvm::SwitchInst {};
     94 	class BasicBlock : public llvm::BasicBlock {};
     95 
     96 	llvm::Type *T(Type *t)
     97 	{
     98 		uintptr_t type = reinterpret_cast<uintptr_t>(t);
     99 		if(type < EmulatedTypeCount)
    100 		{
    101 			// Use 128-bit vectors to implement logically shorter ones.
    102 			switch(type)
    103 			{
    104 			case Type_v2i32: return T(Int4::getType());
    105 			case Type_v4i16: return T(Short8::getType());
    106 			case Type_v2i16: return T(Short8::getType());
    107 			case Type_v8i8:  return T(Byte16::getType());
    108 			case Type_v4i8:  return T(Byte16::getType());
    109 			case Type_v2f32: return T(Float4::getType());
    110 			default: assert(false);
    111 			}
    112 		}
    113 
    114 		return reinterpret_cast<llvm::Type*>(t);
    115 	}
    116 
    117 	inline Type *T(llvm::Type *t)
    118 	{
    119 		return reinterpret_cast<Type*>(t);
    120 	}
    121 
    122 	Type *T(EmulatedType t)
    123 	{
    124 		return reinterpret_cast<Type*>(t);
    125 	}
    126 
    127 	inline Value *V(llvm::Value *t)
    128 	{
    129 		return reinterpret_cast<Value*>(t);
    130 	}
    131 
    132 	inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
    133 	{
    134 		return reinterpret_cast<std::vector<llvm::Type*>&>(t);
    135 	}
    136 
    137 	inline BasicBlock *B(llvm::BasicBlock *t)
    138 	{
    139 		return reinterpret_cast<BasicBlock*>(t);
    140 	}
    141 
    142 	static size_t typeSize(Type *type)
    143 	{
    144 		uintptr_t t = reinterpret_cast<uintptr_t>(type);
    145 		if(t < EmulatedTypeCount)
    146 		{
    147 			switch(t)
    148 			{
    149 			case Type_v2i32: return 8;
    150 			case Type_v4i16: return 8;
    151 			case Type_v2i16: return 4;
    152 			case Type_v8i8:  return 8;
    153 			case Type_v4i8:  return 4;
    154 			case Type_v2f32: return 8;
    155 			default: assert(false);
    156 			}
    157 		}
    158 
    159 		return T(type)->getPrimitiveSizeInBits() / 8;
    160 	}
    161 
    162 	static unsigned int elementCount(Type *type)
    163 	{
    164 		uintptr_t t = reinterpret_cast<uintptr_t>(type);
    165 		if(t < EmulatedTypeCount)
    166 		{
    167 			switch(t)
    168 			{
    169 			case Type_v2i32: return 2;
    170 			case Type_v4i16: return 4;
    171 			case Type_v2i16: return 2;
    172 			case Type_v8i8:  return 8;
    173 			case Type_v4i8:  return 4;
    174 			case Type_v2f32: return 2;
    175 			default: assert(false);
    176 			}
    177 		}
    178 
    179 		return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
    180 	}
    181 
    182 	Nucleus::Nucleus()
    183 	{
    184 		::codegenMutex.lock();   // Reactor and LLVM are currently not thread safe
    185 
    186 		llvm::InitializeNativeTarget();
    187 		llvm::JITEmitDebugInfo = false;
    188 
    189 		if(!::context)
    190 		{
    191 			::context = new llvm::LLVMContext();
    192 		}
    193 
    194 		::module = new llvm::Module("", *::context);
    195 		::routineManager = new LLVMRoutineManager();
    196 
    197 		#if defined(__x86_64__)
    198 			const char *architecture = "x86-64";
    199 		#else
    200 			const char *architecture = "x86";
    201 		#endif
    202 
    203 		llvm::SmallVector<std::string, 1> MAttrs;
    204 		MAttrs.push_back(CPUID::supportsMMX()    ? "+mmx"   : "-mmx");
    205 		MAttrs.push_back(CPUID::supportsCMOV()   ? "+cmov"  : "-cmov");
    206 		MAttrs.push_back(CPUID::supportsSSE()    ? "+sse"   : "-sse");
    207 		MAttrs.push_back(CPUID::supportsSSE2()   ? "+sse2"  : "-sse2");
    208 		MAttrs.push_back(CPUID::supportsSSE3()   ? "+sse3"  : "-sse3");
    209 		MAttrs.push_back(CPUID::supportsSSSE3()  ? "+ssse3" : "-ssse3");
    210 		MAttrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41");
    211 
    212 		std::string error;
    213 		llvm::TargetMachine *targetMachine = llvm::EngineBuilder::selectTarget(::module, architecture, "", MAttrs, llvm::Reloc::Default, llvm::CodeModel::JITDefault, &error);
    214 		::executionEngine = llvm::JIT::createJIT(::module, 0, ::routineManager, llvm::CodeGenOpt::Aggressive, true, targetMachine);
    215 
    216 		if(!::builder)
    217 		{
    218 			::builder = new llvm::IRBuilder<>(*::context);
    219 
    220 			#if defined(_WIN32)
    221 				HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll");
    222 				if(CodeAnalyst)
    223 				{
    224 					CodeAnalystInitialize = (bool(*)())GetProcAddress(CodeAnalyst, "CAJIT_Initialize");
    225 					CodeAnalystCompleteJITLog = (void(*)())GetProcAddress(CodeAnalyst, "CAJIT_CompleteJITLog");
    226 					CodeAnalystLogJITCode = (bool(*)(const void*, unsigned int, const wchar_t*))GetProcAddress(CodeAnalyst, "CAJIT_LogJITCode");
    227 
    228 					CodeAnalystInitialize();
    229 				}
    230 			#endif
    231 		}
    232 	}
    233 
    234 	Nucleus::~Nucleus()
    235 	{
    236 		delete ::executionEngine;
    237 		::executionEngine = nullptr;
    238 
    239 		::routineManager = nullptr;
    240 		::function = nullptr;
    241 		::module = nullptr;
    242 
    243 		::codegenMutex.unlock();
    244 	}
    245 
    246 	Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
    247 	{
    248 		if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
    249 		{
    250 			llvm::Type *type = ::function->getReturnType();
    251 
    252 			if(type->isVoidTy())
    253 			{
    254 				createRetVoid();
    255 			}
    256 			else
    257 			{
    258 				createRet(V(llvm::UndefValue::get(type)));
    259 			}
    260 		}
    261 
    262 		if(false)
    263 		{
    264 			std::string error;
    265 			llvm::raw_fd_ostream file("llvm-dump-unopt.txt", error);
    266 			::module->print(file, 0);
    267 		}
    268 
    269 		if(runOptimizations)
    270 		{
    271 			optimize();
    272 		}
    273 
    274 		if(false)
    275 		{
    276 			std::string error;
    277 			llvm::raw_fd_ostream file("llvm-dump-opt.txt", error);
    278 			::module->print(file, 0);
    279 		}
    280 
    281 		void *entry = ::executionEngine->getPointerToFunction(::function);
    282 		LLVMRoutine *routine = ::routineManager->acquireRoutine(entry);
    283 
    284 		if(CodeAnalystLogJITCode)
    285 		{
    286 			CodeAnalystLogJITCode(routine->getEntry(), routine->getCodeSize(), name);
    287 		}
    288 
    289 		return routine;
    290 	}
    291 
    292 	void Nucleus::optimize()
    293 	{
    294 		static llvm::PassManager *passManager = nullptr;
    295 
    296 		if(!passManager)
    297 		{
    298 			passManager = new llvm::PassManager();
    299 
    300 			llvm::UnsafeFPMath = true;
    301 		//	llvm::NoInfsFPMath = true;
    302 		//	llvm::NoNaNsFPMath = true;
    303 
    304 			passManager->add(new llvm::TargetData(*::executionEngine->getTargetData()));
    305 			passManager->add(llvm::createScalarReplAggregatesPass());
    306 
    307 			for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
    308 			{
    309 				switch(optimization[pass])
    310 				{
    311 				case Disabled:                                                                       break;
    312 				case CFGSimplification:    passManager->add(llvm::createCFGSimplificationPass());    break;
    313 				case LICM:                 passManager->add(llvm::createLICMPass());                 break;
    314 				case AggressiveDCE:        passManager->add(llvm::createAggressiveDCEPass());        break;
    315 				case GVN:                  passManager->add(llvm::createGVNPass());                  break;
    316 				case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
    317 				case Reassociate:          passManager->add(llvm::createReassociatePass());          break;
    318 				case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
    319 				case SCCP:                 passManager->add(llvm::createSCCPPass());                 break;
    320 				case ScalarReplAggregates: passManager->add(llvm::createScalarReplAggregatesPass()); break;
    321 				default:
    322 					assert(false);
    323 				}
    324 			}
    325 		}
    326 
    327 		passManager->run(*::module);
    328 	}
    329 
    330 	Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
    331 	{
    332 		// Need to allocate it in the entry block for mem2reg to work
    333 		llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
    334 
    335 		llvm::Instruction *declaration;
    336 
    337 		if(arraySize)
    338 		{
    339 			declaration = new llvm::AllocaInst(T(type), Nucleus::createConstantInt(arraySize));
    340 		}
    341 		else
    342 		{
    343 			declaration = new llvm::AllocaInst(T(type), (Value*)nullptr);
    344 		}
    345 
    346 		entryBlock.getInstList().push_front(declaration);
    347 
    348 		return V(declaration);
    349 	}
    350 
    351 	BasicBlock *Nucleus::createBasicBlock()
    352 	{
    353 		return B(BasicBlock::Create(*::context, "", ::function));
    354 	}
    355 
    356 	BasicBlock *Nucleus::getInsertBlock()
    357 	{
    358 		return B(::builder->GetInsertBlock());
    359 	}
    360 
    361 	void Nucleus::setInsertBlock(BasicBlock *basicBlock)
    362 	{
    363 	//	assert(::builder->GetInsertBlock()->back().isTerminator());
    364 		return ::builder->SetInsertPoint(basicBlock);
    365 	}
    366 
    367 	void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
    368 	{
    369 		llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false);
    370 		::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
    371 		::function->setCallingConv(llvm::CallingConv::C);
    372 
    373 		::builder->SetInsertPoint(BasicBlock::Create(*::context, "", ::function));
    374 	}
    375 
    376 	Value *Nucleus::getArgument(unsigned int index)
    377 	{
    378 		llvm::Function::arg_iterator args = ::function->arg_begin();
    379 
    380 		while(index)
    381 		{
    382 			args++;
    383 			index--;
    384 		}
    385 
    386 		return V(&*args);
    387 	}
    388 
    389 	void Nucleus::createRetVoid()
    390 	{
    391 		::builder->CreateRetVoid();
    392 	}
    393 
    394 	void Nucleus::createRet(Value *v)
    395 	{
    396 		::builder->CreateRet(v);
    397 	}
    398 
    399 	void Nucleus::createBr(BasicBlock *dest)
    400 	{
    401 		::builder->CreateBr(dest);
    402 	}
    403 
    404 	void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
    405 	{
    406 		::builder->CreateCondBr(cond, ifTrue, ifFalse);
    407 	}
    408 
    409 	Value *Nucleus::createAdd(Value *lhs, Value *rhs)
    410 	{
    411 		return V(::builder->CreateAdd(lhs, rhs));
    412 	}
    413 
    414 	Value *Nucleus::createSub(Value *lhs, Value *rhs)
    415 	{
    416 		return V(::builder->CreateSub(lhs, rhs));
    417 	}
    418 
    419 	Value *Nucleus::createMul(Value *lhs, Value *rhs)
    420 	{
    421 		return V(::builder->CreateMul(lhs, rhs));
    422 	}
    423 
    424 	Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
    425 	{
    426 		return V(::builder->CreateUDiv(lhs, rhs));
    427 	}
    428 
    429 	Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
    430 	{
    431 		return V(::builder->CreateSDiv(lhs, rhs));
    432 	}
    433 
    434 	Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
    435 	{
    436 		return V(::builder->CreateFAdd(lhs, rhs));
    437 	}
    438 
    439 	Value *Nucleus::createFSub(Value *lhs, Value *rhs)
    440 	{
    441 		return V(::builder->CreateFSub(lhs, rhs));
    442 	}
    443 
    444 	Value *Nucleus::createFMul(Value *lhs, Value *rhs)
    445 	{
    446 		return V(::builder->CreateFMul(lhs, rhs));
    447 	}
    448 
    449 	Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
    450 	{
    451 		return V(::builder->CreateFDiv(lhs, rhs));
    452 	}
    453 
    454 	Value *Nucleus::createURem(Value *lhs, Value *rhs)
    455 	{
    456 		return V(::builder->CreateURem(lhs, rhs));
    457 	}
    458 
    459 	Value *Nucleus::createSRem(Value *lhs, Value *rhs)
    460 	{
    461 		return V(::builder->CreateSRem(lhs, rhs));
    462 	}
    463 
    464 	Value *Nucleus::createFRem(Value *lhs, Value *rhs)
    465 	{
    466 		return V(::builder->CreateFRem(lhs, rhs));
    467 	}
    468 
    469 	Value *Nucleus::createShl(Value *lhs, Value *rhs)
    470 	{
    471 		return V(::builder->CreateShl(lhs, rhs));
    472 	}
    473 
    474 	Value *Nucleus::createLShr(Value *lhs, Value *rhs)
    475 	{
    476 		return V(::builder->CreateLShr(lhs, rhs));
    477 	}
    478 
    479 	Value *Nucleus::createAShr(Value *lhs, Value *rhs)
    480 	{
    481 		return V(::builder->CreateAShr(lhs, rhs));
    482 	}
    483 
    484 	Value *Nucleus::createAnd(Value *lhs, Value *rhs)
    485 	{
    486 		return V(::builder->CreateAnd(lhs, rhs));
    487 	}
    488 
    489 	Value *Nucleus::createOr(Value *lhs, Value *rhs)
    490 	{
    491 		return V(::builder->CreateOr(lhs, rhs));
    492 	}
    493 
    494 	Value *Nucleus::createXor(Value *lhs, Value *rhs)
    495 	{
    496 		return V(::builder->CreateXor(lhs, rhs));
    497 	}
    498 
    499 	Value *Nucleus::createNeg(Value *v)
    500 	{
    501 		return V(::builder->CreateNeg(v));
    502 	}
    503 
    504 	Value *Nucleus::createFNeg(Value *v)
    505 	{
    506 		return V(::builder->CreateFNeg(v));
    507 	}
    508 
    509 	Value *Nucleus::createNot(Value *v)
    510 	{
    511 		return V(::builder->CreateNot(v));
    512 	}
    513 
    514 	Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
    515 	{
    516 		uintptr_t t = reinterpret_cast<uintptr_t>(type);
    517 		if(t < EmulatedTypeCount)
    518 		{
    519 			switch(t)
    520 			{
    521 			case Type_v2i32:
    522 			case Type_v4i16:
    523 			case Type_v8i8:
    524 			case Type_v2f32:
    525 				return createBitCast(createInsertElement(V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))), createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment), 0), T(T(type)));
    526 			case Type_v2i16:
    527 			case Type_v4i8:
    528 				if(alignment != 0)   // Not a local variable (all vectors are 128-bit).
    529 				{
    530 					Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
    531 					Value *i = V(createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment));
    532 					i = createZExt(i, Long::getType());
    533 					Value *v = V(createInsertElement(u, i, 0));
    534 					return createBitCast(v, T(T(type)));
    535 				}
    536 				break;
    537 			default:
    538 				assert(false);
    539 			}
    540 		}
    541 
    542 		assert(ptr->getType()->getContainedType(0) == T(type));
    543 		return V(::builder->Insert(new llvm::LoadInst(ptr, "", isVolatile, alignment)));
    544 	}
    545 
    546 	Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
    547 	{
    548 		uintptr_t t = reinterpret_cast<uintptr_t>(type);
    549 		if(t < EmulatedTypeCount)
    550 		{
    551 			switch(t)
    552 			{
    553 			case Type_v2i32:
    554 			case Type_v4i16:
    555 			case Type_v8i8:
    556 			case Type_v2f32:
    557 				createStore(createExtractElement(createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0), createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment);
    558 				return value;
    559 			case Type_v2i16:
    560 			case Type_v4i8:
    561 				if(alignment != 0)   // Not a local variable (all vectors are 128-bit).
    562 				{
    563 					createStore(createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0), createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment);
    564 					return value;
    565 				}
    566 				break;
    567 			default:
    568 				assert(false);
    569 			}
    570 		}
    571 
    572 		assert(ptr->getType()->getContainedType(0) == T(type));
    573 		::builder->Insert(new llvm::StoreInst(value, ptr, isVolatile, alignment));
    574 		return value;
    575 	}
    576 
    577 	Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
    578 	{
    579 		if(sizeof(void*) == 8)
    580 		{
    581 			if(unsignedIndex)
    582 			{
    583 				index = createZExt(index, Long::getType());
    584 			}
    585 			else
    586 			{
    587 				index = createSExt(index, Long::getType());
    588 			}
    589 
    590 			index = createMul(index, createConstantLong((int64_t)typeSize(type)));
    591 		}
    592 		else
    593 		{
    594 			index = createMul(index, createConstantInt((int)typeSize(type)));
    595 		}
    596 
    597 		assert(ptr->getType()->getContainedType(0) == T(type));
    598 		return createBitCast(V(::builder->CreateGEP(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0))), index)), T(llvm::PointerType::get(T(type), 0)));
    599 	}
    600 
    601 	Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
    602 	{
    603 		return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, ptr, value, llvm::SequentiallyConsistent));
    604 	}
    605 
    606 	Value *Nucleus::createTrunc(Value *v, Type *destType)
    607 	{
    608 		return V(::builder->CreateTrunc(v, T(destType)));
    609 	}
    610 
    611 	Value *Nucleus::createZExt(Value *v, Type *destType)
    612 	{
    613 		return V(::builder->CreateZExt(v, T(destType)));
    614 	}
    615 
    616 	Value *Nucleus::createSExt(Value *v, Type *destType)
    617 	{
    618 		return V(::builder->CreateSExt(v, T(destType)));
    619 	}
    620 
    621 	Value *Nucleus::createFPToSI(Value *v, Type *destType)
    622 	{
    623 		return V(::builder->CreateFPToSI(v, T(destType)));
    624 	}
    625 
    626 	Value *Nucleus::createSIToFP(Value *v, Type *destType)
    627 	{
    628 		return V(::builder->CreateSIToFP(v, T(destType)));
    629 	}
    630 
    631 	Value *Nucleus::createFPTrunc(Value *v, Type *destType)
    632 	{
    633 		return V(::builder->CreateFPTrunc(v, T(destType)));
    634 	}
    635 
    636 	Value *Nucleus::createFPExt(Value *v, Type *destType)
    637 	{
    638 		return V(::builder->CreateFPExt(v, T(destType)));
    639 	}
    640 
    641 	Value *Nucleus::createBitCast(Value *v, Type *destType)
    642 	{
    643 		// Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
    644 		// support for casting between scalars and wide vectors. Emulate them by writing to the stack and
    645 		// reading back as the destination type.
    646 		if(!v->getType()->isVectorTy() && T(destType)->isVectorTy())
    647 		{
    648 			Value *readAddress = allocateStackVariable(destType);
    649 			Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(v->getType(), 0)));
    650 			createStore(v, writeAddress, T(v->getType()));
    651 			return createLoad(readAddress, destType);
    652 		}
    653 		else if(v->getType()->isVectorTy() && !T(destType)->isVectorTy())
    654 		{
    655 			Value *writeAddress = allocateStackVariable(T(v->getType()));
    656 			createStore(v, writeAddress, T(v->getType()));
    657 			Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
    658 			return createLoad(readAddress, destType);
    659 		}
    660 
    661 		return V(::builder->CreateBitCast(v, T(destType)));
    662 	}
    663 
    664 	Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
    665 	{
    666 		return V(::builder->CreateICmpEQ(lhs, rhs));
    667 	}
    668 
    669 	Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
    670 	{
    671 		return V(::builder->CreateICmpNE(lhs, rhs));
    672 	}
    673 
    674 	Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
    675 	{
    676 		return V(::builder->CreateICmpUGT(lhs, rhs));
    677 	}
    678 
    679 	Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
    680 	{
    681 		return V(::builder->CreateICmpUGE(lhs, rhs));
    682 	}
    683 
    684 	Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
    685 	{
    686 		return V(::builder->CreateICmpULT(lhs, rhs));
    687 	}
    688 
    689 	Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
    690 	{
    691 		return V(::builder->CreateICmpULE(lhs, rhs));
    692 	}
    693 
    694 	Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
    695 	{
    696 		return V(::builder->CreateICmpSGT(lhs, rhs));
    697 	}
    698 
    699 	Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
    700 	{
    701 		return V(::builder->CreateICmpSGE(lhs, rhs));
    702 	}
    703 
    704 	Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
    705 	{
    706 		return V(::builder->CreateICmpSLT(lhs, rhs));
    707 	}
    708 
    709 	Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
    710 	{
    711 		return V(::builder->CreateICmpSLE(lhs, rhs));
    712 	}
    713 
    714 	Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
    715 	{
    716 		return V(::builder->CreateFCmpOEQ(lhs, rhs));
    717 	}
    718 
    719 	Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
    720 	{
    721 		return V(::builder->CreateFCmpOGT(lhs, rhs));
    722 	}
    723 
    724 	Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
    725 	{
    726 		return V(::builder->CreateFCmpOGE(lhs, rhs));
    727 	}
    728 
    729 	Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
    730 	{
    731 		return V(::builder->CreateFCmpOLT(lhs, rhs));
    732 	}
    733 
    734 	Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
    735 	{
    736 		return V(::builder->CreateFCmpOLE(lhs, rhs));
    737 	}
    738 
    739 	Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
    740 	{
    741 		return V(::builder->CreateFCmpONE(lhs, rhs));
    742 	}
    743 
    744 	Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
    745 	{
    746 		return V(::builder->CreateFCmpORD(lhs, rhs));
    747 	}
    748 
    749 	Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
    750 	{
    751 		return V(::builder->CreateFCmpUNO(lhs, rhs));
    752 	}
    753 
    754 	Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
    755 	{
    756 		return V(::builder->CreateFCmpUEQ(lhs, rhs));
    757 	}
    758 
    759 	Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
    760 	{
    761 		return V(::builder->CreateFCmpUGT(lhs, rhs));
    762 	}
    763 
    764 	Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
    765 	{
    766 		return V(::builder->CreateFCmpUGE(lhs, rhs));
    767 	}
    768 
    769 	Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
    770 	{
    771 		return V(::builder->CreateFCmpULT(lhs, rhs));
    772 	}
    773 
    774 	Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
    775 	{
    776 		return V(::builder->CreateFCmpULE(lhs, rhs));
    777 	}
    778 
    779 	Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
    780 	{
    781 		return V(::builder->CreateFCmpULE(lhs, rhs));
    782 	}
    783 
    784 	Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
    785 	{
    786 		assert(vector->getType()->getContainedType(0) == T(type));
    787 		return V(::builder->CreateExtractElement(vector, createConstantInt(index)));
    788 	}
    789 
    790 	Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
    791 	{
    792 		return V(::builder->CreateInsertElement(vector, element, createConstantInt(index)));
    793 	}
    794 
    795 	Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
    796 	{
    797 		int size = llvm::cast<llvm::VectorType>(V1->getType())->getNumElements();
    798 		const int maxSize = 16;
    799 		llvm::Constant *swizzle[maxSize];
    800 		assert(size <= maxSize);
    801 
    802 		for(int i = 0; i < size; i++)
    803 		{
    804 			swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]);
    805 		}
    806 
    807 		llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
    808 
    809 		return V(::builder->CreateShuffleVector(V1, V2, shuffle));
    810 	}
    811 
    812 	Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
    813 	{
    814 		return V(::builder->CreateSelect(C, ifTrue, ifFalse));
    815 	}
    816 
    817 	SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
    818 	{
    819 		return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(control, defaultBranch, numCases));
    820 	}
    821 
    822 	void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
    823 	{
    824 		switchCases->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), branch);
    825 	}
    826 
    827 	void Nucleus::createUnreachable()
    828 	{
    829 		::builder->CreateUnreachable();
    830 	}
    831 
    832 	static Value *createSwizzle4(Value *val, unsigned char select)
    833 	{
    834 		int swizzle[4] =
    835 		{
    836 			(select >> 0) & 0x03,
    837 			(select >> 2) & 0x03,
    838 			(select >> 4) & 0x03,
    839 			(select >> 6) & 0x03,
    840 		};
    841 
    842 		return Nucleus::createShuffleVector(val, val, swizzle);
    843 	}
    844 
    845 	static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
    846 	{
    847 		bool mask[4] = {false, false, false, false};
    848 
    849 		mask[(select >> 0) & 0x03] = true;
    850 		mask[(select >> 2) & 0x03] = true;
    851 		mask[(select >> 4) & 0x03] = true;
    852 		mask[(select >> 6) & 0x03] = true;
    853 
    854 		int swizzle[4] =
    855 		{
    856 			mask[0] ? 4 : 0,
    857 			mask[1] ? 5 : 1,
    858 			mask[2] ? 6 : 2,
    859 			mask[3] ? 7 : 3,
    860 		};
    861 
    862 		return Nucleus::createShuffleVector(lhs, rhs, swizzle);
    863 	}
    864 
    865 	Type *Nucleus::getPointerType(Type *ElementType)
    866 	{
    867 		return T(llvm::PointerType::get(T(ElementType), 0));
    868 	}
    869 
    870 	Value *Nucleus::createNullValue(Type *Ty)
    871 	{
    872 		return V(llvm::Constant::getNullValue(T(Ty)));
    873 	}
    874 
    875 	Value *Nucleus::createConstantLong(int64_t i)
    876 	{
    877 		return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true));
    878 	}
    879 
    880 	Value *Nucleus::createConstantInt(int i)
    881 	{
    882 		return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true));
    883 	}
    884 
    885 	Value *Nucleus::createConstantInt(unsigned int i)
    886 	{
    887 		return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false));
    888 	}
    889 
    890 	Value *Nucleus::createConstantBool(bool b)
    891 	{
    892 		return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b));
    893 	}
    894 
    895 	Value *Nucleus::createConstantByte(signed char i)
    896 	{
    897 		return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true));
    898 	}
    899 
    900 	Value *Nucleus::createConstantByte(unsigned char i)
    901 	{
    902 		return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false));
    903 	}
    904 
    905 	Value *Nucleus::createConstantShort(short i)
    906 	{
    907 		return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true));
    908 	}
    909 
    910 	Value *Nucleus::createConstantShort(unsigned short i)
    911 	{
    912 		return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false));
    913 	}
    914 
    915 	Value *Nucleus::createConstantFloat(float x)
    916 	{
    917 		return V(llvm::ConstantFP::get(T(Float::getType()), x));
    918 	}
    919 
    920 	Value *Nucleus::createNullPointer(Type *Ty)
    921 	{
    922 		return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
    923 	}
    924 
    925 	Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
    926 	{
    927 		assert(llvm::isa<llvm::VectorType>(T(type)));
    928 		const int numConstants = elementCount(type);                                       // Number of provided constants for the (emulated) type.
    929 		const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements();   // Number of elements of the underlying vector type.
    930 		assert(numElements <= 16 && numConstants <= numElements);
    931 		llvm::Constant *constantVector[16];
    932 
    933 		for(int i = 0; i < numElements; i++)
    934 		{
    935 			constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]);
    936 		}
    937 
    938 		return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
    939 	}
    940 
    941 	Value *Nucleus::createConstantVector(const double *constants, Type *type)
    942 	{
    943 		assert(llvm::isa<llvm::VectorType>(T(type)));
    944 		const int numConstants = elementCount(type);                                       // Number of provided constants for the (emulated) type.
    945 		const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements();   // Number of elements of the underlying vector type.
    946 		assert(numElements <= 8 && numConstants <= numElements);
    947 		llvm::Constant *constantVector[8];
    948 
    949 		for(int i = 0; i < numElements; i++)
    950 		{
    951 			constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]);
    952 		}
    953 
    954 		return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
    955 	}
    956 
    957 	Type *Void::getType()
    958 	{
    959 		return T(llvm::Type::getVoidTy(*::context));
    960 	}
    961 
    962 	Bool::Bool(Argument<Bool> argument)
    963 	{
    964 		storeValue(argument.value);
    965 	}
    966 
    967 	Bool::Bool(bool x)
    968 	{
    969 		storeValue(Nucleus::createConstantBool(x));
    970 	}
    971 
    972 	Bool::Bool(RValue<Bool> rhs)
    973 	{
    974 		storeValue(rhs.value);
    975 	}
    976 
    977 	Bool::Bool(const Bool &rhs)
    978 	{
    979 		Value *value = rhs.loadValue();
    980 		storeValue(value);
    981 	}
    982 
    983 	Bool::Bool(const Reference<Bool> &rhs)
    984 	{
    985 		Value *value = rhs.loadValue();
    986 		storeValue(value);
    987 	}
    988 
    989 	RValue<Bool> Bool::operator=(RValue<Bool> rhs)
    990 	{
    991 		storeValue(rhs.value);
    992 
    993 		return rhs;
    994 	}
    995 
    996 	RValue<Bool> Bool::operator=(const Bool &rhs)
    997 	{
    998 		Value *value = rhs.loadValue();
    999 		storeValue(value);
   1000 
   1001 		return RValue<Bool>(value);
   1002 	}
   1003 
   1004 	RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
   1005 	{
   1006 		Value *value = rhs.loadValue();
   1007 		storeValue(value);
   1008 
   1009 		return RValue<Bool>(value);
   1010 	}
   1011 
   1012 	RValue<Bool> operator!(RValue<Bool> val)
   1013 	{
   1014 		return RValue<Bool>(Nucleus::createNot(val.value));
   1015 	}
   1016 
   1017 	RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
   1018 	{
   1019 		return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
   1020 	}
   1021 
   1022 	RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
   1023 	{
   1024 		return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
   1025 	}
   1026 
   1027 	Type *Bool::getType()
   1028 	{
   1029 		return T(llvm::Type::getInt1Ty(*::context));
   1030 	}
   1031 
   1032 	Byte::Byte(Argument<Byte> argument)
   1033 	{
   1034 		storeValue(argument.value);
   1035 	}
   1036 
   1037 	Byte::Byte(RValue<Int> cast)
   1038 	{
   1039 		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
   1040 
   1041 		storeValue(integer);
   1042 	}
   1043 
   1044 	Byte::Byte(RValue<UInt> cast)
   1045 	{
   1046 		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
   1047 
   1048 		storeValue(integer);
   1049 	}
   1050 
   1051 	Byte::Byte(RValue<UShort> cast)
   1052 	{
   1053 		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
   1054 
   1055 		storeValue(integer);
   1056 	}
   1057 
   1058 	Byte::Byte(int x)
   1059 	{
   1060 		storeValue(Nucleus::createConstantByte((unsigned char)x));
   1061 	}
   1062 
   1063 	Byte::Byte(unsigned char x)
   1064 	{
   1065 		storeValue(Nucleus::createConstantByte(x));
   1066 	}
   1067 
   1068 	Byte::Byte(RValue<Byte> rhs)
   1069 	{
   1070 		storeValue(rhs.value);
   1071 	}
   1072 
   1073 	Byte::Byte(const Byte &rhs)
   1074 	{
   1075 		Value *value = rhs.loadValue();
   1076 		storeValue(value);
   1077 	}
   1078 
   1079 	Byte::Byte(const Reference<Byte> &rhs)
   1080 	{
   1081 		Value *value = rhs.loadValue();
   1082 		storeValue(value);
   1083 	}
   1084 
   1085 	RValue<Byte> Byte::operator=(RValue<Byte> rhs)
   1086 	{
   1087 		storeValue(rhs.value);
   1088 
   1089 		return rhs;
   1090 	}
   1091 
   1092 	RValue<Byte> Byte::operator=(const Byte &rhs)
   1093 	{
   1094 		Value *value = rhs.loadValue();
   1095 		storeValue(value);
   1096 
   1097 		return RValue<Byte>(value);
   1098 	}
   1099 
   1100 	RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
   1101 	{
   1102 		Value *value = rhs.loadValue();
   1103 		storeValue(value);
   1104 
   1105 		return RValue<Byte>(value);
   1106 	}
   1107 
   1108 	RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
   1109 	{
   1110 		return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
   1111 	}
   1112 
   1113 	RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
   1114 	{
   1115 		return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
   1116 	}
   1117 
   1118 	RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
   1119 	{
   1120 		return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
   1121 	}
   1122 
   1123 	RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
   1124 	{
   1125 		return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
   1126 	}
   1127 
   1128 	RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
   1129 	{
   1130 		return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
   1131 	}
   1132 
   1133 	RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
   1134 	{
   1135 		return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
   1136 	}
   1137 
   1138 	RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
   1139 	{
   1140 		return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
   1141 	}
   1142 
   1143 	RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
   1144 	{
   1145 		return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
   1146 	}
   1147 
   1148 	RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
   1149 	{
   1150 		return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
   1151 	}
   1152 
   1153 	RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
   1154 	{
   1155 		return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
   1156 	}
   1157 
   1158 	RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
   1159 	{
   1160 		return lhs = lhs + rhs;
   1161 	}
   1162 
   1163 	RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
   1164 	{
   1165 		return lhs = lhs - rhs;
   1166 	}
   1167 
   1168 	RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
   1169 	{
   1170 		return lhs = lhs * rhs;
   1171 	}
   1172 
   1173 	RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
   1174 	{
   1175 		return lhs = lhs / rhs;
   1176 	}
   1177 
   1178 	RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
   1179 	{
   1180 		return lhs = lhs % rhs;
   1181 	}
   1182 
   1183 	RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
   1184 	{
   1185 		return lhs = lhs & rhs;
   1186 	}
   1187 
   1188 	RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
   1189 	{
   1190 		return lhs = lhs | rhs;
   1191 	}
   1192 
   1193 	RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
   1194 	{
   1195 		return lhs = lhs ^ rhs;
   1196 	}
   1197 
   1198 	RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
   1199 	{
   1200 		return lhs = lhs << rhs;
   1201 	}
   1202 
   1203 	RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
   1204 	{
   1205 		return lhs = lhs >> rhs;
   1206 	}
   1207 
   1208 	RValue<Byte> operator+(RValue<Byte> val)
   1209 	{
   1210 		return val;
   1211 	}
   1212 
   1213 	RValue<Byte> operator-(RValue<Byte> val)
   1214 	{
   1215 		return RValue<Byte>(Nucleus::createNeg(val.value));
   1216 	}
   1217 
   1218 	RValue<Byte> operator~(RValue<Byte> val)
   1219 	{
   1220 		return RValue<Byte>(Nucleus::createNot(val.value));
   1221 	}
   1222 
   1223 	RValue<Byte> operator++(Byte &val, int)   // Post-increment
   1224 	{
   1225 		RValue<Byte> res = val;
   1226 
   1227 		Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((unsigned char)1)));
   1228 		val.storeValue(inc);
   1229 
   1230 		return res;
   1231 	}
   1232 
   1233 	const Byte &operator++(Byte &val)   // Pre-increment
   1234 	{
   1235 		Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1)));
   1236 		val.storeValue(inc);
   1237 
   1238 		return val;
   1239 	}
   1240 
   1241 	RValue<Byte> operator--(Byte &val, int)   // Post-decrement
   1242 	{
   1243 		RValue<Byte> res = val;
   1244 
   1245 		Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((unsigned char)1)));
   1246 		val.storeValue(inc);
   1247 
   1248 		return res;
   1249 	}
   1250 
   1251 	const Byte &operator--(Byte &val)   // Pre-decrement
   1252 	{
   1253 		Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1)));
   1254 		val.storeValue(inc);
   1255 
   1256 		return val;
   1257 	}
   1258 
   1259 	RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
   1260 	{
   1261 		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
   1262 	}
   1263 
   1264 	RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
   1265 	{
   1266 		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
   1267 	}
   1268 
   1269 	RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
   1270 	{
   1271 		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
   1272 	}
   1273 
   1274 	RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
   1275 	{
   1276 		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
   1277 	}
   1278 
   1279 	RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
   1280 	{
   1281 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   1282 	}
   1283 
   1284 	RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
   1285 	{
   1286 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   1287 	}
   1288 
   1289 	Type *Byte::getType()
   1290 	{
   1291 		return T(llvm::Type::getInt8Ty(*::context));
   1292 	}
   1293 
   1294 	SByte::SByte(Argument<SByte> argument)
   1295 	{
   1296 		storeValue(argument.value);
   1297 	}
   1298 
   1299 	SByte::SByte(RValue<Int> cast)
   1300 	{
   1301 		Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
   1302 
   1303 		storeValue(integer);
   1304 	}
   1305 
   1306 	SByte::SByte(RValue<Short> cast)
   1307 	{
   1308 		Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
   1309 
   1310 		storeValue(integer);
   1311 	}
   1312 
   1313 	SByte::SByte(signed char x)
   1314 	{
   1315 		storeValue(Nucleus::createConstantByte(x));
   1316 	}
   1317 
   1318 	SByte::SByte(RValue<SByte> rhs)
   1319 	{
   1320 		storeValue(rhs.value);
   1321 	}
   1322 
   1323 	SByte::SByte(const SByte &rhs)
   1324 	{
   1325 		Value *value = rhs.loadValue();
   1326 		storeValue(value);
   1327 	}
   1328 
   1329 	SByte::SByte(const Reference<SByte> &rhs)
   1330 	{
   1331 		Value *value = rhs.loadValue();
   1332 		storeValue(value);
   1333 	}
   1334 
   1335 	RValue<SByte> SByte::operator=(RValue<SByte> rhs)
   1336 	{
   1337 		storeValue(rhs.value);
   1338 
   1339 		return rhs;
   1340 	}
   1341 
   1342 	RValue<SByte> SByte::operator=(const SByte &rhs)
   1343 	{
   1344 		Value *value = rhs.loadValue();
   1345 		storeValue(value);
   1346 
   1347 		return RValue<SByte>(value);
   1348 	}
   1349 
   1350 	RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
   1351 	{
   1352 		Value *value = rhs.loadValue();
   1353 		storeValue(value);
   1354 
   1355 		return RValue<SByte>(value);
   1356 	}
   1357 
   1358 	RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
   1359 	{
   1360 		return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
   1361 	}
   1362 
   1363 	RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
   1364 	{
   1365 		return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
   1366 	}
   1367 
   1368 	RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
   1369 	{
   1370 		return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
   1371 	}
   1372 
   1373 	RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
   1374 	{
   1375 		return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
   1376 	}
   1377 
   1378 	RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
   1379 	{
   1380 		return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
   1381 	}
   1382 
   1383 	RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
   1384 	{
   1385 		return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
   1386 	}
   1387 
   1388 	RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
   1389 	{
   1390 		return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
   1391 	}
   1392 
   1393 	RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
   1394 	{
   1395 		return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
   1396 	}
   1397 
   1398 	RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
   1399 	{
   1400 		return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
   1401 	}
   1402 
   1403 	RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
   1404 	{
   1405 		return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
   1406 	}
   1407 
   1408 	RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
   1409 	{
   1410 		return lhs = lhs + rhs;
   1411 	}
   1412 
   1413 	RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
   1414 	{
   1415 		return lhs = lhs - rhs;
   1416 	}
   1417 
   1418 	RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
   1419 	{
   1420 		return lhs = lhs * rhs;
   1421 	}
   1422 
   1423 	RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
   1424 	{
   1425 		return lhs = lhs / rhs;
   1426 	}
   1427 
   1428 	RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
   1429 	{
   1430 		return lhs = lhs % rhs;
   1431 	}
   1432 
   1433 	RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
   1434 	{
   1435 		return lhs = lhs & rhs;
   1436 	}
   1437 
   1438 	RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
   1439 	{
   1440 		return lhs = lhs | rhs;
   1441 	}
   1442 
   1443 	RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
   1444 	{
   1445 		return lhs = lhs ^ rhs;
   1446 	}
   1447 
   1448 	RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
   1449 	{
   1450 		return lhs = lhs << rhs;
   1451 	}
   1452 
   1453 	RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
   1454 	{
   1455 		return lhs = lhs >> rhs;
   1456 	}
   1457 
   1458 	RValue<SByte> operator+(RValue<SByte> val)
   1459 	{
   1460 		return val;
   1461 	}
   1462 
   1463 	RValue<SByte> operator-(RValue<SByte> val)
   1464 	{
   1465 		return RValue<SByte>(Nucleus::createNeg(val.value));
   1466 	}
   1467 
   1468 	RValue<SByte> operator~(RValue<SByte> val)
   1469 	{
   1470 		return RValue<SByte>(Nucleus::createNot(val.value));
   1471 	}
   1472 
   1473 	RValue<SByte> operator++(SByte &val, int)   // Post-increment
   1474 	{
   1475 		RValue<SByte> res = val;
   1476 
   1477 		Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((signed char)1)));
   1478 		val.storeValue(inc);
   1479 
   1480 		return res;
   1481 	}
   1482 
   1483 	const SByte &operator++(SByte &val)   // Pre-increment
   1484 	{
   1485 		Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((signed char)1)));
   1486 		val.storeValue(inc);
   1487 
   1488 		return val;
   1489 	}
   1490 
   1491 	RValue<SByte> operator--(SByte &val, int)   // Post-decrement
   1492 	{
   1493 		RValue<SByte> res = val;
   1494 
   1495 		Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((signed char)1)));
   1496 		val.storeValue(inc);
   1497 
   1498 		return res;
   1499 	}
   1500 
   1501 	const SByte &operator--(SByte &val)   // Pre-decrement
   1502 	{
   1503 		Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((signed char)1)));
   1504 		val.storeValue(inc);
   1505 
   1506 		return val;
   1507 	}
   1508 
   1509 	RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
   1510 	{
   1511 		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
   1512 	}
   1513 
   1514 	RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
   1515 	{
   1516 		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
   1517 	}
   1518 
   1519 	RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
   1520 	{
   1521 		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
   1522 	}
   1523 
   1524 	RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
   1525 	{
   1526 		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
   1527 	}
   1528 
   1529 	RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
   1530 	{
   1531 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   1532 	}
   1533 
   1534 	RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
   1535 	{
   1536 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   1537 	}
   1538 
   1539 	Type *SByte::getType()
   1540 	{
   1541 		return T(llvm::Type::getInt8Ty(*::context));
   1542 	}
   1543 
   1544 	Short::Short(Argument<Short> argument)
   1545 	{
   1546 		storeValue(argument.value);
   1547 	}
   1548 
   1549 	Short::Short(RValue<Int> cast)
   1550 	{
   1551 		Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
   1552 
   1553 		storeValue(integer);
   1554 	}
   1555 
   1556 	Short::Short(short x)
   1557 	{
   1558 		storeValue(Nucleus::createConstantShort(x));
   1559 	}
   1560 
   1561 	Short::Short(RValue<Short> rhs)
   1562 	{
   1563 		storeValue(rhs.value);
   1564 	}
   1565 
   1566 	Short::Short(const Short &rhs)
   1567 	{
   1568 		Value *value = rhs.loadValue();
   1569 		storeValue(value);
   1570 	}
   1571 
   1572 	Short::Short(const Reference<Short> &rhs)
   1573 	{
   1574 		Value *value = rhs.loadValue();
   1575 		storeValue(value);
   1576 	}
   1577 
   1578 	RValue<Short> Short::operator=(RValue<Short> rhs)
   1579 	{
   1580 		storeValue(rhs.value);
   1581 
   1582 		return rhs;
   1583 	}
   1584 
   1585 	RValue<Short> Short::operator=(const Short &rhs)
   1586 	{
   1587 		Value *value = rhs.loadValue();
   1588 		storeValue(value);
   1589 
   1590 		return RValue<Short>(value);
   1591 	}
   1592 
   1593 	RValue<Short> Short::operator=(const Reference<Short> &rhs)
   1594 	{
   1595 		Value *value = rhs.loadValue();
   1596 		storeValue(value);
   1597 
   1598 		return RValue<Short>(value);
   1599 	}
   1600 
   1601 	RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
   1602 	{
   1603 		return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
   1604 	}
   1605 
   1606 	RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
   1607 	{
   1608 		return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
   1609 	}
   1610 
   1611 	RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
   1612 	{
   1613 		return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
   1614 	}
   1615 
   1616 	RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
   1617 	{
   1618 		return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
   1619 	}
   1620 
   1621 	RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
   1622 	{
   1623 		return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
   1624 	}
   1625 
   1626 	RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
   1627 	{
   1628 		return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
   1629 	}
   1630 
   1631 	RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
   1632 	{
   1633 		return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
   1634 	}
   1635 
   1636 	RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
   1637 	{
   1638 		return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
   1639 	}
   1640 
   1641 	RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
   1642 	{
   1643 		return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
   1644 	}
   1645 
   1646 	RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
   1647 	{
   1648 		return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
   1649 	}
   1650 
   1651 	RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
   1652 	{
   1653 		return lhs = lhs + rhs;
   1654 	}
   1655 
   1656 	RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
   1657 	{
   1658 		return lhs = lhs - rhs;
   1659 	}
   1660 
   1661 	RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
   1662 	{
   1663 		return lhs = lhs * rhs;
   1664 	}
   1665 
   1666 	RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
   1667 	{
   1668 		return lhs = lhs / rhs;
   1669 	}
   1670 
   1671 	RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
   1672 	{
   1673 		return lhs = lhs % rhs;
   1674 	}
   1675 
   1676 	RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
   1677 	{
   1678 		return lhs = lhs & rhs;
   1679 	}
   1680 
   1681 	RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
   1682 	{
   1683 		return lhs = lhs | rhs;
   1684 	}
   1685 
   1686 	RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
   1687 	{
   1688 		return lhs = lhs ^ rhs;
   1689 	}
   1690 
   1691 	RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
   1692 	{
   1693 		return lhs = lhs << rhs;
   1694 	}
   1695 
   1696 	RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
   1697 	{
   1698 		return lhs = lhs >> rhs;
   1699 	}
   1700 
   1701 	RValue<Short> operator+(RValue<Short> val)
   1702 	{
   1703 		return val;
   1704 	}
   1705 
   1706 	RValue<Short> operator-(RValue<Short> val)
   1707 	{
   1708 		return RValue<Short>(Nucleus::createNeg(val.value));
   1709 	}
   1710 
   1711 	RValue<Short> operator~(RValue<Short> val)
   1712 	{
   1713 		return RValue<Short>(Nucleus::createNot(val.value));
   1714 	}
   1715 
   1716 	RValue<Short> operator++(Short &val, int)   // Post-increment
   1717 	{
   1718 		RValue<Short> res = val;
   1719 
   1720 		Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((short)1)));
   1721 		val.storeValue(inc);
   1722 
   1723 		return res;
   1724 	}
   1725 
   1726 	const Short &operator++(Short &val)   // Pre-increment
   1727 	{
   1728 		Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((short)1)));
   1729 		val.storeValue(inc);
   1730 
   1731 		return val;
   1732 	}
   1733 
   1734 	RValue<Short> operator--(Short &val, int)   // Post-decrement
   1735 	{
   1736 		RValue<Short> res = val;
   1737 
   1738 		Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((short)1)));
   1739 		val.storeValue(inc);
   1740 
   1741 		return res;
   1742 	}
   1743 
   1744 	const Short &operator--(Short &val)   // Pre-decrement
   1745 	{
   1746 		Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((short)1)));
   1747 		val.storeValue(inc);
   1748 
   1749 		return val;
   1750 	}
   1751 
   1752 	RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
   1753 	{
   1754 		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
   1755 	}
   1756 
   1757 	RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
   1758 	{
   1759 		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
   1760 	}
   1761 
   1762 	RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
   1763 	{
   1764 		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
   1765 	}
   1766 
   1767 	RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
   1768 	{
   1769 		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
   1770 	}
   1771 
   1772 	RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
   1773 	{
   1774 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   1775 	}
   1776 
   1777 	RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
   1778 	{
   1779 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   1780 	}
   1781 
   1782 	Type *Short::getType()
   1783 	{
   1784 		return T(llvm::Type::getInt16Ty(*::context));
   1785 	}
   1786 
   1787 	UShort::UShort(Argument<UShort> argument)
   1788 	{
   1789 		storeValue(argument.value);
   1790 	}
   1791 
   1792 	UShort::UShort(RValue<UInt> cast)
   1793 	{
   1794 		Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
   1795 
   1796 		storeValue(integer);
   1797 	}
   1798 
   1799 	UShort::UShort(RValue<Int> cast)
   1800 	{
   1801 		Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
   1802 
   1803 		storeValue(integer);
   1804 	}
   1805 
   1806 	UShort::UShort(unsigned short x)
   1807 	{
   1808 		storeValue(Nucleus::createConstantShort(x));
   1809 	}
   1810 
   1811 	UShort::UShort(RValue<UShort> rhs)
   1812 	{
   1813 		storeValue(rhs.value);
   1814 	}
   1815 
   1816 	UShort::UShort(const UShort &rhs)
   1817 	{
   1818 		Value *value = rhs.loadValue();
   1819 		storeValue(value);
   1820 	}
   1821 
   1822 	UShort::UShort(const Reference<UShort> &rhs)
   1823 	{
   1824 		Value *value = rhs.loadValue();
   1825 		storeValue(value);
   1826 	}
   1827 
   1828 	RValue<UShort> UShort::operator=(RValue<UShort> rhs)
   1829 	{
   1830 		storeValue(rhs.value);
   1831 
   1832 		return rhs;
   1833 	}
   1834 
   1835 	RValue<UShort> UShort::operator=(const UShort &rhs)
   1836 	{
   1837 		Value *value = rhs.loadValue();
   1838 		storeValue(value);
   1839 
   1840 		return RValue<UShort>(value);
   1841 	}
   1842 
   1843 	RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
   1844 	{
   1845 		Value *value = rhs.loadValue();
   1846 		storeValue(value);
   1847 
   1848 		return RValue<UShort>(value);
   1849 	}
   1850 
   1851 	RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
   1852 	{
   1853 		return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
   1854 	}
   1855 
   1856 	RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
   1857 	{
   1858 		return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
   1859 	}
   1860 
   1861 	RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
   1862 	{
   1863 		return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
   1864 	}
   1865 
   1866 	RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
   1867 	{
   1868 		return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
   1869 	}
   1870 
   1871 	RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
   1872 	{
   1873 		return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
   1874 	}
   1875 
   1876 	RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
   1877 	{
   1878 		return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
   1879 	}
   1880 
   1881 	RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
   1882 	{
   1883 		return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
   1884 	}
   1885 
   1886 	RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
   1887 	{
   1888 		return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
   1889 	}
   1890 
   1891 	RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
   1892 	{
   1893 		return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
   1894 	}
   1895 
   1896 	RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
   1897 	{
   1898 		return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
   1899 	}
   1900 
   1901 	RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
   1902 	{
   1903 		return lhs = lhs + rhs;
   1904 	}
   1905 
   1906 	RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
   1907 	{
   1908 		return lhs = lhs - rhs;
   1909 	}
   1910 
   1911 	RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
   1912 	{
   1913 		return lhs = lhs * rhs;
   1914 	}
   1915 
   1916 	RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
   1917 	{
   1918 		return lhs = lhs / rhs;
   1919 	}
   1920 
   1921 	RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
   1922 	{
   1923 		return lhs = lhs % rhs;
   1924 	}
   1925 
   1926 	RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
   1927 	{
   1928 		return lhs = lhs & rhs;
   1929 	}
   1930 
   1931 	RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
   1932 	{
   1933 		return lhs = lhs | rhs;
   1934 	}
   1935 
   1936 	RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
   1937 	{
   1938 		return lhs = lhs ^ rhs;
   1939 	}
   1940 
   1941 	RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
   1942 	{
   1943 		return lhs = lhs << rhs;
   1944 	}
   1945 
   1946 	RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
   1947 	{
   1948 		return lhs = lhs >> rhs;
   1949 	}
   1950 
   1951 	RValue<UShort> operator+(RValue<UShort> val)
   1952 	{
   1953 		return val;
   1954 	}
   1955 
   1956 	RValue<UShort> operator-(RValue<UShort> val)
   1957 	{
   1958 		return RValue<UShort>(Nucleus::createNeg(val.value));
   1959 	}
   1960 
   1961 	RValue<UShort> operator~(RValue<UShort> val)
   1962 	{
   1963 		return RValue<UShort>(Nucleus::createNot(val.value));
   1964 	}
   1965 
   1966 	RValue<UShort> operator++(UShort &val, int)   // Post-increment
   1967 	{
   1968 		RValue<UShort> res = val;
   1969 
   1970 		Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((unsigned short)1)));
   1971 		val.storeValue(inc);
   1972 
   1973 		return res;
   1974 	}
   1975 
   1976 	const UShort &operator++(UShort &val)   // Pre-increment
   1977 	{
   1978 		Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1)));
   1979 		val.storeValue(inc);
   1980 
   1981 		return val;
   1982 	}
   1983 
   1984 	RValue<UShort> operator--(UShort &val, int)   // Post-decrement
   1985 	{
   1986 		RValue<UShort> res = val;
   1987 
   1988 		Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((unsigned short)1)));
   1989 		val.storeValue(inc);
   1990 
   1991 		return res;
   1992 	}
   1993 
   1994 	const UShort &operator--(UShort &val)   // Pre-decrement
   1995 	{
   1996 		Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1)));
   1997 		val.storeValue(inc);
   1998 
   1999 		return val;
   2000 	}
   2001 
   2002 	RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
   2003 	{
   2004 		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
   2005 	}
   2006 
   2007 	RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
   2008 	{
   2009 		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
   2010 	}
   2011 
   2012 	RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
   2013 	{
   2014 		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
   2015 	}
   2016 
   2017 	RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
   2018 	{
   2019 		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
   2020 	}
   2021 
   2022 	RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
   2023 	{
   2024 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   2025 	}
   2026 
   2027 	RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
   2028 	{
   2029 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   2030 	}
   2031 
   2032 	Type *UShort::getType()
   2033 	{
   2034 		return T(llvm::Type::getInt16Ty(*::context));
   2035 	}
   2036 
   2037 	Byte4::Byte4(RValue<Byte8> cast)
   2038 	{
   2039 		storeValue(Nucleus::createBitCast(cast.value, getType()));
   2040 	}
   2041 
   2042 	Byte4::Byte4(const Reference<Byte4> &rhs)
   2043 	{
   2044 		Value *value = rhs.loadValue();
   2045 		storeValue(value);
   2046 	}
   2047 
   2048 	Type *Byte4::getType()
   2049 	{
   2050 		return T(Type_v4i8);
   2051 	}
   2052 
   2053 	Type *SByte4::getType()
   2054 	{
   2055 		return T(Type_v4i8);
   2056 	}
   2057 
   2058 	Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
   2059 	{
   2060 		int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
   2061 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   2062 	}
   2063 
   2064 	Byte8::Byte8(RValue<Byte8> rhs)
   2065 	{
   2066 		storeValue(rhs.value);
   2067 	}
   2068 
   2069 	Byte8::Byte8(const Byte8 &rhs)
   2070 	{
   2071 		Value *value = rhs.loadValue();
   2072 		storeValue(value);
   2073 	}
   2074 
   2075 	Byte8::Byte8(const Reference<Byte8> &rhs)
   2076 	{
   2077 		Value *value = rhs.loadValue();
   2078 		storeValue(value);
   2079 	}
   2080 
   2081 	RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
   2082 	{
   2083 		storeValue(rhs.value);
   2084 
   2085 		return rhs;
   2086 	}
   2087 
   2088 	RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
   2089 	{
   2090 		Value *value = rhs.loadValue();
   2091 		storeValue(value);
   2092 
   2093 		return RValue<Byte8>(value);
   2094 	}
   2095 
   2096 	RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
   2097 	{
   2098 		Value *value = rhs.loadValue();
   2099 		storeValue(value);
   2100 
   2101 		return RValue<Byte8>(value);
   2102 	}
   2103 
   2104 	RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2105 	{
   2106 		return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
   2107 	}
   2108 
   2109 	RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2110 	{
   2111 		return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
   2112 	}
   2113 
   2114 //	RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2115 //	{
   2116 //		return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
   2117 //	}
   2118 
   2119 //	RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2120 //	{
   2121 //		return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
   2122 //	}
   2123 
   2124 //	RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2125 //	{
   2126 //		return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
   2127 //	}
   2128 
   2129 	RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2130 	{
   2131 		return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
   2132 	}
   2133 
   2134 	RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2135 	{
   2136 		return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
   2137 	}
   2138 
   2139 	RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2140 	{
   2141 		return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
   2142 	}
   2143 
   2144 //	RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
   2145 //	{
   2146 //		return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value));
   2147 //	}
   2148 
   2149 //	RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
   2150 //	{
   2151 //		return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value));
   2152 //	}
   2153 
   2154 	RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
   2155 	{
   2156 		return lhs = lhs + rhs;
   2157 	}
   2158 
   2159 	RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
   2160 	{
   2161 		return lhs = lhs - rhs;
   2162 	}
   2163 
   2164 //	RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
   2165 //	{
   2166 //		return lhs = lhs * rhs;
   2167 //	}
   2168 
   2169 //	RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
   2170 //	{
   2171 //		return lhs = lhs / rhs;
   2172 //	}
   2173 
   2174 //	RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
   2175 //	{
   2176 //		return lhs = lhs % rhs;
   2177 //	}
   2178 
   2179 	RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)
   2180 	{
   2181 		return lhs = lhs & rhs;
   2182 	}
   2183 
   2184 	RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)
   2185 	{
   2186 		return lhs = lhs | rhs;
   2187 	}
   2188 
   2189 	RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)
   2190 	{
   2191 		return lhs = lhs ^ rhs;
   2192 	}
   2193 
   2194 //	RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
   2195 //	{
   2196 //		return lhs = lhs << rhs;
   2197 //	}
   2198 
   2199 //	RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
   2200 //	{
   2201 //		return lhs = lhs >> rhs;
   2202 //	}
   2203 
   2204 //	RValue<Byte8> operator+(RValue<Byte8> val)
   2205 //	{
   2206 //		return val;
   2207 //	}
   2208 
   2209 //	RValue<Byte8> operator-(RValue<Byte8> val)
   2210 //	{
   2211 //		return RValue<Byte8>(Nucleus::createNeg(val.value));
   2212 //	}
   2213 
   2214 	RValue<Byte8> operator~(RValue<Byte8> val)
   2215 	{
   2216 		return RValue<Byte8>(Nucleus::createNot(val.value));
   2217 	}
   2218 
   2219 	RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
   2220 	{
   2221 		return x86::paddusb(x, y);
   2222 	}
   2223 
   2224 	RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
   2225 	{
   2226 		return x86::psubusb(x, y);
   2227 	}
   2228 
   2229 	RValue<Short4> Unpack(RValue<Byte4> x)
   2230 	{
   2231 		int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};   // Real type is v16i8
   2232 		return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
   2233 	}
   2234 
   2235 	RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
   2236 	{
   2237 		return UnpackLow(As<Byte8>(x), As<Byte8>(y));
   2238 	}
   2239 
   2240 	RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
   2241 	{
   2242 		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
   2243 		return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   2244 	}
   2245 
   2246 	RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
   2247 	{
   2248 		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
   2249 		auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   2250 		return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
   2251 	}
   2252 
   2253 	RValue<Int> SignMask(RValue<Byte8> x)
   2254 	{
   2255 		return x86::pmovmskb(x);
   2256 	}
   2257 
   2258 //	RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
   2259 //	{
   2260 //		return x86::pcmpgtb(x, y);   // FIXME: Signedness
   2261 //	}
   2262 
   2263 	RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
   2264 	{
   2265 		return x86::pcmpeqb(x, y);
   2266 	}
   2267 
   2268 	Type *Byte8::getType()
   2269 	{
   2270 		return T(Type_v8i8);
   2271 	}
   2272 
   2273 	SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
   2274 	{
   2275 		int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
   2276 		Value *vector = V(Nucleus::createConstantVector(constantVector, getType()));
   2277 
   2278 		storeValue(Nucleus::createBitCast(vector, getType()));
   2279 	}
   2280 
   2281 	SByte8::SByte8(RValue<SByte8> rhs)
   2282 	{
   2283 		storeValue(rhs.value);
   2284 	}
   2285 
   2286 	SByte8::SByte8(const SByte8 &rhs)
   2287 	{
   2288 		Value *value = rhs.loadValue();
   2289 		storeValue(value);
   2290 	}
   2291 
   2292 	SByte8::SByte8(const Reference<SByte8> &rhs)
   2293 	{
   2294 		Value *value = rhs.loadValue();
   2295 		storeValue(value);
   2296 	}
   2297 
   2298 	RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)
   2299 	{
   2300 		storeValue(rhs.value);
   2301 
   2302 		return rhs;
   2303 	}
   2304 
   2305 	RValue<SByte8> SByte8::operator=(const SByte8 &rhs)
   2306 	{
   2307 		Value *value = rhs.loadValue();
   2308 		storeValue(value);
   2309 
   2310 		return RValue<SByte8>(value);
   2311 	}
   2312 
   2313 	RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)
   2314 	{
   2315 		Value *value = rhs.loadValue();
   2316 		storeValue(value);
   2317 
   2318 		return RValue<SByte8>(value);
   2319 	}
   2320 
   2321 	RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2322 	{
   2323 		return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
   2324 	}
   2325 
   2326 	RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2327 	{
   2328 		return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
   2329 	}
   2330 
   2331 //	RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2332 //	{
   2333 //		return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
   2334 //	}
   2335 
   2336 //	RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2337 //	{
   2338 //		return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
   2339 //	}
   2340 
   2341 //	RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2342 //	{
   2343 //		return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
   2344 //	}
   2345 
   2346 	RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2347 	{
   2348 		return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
   2349 	}
   2350 
   2351 	RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2352 	{
   2353 		return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
   2354 	}
   2355 
   2356 	RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2357 	{
   2358 		return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
   2359 	}
   2360 
   2361 //	RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
   2362 //	{
   2363 //		return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value));
   2364 //	}
   2365 
   2366 //	RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
   2367 //	{
   2368 //		return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value));
   2369 //	}
   2370 
   2371 	RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
   2372 	{
   2373 		return lhs = lhs + rhs;
   2374 	}
   2375 
   2376 	RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
   2377 	{
   2378 		return lhs = lhs - rhs;
   2379 	}
   2380 
   2381 //	RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
   2382 //	{
   2383 //		return lhs = lhs * rhs;
   2384 //	}
   2385 
   2386 //	RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
   2387 //	{
   2388 //		return lhs = lhs / rhs;
   2389 //	}
   2390 
   2391 //	RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
   2392 //	{
   2393 //		return lhs = lhs % rhs;
   2394 //	}
   2395 
   2396 	RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
   2397 	{
   2398 		return lhs = lhs & rhs;
   2399 	}
   2400 
   2401 	RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
   2402 	{
   2403 		return lhs = lhs | rhs;
   2404 	}
   2405 
   2406 	RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
   2407 	{
   2408 		return lhs = lhs ^ rhs;
   2409 	}
   2410 
   2411 //	RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
   2412 //	{
   2413 //		return lhs = lhs << rhs;
   2414 //	}
   2415 
   2416 //	RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
   2417 //	{
   2418 //		return lhs = lhs >> rhs;
   2419 //	}
   2420 
   2421 //	RValue<SByte8> operator+(RValue<SByte8> val)
   2422 //	{
   2423 //		return val;
   2424 //	}
   2425 
   2426 //	RValue<SByte8> operator-(RValue<SByte8> val)
   2427 //	{
   2428 //		return RValue<SByte8>(Nucleus::createNeg(val.value));
   2429 //	}
   2430 
   2431 	RValue<SByte8> operator~(RValue<SByte8> val)
   2432 	{
   2433 		return RValue<SByte8>(Nucleus::createNot(val.value));
   2434 	}
   2435 
   2436 	RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
   2437 	{
   2438 		return x86::paddsb(x, y);
   2439 	}
   2440 
   2441 	RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
   2442 	{
   2443 		return x86::psubsb(x, y);
   2444 	}
   2445 
   2446 	RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
   2447 	{
   2448 		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
   2449 		return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   2450 	}
   2451 
   2452 	RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
   2453 	{
   2454 		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
   2455 		auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   2456 		return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
   2457 	}
   2458 
   2459 	RValue<Int> SignMask(RValue<SByte8> x)
   2460 	{
   2461 		return x86::pmovmskb(As<Byte8>(x));
   2462 	}
   2463 
   2464 	RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
   2465 	{
   2466 		return x86::pcmpgtb(x, y);
   2467 	}
   2468 
   2469 	RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
   2470 	{
   2471 		return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
   2472 	}
   2473 
   2474 	Type *SByte8::getType()
   2475 	{
   2476 		return T(Type_v8i8);
   2477 	}
   2478 
   2479 	Byte16::Byte16(RValue<Byte16> rhs)
   2480 	{
   2481 		storeValue(rhs.value);
   2482 	}
   2483 
   2484 	Byte16::Byte16(const Byte16 &rhs)
   2485 	{
   2486 		Value *value = rhs.loadValue();
   2487 		storeValue(value);
   2488 	}
   2489 
   2490 	Byte16::Byte16(const Reference<Byte16> &rhs)
   2491 	{
   2492 		Value *value = rhs.loadValue();
   2493 		storeValue(value);
   2494 	}
   2495 
   2496 	RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)
   2497 	{
   2498 		storeValue(rhs.value);
   2499 
   2500 		return rhs;
   2501 	}
   2502 
   2503 	RValue<Byte16> Byte16::operator=(const Byte16 &rhs)
   2504 	{
   2505 		Value *value = rhs.loadValue();
   2506 		storeValue(value);
   2507 
   2508 		return RValue<Byte16>(value);
   2509 	}
   2510 
   2511 	RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
   2512 	{
   2513 		Value *value = rhs.loadValue();
   2514 		storeValue(value);
   2515 
   2516 		return RValue<Byte16>(value);
   2517 	}
   2518 
   2519 	Type *Byte16::getType()
   2520 	{
   2521 		return T(llvm::VectorType::get(T(Byte::getType()), 16));
   2522 	}
   2523 
   2524 	Type *SByte16::getType()
   2525 	{
   2526 		return T(llvm::VectorType::get(T(SByte::getType()), 16));
   2527 	}
   2528 
   2529 	Short2::Short2(RValue<Short4> cast)
   2530 	{
   2531 		storeValue(Nucleus::createBitCast(cast.value, getType()));
   2532 	}
   2533 
   2534 	Type *Short2::getType()
   2535 	{
   2536 		return T(Type_v2i16);
   2537 	}
   2538 
   2539 	UShort2::UShort2(RValue<UShort4> cast)
   2540 	{
   2541 		storeValue(Nucleus::createBitCast(cast.value, getType()));
   2542 	}
   2543 
   2544 	Type *UShort2::getType()
   2545 	{
   2546 		return T(Type_v2i16);
   2547 	}
   2548 
   2549 	Short4::Short4(RValue<Int> cast)
   2550 	{
   2551 		Value *vector = loadValue();
   2552 		Value *element = Nucleus::createTrunc(cast.value, Short::getType());
   2553 		Value *insert = Nucleus::createInsertElement(vector, element, 0);
   2554 		Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
   2555 
   2556 		storeValue(swizzle);
   2557 	}
   2558 
   2559 	Short4::Short4(RValue<Int4> cast)
   2560 	{
   2561 		int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
   2562 		Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
   2563 
   2564 		Value *packed = Nucleus::createShuffleVector(short8, short8, select);
   2565 		Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
   2566 
   2567 		storeValue(short4);
   2568 	}
   2569 
   2570 //	Short4::Short4(RValue<Float> cast)
   2571 //	{
   2572 //	}
   2573 
   2574 	Short4::Short4(RValue<Float4> cast)
   2575 	{
   2576 		Int4 v4i32 = Int4(cast);
   2577 		v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
   2578 
   2579 		storeValue(As<Short4>(Int2(v4i32)).value);
   2580 	}
   2581 
   2582 	Short4::Short4(short xyzw)
   2583 	{
   2584 		int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
   2585 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   2586 	}
   2587 
   2588 	Short4::Short4(short x, short y, short z, short w)
   2589 	{
   2590 		int64_t constantVector[4] = {x, y, z, w};
   2591 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   2592 	}
   2593 
   2594 	Short4::Short4(RValue<Short4> rhs)
   2595 	{
   2596 		storeValue(rhs.value);
   2597 	}
   2598 
   2599 	Short4::Short4(const Short4 &rhs)
   2600 	{
   2601 		Value *value = rhs.loadValue();
   2602 		storeValue(value);
   2603 	}
   2604 
   2605 	Short4::Short4(const Reference<Short4> &rhs)
   2606 	{
   2607 		Value *value = rhs.loadValue();
   2608 		storeValue(value);
   2609 	}
   2610 
   2611 	Short4::Short4(RValue<UShort4> rhs)
   2612 	{
   2613 		storeValue(rhs.value);
   2614 	}
   2615 
   2616 	Short4::Short4(const UShort4 &rhs)
   2617 	{
   2618 		storeValue(rhs.loadValue());
   2619 	}
   2620 
   2621 	Short4::Short4(const Reference<UShort4> &rhs)
   2622 	{
   2623 		storeValue(rhs.loadValue());
   2624 	}
   2625 
   2626 	RValue<Short4> Short4::operator=(RValue<Short4> rhs)
   2627 	{
   2628 		storeValue(rhs.value);
   2629 
   2630 		return rhs;
   2631 	}
   2632 
   2633 	RValue<Short4> Short4::operator=(const Short4 &rhs)
   2634 	{
   2635 		Value *value = rhs.loadValue();
   2636 		storeValue(value);
   2637 
   2638 		return RValue<Short4>(value);
   2639 	}
   2640 
   2641 	RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)
   2642 	{
   2643 		Value *value = rhs.loadValue();
   2644 		storeValue(value);
   2645 
   2646 		return RValue<Short4>(value);
   2647 	}
   2648 
   2649 	RValue<Short4> Short4::operator=(RValue<UShort4> rhs)
   2650 	{
   2651 		storeValue(rhs.value);
   2652 
   2653 		return RValue<Short4>(rhs);
   2654 	}
   2655 
   2656 	RValue<Short4> Short4::operator=(const UShort4 &rhs)
   2657 	{
   2658 		Value *value = rhs.loadValue();
   2659 		storeValue(value);
   2660 
   2661 		return RValue<Short4>(value);
   2662 	}
   2663 
   2664 	RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)
   2665 	{
   2666 		Value *value = rhs.loadValue();
   2667 		storeValue(value);
   2668 
   2669 		return RValue<Short4>(value);
   2670 	}
   2671 
   2672 	RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
   2673 	{
   2674 		return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
   2675 	}
   2676 
   2677 	RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
   2678 	{
   2679 		return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
   2680 	}
   2681 
   2682 	RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
   2683 	{
   2684 		return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
   2685 	}
   2686 
   2687 //	RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
   2688 //	{
   2689 //		return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
   2690 //	}
   2691 
   2692 //	RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
   2693 //	{
   2694 //		return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
   2695 //	}
   2696 
   2697 	RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
   2698 	{
   2699 		return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
   2700 	}
   2701 
   2702 	RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
   2703 	{
   2704 		return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
   2705 	}
   2706 
   2707 	RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
   2708 	{
   2709 		return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
   2710 	}
   2711 
   2712 	RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
   2713 	{
   2714 	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
   2715 
   2716 		return x86::psllw(lhs, rhs);
   2717 	}
   2718 
   2719 	RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
   2720 	{
   2721 	//	return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value));
   2722 
   2723 		return x86::psraw(lhs, rhs);
   2724 	}
   2725 
   2726 	RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
   2727 	{
   2728 		return lhs = lhs + rhs;
   2729 	}
   2730 
   2731 	RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)
   2732 	{
   2733 		return lhs = lhs - rhs;
   2734 	}
   2735 
   2736 	RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)
   2737 	{
   2738 		return lhs = lhs * rhs;
   2739 	}
   2740 
   2741 //	RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
   2742 //	{
   2743 //		return lhs = lhs / rhs;
   2744 //	}
   2745 
   2746 //	RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
   2747 //	{
   2748 //		return lhs = lhs % rhs;
   2749 //	}
   2750 
   2751 	RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)
   2752 	{
   2753 		return lhs = lhs & rhs;
   2754 	}
   2755 
   2756 	RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)
   2757 	{
   2758 		return lhs = lhs | rhs;
   2759 	}
   2760 
   2761 	RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)
   2762 	{
   2763 		return lhs = lhs ^ rhs;
   2764 	}
   2765 
   2766 	RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)
   2767 	{
   2768 		return lhs = lhs << rhs;
   2769 	}
   2770 
   2771 	RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)
   2772 	{
   2773 		return lhs = lhs >> rhs;
   2774 	}
   2775 
   2776 //	RValue<Short4> operator+(RValue<Short4> val)
   2777 //	{
   2778 //		return val;
   2779 //	}
   2780 
   2781 	RValue<Short4> operator-(RValue<Short4> val)
   2782 	{
   2783 		return RValue<Short4>(Nucleus::createNeg(val.value));
   2784 	}
   2785 
   2786 	RValue<Short4> operator~(RValue<Short4> val)
   2787 	{
   2788 		return RValue<Short4>(Nucleus::createNot(val.value));
   2789 	}
   2790 
   2791 	RValue<Short4> RoundShort4(RValue<Float4> cast)
   2792 	{
   2793 		RValue<Int4> int4 = RoundInt(cast);
   2794 		return As<Short4>(PackSigned(int4, int4));
   2795 	}
   2796 
   2797 	RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
   2798 	{
   2799 		return x86::pmaxsw(x, y);
   2800 	}
   2801 
   2802 	RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
   2803 	{
   2804 		return x86::pminsw(x, y);
   2805 	}
   2806 
   2807 	RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
   2808 	{
   2809 		return x86::paddsw(x, y);
   2810 	}
   2811 
   2812 	RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
   2813 	{
   2814 		return x86::psubsw(x, y);
   2815 	}
   2816 
   2817 	RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
   2818 	{
   2819 		return x86::pmulhw(x, y);
   2820 	}
   2821 
   2822 	RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
   2823 	{
   2824 		return x86::pmaddwd(x, y);
   2825 	}
   2826 
   2827 	RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
   2828 	{
   2829 		auto result = x86::packsswb(x, y);
   2830 
   2831 		return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
   2832 	}
   2833 
   2834 	RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
   2835 	{
   2836 		auto result = x86::packuswb(x, y);
   2837 
   2838 		return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
   2839 	}
   2840 
   2841 	RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
   2842 	{
   2843 		int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
   2844 		return As<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   2845 	}
   2846 
   2847 	RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
   2848 	{
   2849 		int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
   2850 		auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   2851 		return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));
   2852 	}
   2853 
   2854 	RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
   2855 	{
   2856 		// Real type is v8i16
   2857 		int shuffle[8] =
   2858 		{
   2859 			(select >> 0) & 0x03,
   2860 			(select >> 2) & 0x03,
   2861 			(select >> 4) & 0x03,
   2862 			(select >> 6) & 0x03,
   2863 			(select >> 0) & 0x03,
   2864 			(select >> 2) & 0x03,
   2865 			(select >> 4) & 0x03,
   2866 			(select >> 6) & 0x03,
   2867 		};
   2868 
   2869 		return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
   2870 	}
   2871 
   2872 	RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
   2873 	{
   2874 		return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
   2875 	}
   2876 
   2877 	RValue<Short> Extract(RValue<Short4> val, int i)
   2878 	{
   2879 		return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
   2880 	}
   2881 
   2882 	RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
   2883 	{
   2884 		return x86::pcmpgtw(x, y);
   2885 	}
   2886 
   2887 	RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
   2888 	{
   2889 		return x86::pcmpeqw(x, y);
   2890 	}
   2891 
   2892 	Type *Short4::getType()
   2893 	{
   2894 		return T(Type_v4i16);
   2895 	}
   2896 
   2897 	UShort4::UShort4(RValue<Int4> cast)
   2898 	{
   2899 		*this = Short4(cast);
   2900 	}
   2901 
   2902 	UShort4::UShort4(RValue<Float4> cast, bool saturate)
   2903 	{
   2904 		if(saturate)
   2905 		{
   2906 			if(CPUID::supportsSSE4_1())
   2907 			{
   2908 				Int4 int4(Min(cast, Float4(0xFFFF)));   // packusdw takes care of 0x0000 saturation
   2909 				*this = As<Short4>(PackUnsigned(int4, int4));
   2910 			}
   2911 			else
   2912 			{
   2913 				*this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
   2914 			}
   2915 		}
   2916 		else
   2917 		{
   2918 			*this = Short4(Int4(cast));
   2919 		}
   2920 	}
   2921 
   2922 	UShort4::UShort4(unsigned short xyzw)
   2923 	{
   2924 		int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
   2925 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   2926 	}
   2927 
   2928 	UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
   2929 	{
   2930 		int64_t constantVector[4] = {x, y, z, w};
   2931 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   2932 	}
   2933 
   2934 	UShort4::UShort4(RValue<UShort4> rhs)
   2935 	{
   2936 		storeValue(rhs.value);
   2937 	}
   2938 
   2939 	UShort4::UShort4(const UShort4 &rhs)
   2940 	{
   2941 		Value *value = rhs.loadValue();
   2942 		storeValue(value);
   2943 	}
   2944 
   2945 	UShort4::UShort4(const Reference<UShort4> &rhs)
   2946 	{
   2947 		Value *value = rhs.loadValue();
   2948 		storeValue(value);
   2949 	}
   2950 
   2951 	UShort4::UShort4(RValue<Short4> rhs)
   2952 	{
   2953 		storeValue(rhs.value);
   2954 	}
   2955 
   2956 	UShort4::UShort4(const Short4 &rhs)
   2957 	{
   2958 		Value *value = rhs.loadValue();
   2959 		storeValue(value);
   2960 	}
   2961 
   2962 	UShort4::UShort4(const Reference<Short4> &rhs)
   2963 	{
   2964 		Value *value = rhs.loadValue();
   2965 		storeValue(value);
   2966 	}
   2967 
   2968 	RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
   2969 	{
   2970 		storeValue(rhs.value);
   2971 
   2972 		return rhs;
   2973 	}
   2974 
   2975 	RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
   2976 	{
   2977 		Value *value = rhs.loadValue();
   2978 		storeValue(value);
   2979 
   2980 		return RValue<UShort4>(value);
   2981 	}
   2982 
   2983 	RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
   2984 	{
   2985 		Value *value = rhs.loadValue();
   2986 		storeValue(value);
   2987 
   2988 		return RValue<UShort4>(value);
   2989 	}
   2990 
   2991 	RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
   2992 	{
   2993 		storeValue(rhs.value);
   2994 
   2995 		return RValue<UShort4>(rhs);
   2996 	}
   2997 
   2998 	RValue<UShort4> UShort4::operator=(const Short4 &rhs)
   2999 	{
   3000 		Value *value = rhs.loadValue();
   3001 		storeValue(value);
   3002 
   3003 		return RValue<UShort4>(value);
   3004 	}
   3005 
   3006 	RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
   3007 	{
   3008 		Value *value = rhs.loadValue();
   3009 		storeValue(value);
   3010 
   3011 		return RValue<UShort4>(value);
   3012 	}
   3013 
   3014 	RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
   3015 	{
   3016 		return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
   3017 	}
   3018 
   3019 	RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
   3020 	{
   3021 		return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
   3022 	}
   3023 
   3024 	RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
   3025 	{
   3026 		return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
   3027 	}
   3028 
   3029 	RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
   3030 	{
   3031 		return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
   3032 	}
   3033 
   3034 	RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
   3035 	{
   3036 		return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
   3037 	}
   3038 
   3039 	RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
   3040 	{
   3041 		return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
   3042 	}
   3043 
   3044 	RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
   3045 	{
   3046 	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
   3047 
   3048 		return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
   3049 	}
   3050 
   3051 	RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
   3052 	{
   3053 	//	return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
   3054 
   3055 		return x86::psrlw(lhs, rhs);
   3056 	}
   3057 
   3058 	RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
   3059 	{
   3060 		return lhs = lhs << rhs;
   3061 	}
   3062 
   3063 	RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
   3064 	{
   3065 		return lhs = lhs >> rhs;
   3066 	}
   3067 
   3068 	RValue<UShort4> operator~(RValue<UShort4> val)
   3069 	{
   3070 		return RValue<UShort4>(Nucleus::createNot(val.value));
   3071 	}
   3072 
	// Maximum of two UShort4 values, computed with the Short4 overload of Max:
	// 0x8000 is subtracted from every lane of both operands (reinterpreted as Short4),
	// the Short4 Max is taken, and 0x8000 is added back to the result.
	// NOTE(review): this looks like the usual bias trick for getting an unsigned
	// ordering out of a signed comparison - confirm against the Short4 Max implementation.
	RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
	{
		return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
	}
   3077 
	// Minimum of two UShort4 values, computed with the Short4 overload of Min:
	// 0x8000 is subtracted from every lane of both operands (reinterpreted as Short4),
	// the Short4 Min is taken, and 0x8000 is added back to the result.
	// NOTE(review): this looks like the usual bias trick for getting an unsigned
	// ordering out of a signed comparison - confirm against the Short4 Min implementation.
	RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
	{
		return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
	}
   3082 
   3083 	RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
   3084 	{
   3085 		return x86::paddusw(x, y);
   3086 	}
   3087 
   3088 	RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
   3089 	{
   3090 		return x86::psubusw(x, y);
   3091 	}
   3092 
   3093 	RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
   3094 	{
   3095 		return x86::pmulhuw(x, y);
   3096 	}
   3097 
   3098 	RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
   3099 	{
   3100 		return x86::pavgw(x, y);
   3101 	}
   3102 
   3103 	Type *UShort4::getType()
   3104 	{
   3105 		return T(Type_v4i16);
   3106 	}
   3107 
   3108 	Short8::Short8(short c)
   3109 	{
   3110 		int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
   3111 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   3112 	}
   3113 
   3114 	Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
   3115 	{
   3116 		int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
   3117 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   3118 	}
   3119 
   3120 	Short8::Short8(RValue<Short8> rhs)
   3121 	{
   3122 		storeValue(rhs.value);
   3123 	}
   3124 
   3125 	Short8::Short8(const Reference<Short8> &rhs)
   3126 	{
   3127 		Value *value = rhs.loadValue();
   3128 		storeValue(value);
   3129 	}
   3130 
   3131 	Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
   3132 	{
   3133 		int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
   3134 		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
   3135 
   3136 		storeValue(packed);
   3137 	}
   3138 
   3139 	RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
   3140 	{
   3141 		return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
   3142 	}
   3143 
   3144 	RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
   3145 	{
   3146 		return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
   3147 	}
   3148 
   3149 	RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
   3150 	{
   3151 		return x86::psllw(lhs, rhs);   // FIXME: Fallback required
   3152 	}
   3153 
   3154 	RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
   3155 	{
   3156 		return x86::psraw(lhs, rhs);   // FIXME: Fallback required
   3157 	}
   3158 
   3159 	RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
   3160 	{
   3161 		return x86::pmaddwd(x, y);   // FIXME: Fallback required
   3162 	}
   3163 
   3164 	RValue<Int4> Abs(RValue<Int4> x)
   3165 	{
   3166 		auto negative = x >> 31;
   3167 		return (x ^ negative) - negative;
   3168 	}
   3169 
   3170 	RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
   3171 	{
   3172 		return x86::pmulhw(x, y);   // FIXME: Fallback required
   3173 	}
   3174 
   3175 	Type *Short8::getType()
   3176 	{
   3177 		return T(llvm::VectorType::get(T(Short::getType()), 8));
   3178 	}
   3179 
   3180 	UShort8::UShort8(unsigned short c)
   3181 	{
   3182 		int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
   3183 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   3184 	}
   3185 
   3186 	UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
   3187 	{
   3188 		int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
   3189 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   3190 	}
   3191 
   3192 	UShort8::UShort8(RValue<UShort8> rhs)
   3193 	{
   3194 		storeValue(rhs.value);
   3195 	}
   3196 
   3197 	UShort8::UShort8(const Reference<UShort8> &rhs)
   3198 	{
   3199 		Value *value = rhs.loadValue();
   3200 		storeValue(value);
   3201 	}
   3202 
   3203 	UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
   3204 	{
   3205 		int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
   3206 		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
   3207 
   3208 		storeValue(packed);
   3209 	}
   3210 
   3211 	RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
   3212 	{
   3213 		storeValue(rhs.value);
   3214 
   3215 		return rhs;
   3216 	}
   3217 
   3218 	RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
   3219 	{
   3220 		Value *value = rhs.loadValue();
   3221 		storeValue(value);
   3222 
   3223 		return RValue<UShort8>(value);
   3224 	}
   3225 
   3226 	RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
   3227 	{
   3228 		Value *value = rhs.loadValue();
   3229 		storeValue(value);
   3230 
   3231 		return RValue<UShort8>(value);
   3232 	}
   3233 
   3234 	RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
   3235 	{
   3236 		return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
   3237 	}
   3238 
   3239 	RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
   3240 	{
   3241 		return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));   // FIXME: Fallback required
   3242 	}
   3243 
   3244 	RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
   3245 	{
   3246 		return x86::psrlw(lhs, rhs);   // FIXME: Fallback required
   3247 	}
   3248 
   3249 	RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
   3250 	{
   3251 		return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
   3252 	}
   3253 
   3254 	RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
   3255 	{
   3256 		return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
   3257 	}
   3258 
   3259 	RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
   3260 	{
   3261 		return lhs = lhs + rhs;
   3262 	}
   3263 
   3264 	RValue<UShort8> operator~(RValue<UShort8> val)
   3265 	{
   3266 		return RValue<UShort8>(Nucleus::createNot(val.value));
   3267 	}
   3268 
   3269 	RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
   3270 	{
   3271 		int pshufb[16] =
   3272 		{
   3273 			select0 + 0,
   3274 			select0 + 1,
   3275 			select1 + 0,
   3276 			select1 + 1,
   3277 			select2 + 0,
   3278 			select2 + 1,
   3279 			select3 + 0,
   3280 			select3 + 1,
   3281 			select4 + 0,
   3282 			select4 + 1,
   3283 			select5 + 0,
   3284 			select5 + 1,
   3285 			select6 + 0,
   3286 			select6 + 1,
   3287 			select7 + 0,
   3288 			select7 + 1,
   3289 		};
   3290 
   3291 		Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
   3292 		Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
   3293 		Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
   3294 
   3295 		return RValue<UShort8>(short8);
   3296 	}
   3297 
   3298 	RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
   3299 	{
   3300 		return x86::pmulhuw(x, y);   // FIXME: Fallback required
   3301 	}
   3302 
   3303 	Type *UShort8::getType()
   3304 	{
   3305 		return T(llvm::VectorType::get(T(UShort::getType()), 8));
   3306 	}
   3307 
   3308 	Int::Int(Argument<Int> argument)
   3309 	{
   3310 		storeValue(argument.value);
   3311 	}
   3312 
   3313 	Int::Int(RValue<Byte> cast)
   3314 	{
   3315 		Value *integer = Nucleus::createZExt(cast.value, Int::getType());
   3316 
   3317 		storeValue(integer);
   3318 	}
   3319 
   3320 	Int::Int(RValue<SByte> cast)
   3321 	{
   3322 		Value *integer = Nucleus::createSExt(cast.value, Int::getType());
   3323 
   3324 		storeValue(integer);
   3325 	}
   3326 
   3327 	Int::Int(RValue<Short> cast)
   3328 	{
   3329 		Value *integer = Nucleus::createSExt(cast.value, Int::getType());
   3330 
   3331 		storeValue(integer);
   3332 	}
   3333 
   3334 	Int::Int(RValue<UShort> cast)
   3335 	{
   3336 		Value *integer = Nucleus::createZExt(cast.value, Int::getType());
   3337 
   3338 		storeValue(integer);
   3339 	}
   3340 
   3341 	Int::Int(RValue<Int2> cast)
   3342 	{
   3343 		*this = Extract(cast, 0);
   3344 	}
   3345 
   3346 	Int::Int(RValue<Long> cast)
   3347 	{
   3348 		Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
   3349 
   3350 		storeValue(integer);
   3351 	}
   3352 
   3353 	Int::Int(RValue<Float> cast)
   3354 	{
   3355 		Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
   3356 
   3357 		storeValue(integer);
   3358 	}
   3359 
   3360 	Int::Int(int x)
   3361 	{
   3362 		storeValue(Nucleus::createConstantInt(x));
   3363 	}
   3364 
   3365 	Int::Int(RValue<Int> rhs)
   3366 	{
   3367 		storeValue(rhs.value);
   3368 	}
   3369 
   3370 	Int::Int(RValue<UInt> rhs)
   3371 	{
   3372 		storeValue(rhs.value);
   3373 	}
   3374 
   3375 	Int::Int(const Int &rhs)
   3376 	{
   3377 		Value *value = rhs.loadValue();
   3378 		storeValue(value);
   3379 	}
   3380 
   3381 	Int::Int(const Reference<Int> &rhs)
   3382 	{
   3383 		Value *value = rhs.loadValue();
   3384 		storeValue(value);
   3385 	}
   3386 
   3387 	Int::Int(const UInt &rhs)
   3388 	{
   3389 		Value *value = rhs.loadValue();
   3390 		storeValue(value);
   3391 	}
   3392 
   3393 	Int::Int(const Reference<UInt> &rhs)
   3394 	{
   3395 		Value *value = rhs.loadValue();
   3396 		storeValue(value);
   3397 	}
   3398 
   3399 	RValue<Int> Int::operator=(int rhs)
   3400 	{
   3401 		return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
   3402 	}
   3403 
   3404 	RValue<Int> Int::operator=(RValue<Int> rhs)
   3405 	{
   3406 		storeValue(rhs.value);
   3407 
   3408 		return rhs;
   3409 	}
   3410 
   3411 	RValue<Int> Int::operator=(RValue<UInt> rhs)
   3412 	{
   3413 		storeValue(rhs.value);
   3414 
   3415 		return RValue<Int>(rhs);
   3416 	}
   3417 
   3418 	RValue<Int> Int::operator=(const Int &rhs)
   3419 	{
   3420 		Value *value = rhs.loadValue();
   3421 		storeValue(value);
   3422 
   3423 		return RValue<Int>(value);
   3424 	}
   3425 
   3426 	RValue<Int> Int::operator=(const Reference<Int> &rhs)
   3427 	{
   3428 		Value *value = rhs.loadValue();
   3429 		storeValue(value);
   3430 
   3431 		return RValue<Int>(value);
   3432 	}
   3433 
   3434 	RValue<Int> Int::operator=(const UInt &rhs)
   3435 	{
   3436 		Value *value = rhs.loadValue();
   3437 		storeValue(value);
   3438 
   3439 		return RValue<Int>(value);
   3440 	}
   3441 
   3442 	RValue<Int> Int::operator=(const Reference<UInt> &rhs)
   3443 	{
   3444 		Value *value = rhs.loadValue();
   3445 		storeValue(value);
   3446 
   3447 		return RValue<Int>(value);
   3448 	}
   3449 
   3450 	RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
   3451 	{
   3452 		return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
   3453 	}
   3454 
   3455 	RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
   3456 	{
   3457 		return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
   3458 	}
   3459 
   3460 	RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
   3461 	{
   3462 		return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
   3463 	}
   3464 
   3465 	RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
   3466 	{
   3467 		return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
   3468 	}
   3469 
   3470 	RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
   3471 	{
   3472 		return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
   3473 	}
   3474 
   3475 	RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
   3476 	{
   3477 		return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
   3478 	}
   3479 
   3480 	RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
   3481 	{
   3482 		return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
   3483 	}
   3484 
   3485 	RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
   3486 	{
   3487 		return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
   3488 	}
   3489 
   3490 	RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
   3491 	{
   3492 		return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
   3493 	}
   3494 
   3495 	RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
   3496 	{
   3497 		return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
   3498 	}
   3499 
   3500 	RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
   3501 	{
   3502 		return lhs = lhs + rhs;
   3503 	}
   3504 
   3505 	RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
   3506 	{
   3507 		return lhs = lhs - rhs;
   3508 	}
   3509 
   3510 	RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
   3511 	{
   3512 		return lhs = lhs * rhs;
   3513 	}
   3514 
   3515 	RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
   3516 	{
   3517 		return lhs = lhs / rhs;
   3518 	}
   3519 
   3520 	RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
   3521 	{
   3522 		return lhs = lhs % rhs;
   3523 	}
   3524 
   3525 	RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
   3526 	{
   3527 		return lhs = lhs & rhs;
   3528 	}
   3529 
   3530 	RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
   3531 	{
   3532 		return lhs = lhs | rhs;
   3533 	}
   3534 
   3535 	RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
   3536 	{
   3537 		return lhs = lhs ^ rhs;
   3538 	}
   3539 
   3540 	RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
   3541 	{
   3542 		return lhs = lhs << rhs;
   3543 	}
   3544 
   3545 	RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
   3546 	{
   3547 		return lhs = lhs >> rhs;
   3548 	}
   3549 
   3550 	RValue<Int> operator+(RValue<Int> val)
   3551 	{
   3552 		return val;
   3553 	}
   3554 
   3555 	RValue<Int> operator-(RValue<Int> val)
   3556 	{
   3557 		return RValue<Int>(Nucleus::createNeg(val.value));
   3558 	}
   3559 
   3560 	RValue<Int> operator~(RValue<Int> val)
   3561 	{
   3562 		return RValue<Int>(Nucleus::createNot(val.value));
   3563 	}
   3564 
   3565 	RValue<Int> operator++(Int &val, int)   // Post-increment
   3566 	{
   3567 		RValue<Int> res = val;
   3568 
   3569 		Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1)));
   3570 		val.storeValue(inc);
   3571 
   3572 		return res;
   3573 	}
   3574 
   3575 	const Int &operator++(Int &val)   // Pre-increment
   3576 	{
   3577 		Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1)));
   3578 		val.storeValue(inc);
   3579 
   3580 		return val;
   3581 	}
   3582 
   3583 	RValue<Int> operator--(Int &val, int)   // Post-decrement
   3584 	{
   3585 		RValue<Int> res = val;
   3586 
   3587 		Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1)));
   3588 		val.storeValue(inc);
   3589 
   3590 		return res;
   3591 	}
   3592 
   3593 	const Int &operator--(Int &val)   // Pre-decrement
   3594 	{
   3595 		Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1)));
   3596 		val.storeValue(inc);
   3597 
   3598 		return val;
   3599 	}
   3600 
   3601 	RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
   3602 	{
   3603 		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
   3604 	}
   3605 
   3606 	RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
   3607 	{
   3608 		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
   3609 	}
   3610 
   3611 	RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
   3612 	{
   3613 		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
   3614 	}
   3615 
   3616 	RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
   3617 	{
   3618 		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
   3619 	}
   3620 
   3621 	RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
   3622 	{
   3623 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   3624 	}
   3625 
   3626 	RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
   3627 	{
   3628 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   3629 	}
   3630 
   3631 	RValue<Int> Max(RValue<Int> x, RValue<Int> y)
   3632 	{
   3633 		return IfThenElse(x > y, x, y);
   3634 	}
   3635 
   3636 	RValue<Int> Min(RValue<Int> x, RValue<Int> y)
   3637 	{
   3638 		return IfThenElse(x < y, x, y);
   3639 	}
   3640 
   3641 	RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
   3642 	{
   3643 		return Min(Max(x, min), max);
   3644 	}
   3645 
   3646 	RValue<Int> RoundInt(RValue<Float> cast)
   3647 	{
   3648 		return x86::cvtss2si(cast);
   3649 
   3650 	//	return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
   3651 	}
   3652 
   3653 	Type *Int::getType()
   3654 	{
   3655 		return T(llvm::Type::getInt32Ty(*::context));
   3656 	}
   3657 
   3658 	Long::Long(RValue<Int> cast)
   3659 	{
   3660 		Value *integer = Nucleus::createSExt(cast.value, Long::getType());
   3661 
   3662 		storeValue(integer);
   3663 	}
   3664 
   3665 	Long::Long(RValue<UInt> cast)
   3666 	{
   3667 		Value *integer = Nucleus::createZExt(cast.value, Long::getType());
   3668 
   3669 		storeValue(integer);
   3670 	}
   3671 
   3672 	Long::Long(RValue<Long> rhs)
   3673 	{
   3674 		storeValue(rhs.value);
   3675 	}
   3676 
   3677 	RValue<Long> Long::operator=(int64_t rhs)
   3678 	{
   3679 		return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
   3680 	}
   3681 
   3682 	RValue<Long> Long::operator=(RValue<Long> rhs)
   3683 	{
   3684 		storeValue(rhs.value);
   3685 
   3686 		return rhs;
   3687 	}
   3688 
   3689 	RValue<Long> Long::operator=(const Long &rhs)
   3690 	{
   3691 		Value *value = rhs.loadValue();
   3692 		storeValue(value);
   3693 
   3694 		return RValue<Long>(value);
   3695 	}
   3696 
   3697 	RValue<Long> Long::operator=(const Reference<Long> &rhs)
   3698 	{
   3699 		Value *value = rhs.loadValue();
   3700 		storeValue(value);
   3701 
   3702 		return RValue<Long>(value);
   3703 	}
   3704 
   3705 	RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
   3706 	{
   3707 		return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
   3708 	}
   3709 
   3710 	RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
   3711 	{
   3712 		return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
   3713 	}
   3714 
   3715 	RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
   3716 	{
   3717 		return lhs = lhs + rhs;
   3718 	}
   3719 
   3720 	RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)
   3721 	{
   3722 		return lhs = lhs - rhs;
   3723 	}
   3724 
   3725 	RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
   3726 	{
   3727 		return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
   3728 	}
   3729 
   3730 	Type *Long::getType()
   3731 	{
   3732 		return T(llvm::Type::getInt64Ty(*::context));
   3733 	}
   3734 
   3735 	UInt::UInt(Argument<UInt> argument)
   3736 	{
   3737 		storeValue(argument.value);
   3738 	}
   3739 
   3740 	UInt::UInt(RValue<UShort> cast)
   3741 	{
   3742 		Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
   3743 
   3744 		storeValue(integer);
   3745 	}
   3746 
   3747 	UInt::UInt(RValue<Long> cast)
   3748 	{
   3749 		Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
   3750 
   3751 		storeValue(integer);
   3752 	}
   3753 
	// Constructs a UInt from a Float using only signed float-to-int conversions.
	// The stored value is (mask & converted), where mask is ~(bits of cast >> 31)
	// and converted is chosen by IfThenElse depending on whether cast >= 2^31.
	UInt::UInt(RValue<Float> cast)
	{
		// Note: createFPToUI is broken, so the conversion is performed with the
		// signed Int(Float) conversion instead.
		// Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());

		// Smallest positive value representable in UInt, but not in Int
		const unsigned int ustart = 0x80000000u;
		const float ustartf = float(ustart);

		// If the value is negative, store 0, otherwise store the result of the conversion.
		// The mask is built from the float's bit pattern shifted right by 31 as an Int.
		storeValue((~(As<Int>(cast) >> 31) &
		// Check if the value can be represented as an Int
			IfThenElse(cast >= ustartf,
		// If the value is too large, subtract ustart and re-add it after conversion.
				As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
		// Otherwise, just convert normally
				Int(cast))).value);
	}
   3772 
   3773 	UInt::UInt(int x)
   3774 	{
   3775 		storeValue(Nucleus::createConstantInt(x));
   3776 	}
   3777 
   3778 	UInt::UInt(unsigned int x)
   3779 	{
   3780 		storeValue(Nucleus::createConstantInt(x));
   3781 	}
   3782 
   3783 	UInt::UInt(RValue<UInt> rhs)
   3784 	{
   3785 		storeValue(rhs.value);
   3786 	}
   3787 
   3788 	UInt::UInt(RValue<Int> rhs)
   3789 	{
   3790 		storeValue(rhs.value);
   3791 	}
   3792 
   3793 	UInt::UInt(const UInt &rhs)
   3794 	{
   3795 		Value *value = rhs.loadValue();
   3796 		storeValue(value);
   3797 	}
   3798 
   3799 	UInt::UInt(const Reference<UInt> &rhs)
   3800 	{
   3801 		Value *value = rhs.loadValue();
   3802 		storeValue(value);
   3803 	}
   3804 
   3805 	UInt::UInt(const Int &rhs)
   3806 	{
   3807 		Value *value = rhs.loadValue();
   3808 		storeValue(value);
   3809 	}
   3810 
   3811 	UInt::UInt(const Reference<Int> &rhs)
   3812 	{
   3813 		Value *value = rhs.loadValue();
   3814 		storeValue(value);
   3815 	}
   3816 
   3817 	RValue<UInt> UInt::operator=(unsigned int rhs)
   3818 	{
   3819 		return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
   3820 	}
   3821 
   3822 	RValue<UInt> UInt::operator=(RValue<UInt> rhs)
   3823 	{
   3824 		storeValue(rhs.value);
   3825 
   3826 		return rhs;
   3827 	}
   3828 
   3829 	RValue<UInt> UInt::operator=(RValue<Int> rhs)
   3830 	{
   3831 		storeValue(rhs.value);
   3832 
   3833 		return RValue<UInt>(rhs);
   3834 	}
   3835 
   3836 	RValue<UInt> UInt::operator=(const UInt &rhs)
   3837 	{
   3838 		Value *value = rhs.loadValue();
   3839 		storeValue(value);
   3840 
   3841 		return RValue<UInt>(value);
   3842 	}
   3843 
   3844 	RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
   3845 	{
   3846 		Value *value = rhs.loadValue();
   3847 		storeValue(value);
   3848 
   3849 		return RValue<UInt>(value);
   3850 	}
   3851 
   3852 	RValue<UInt> UInt::operator=(const Int &rhs)
   3853 	{
   3854 		Value *value = rhs.loadValue();
   3855 		storeValue(value);
   3856 
   3857 		return RValue<UInt>(value);
   3858 	}
   3859 
   3860 	RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
   3861 	{
   3862 		Value *value = rhs.loadValue();
   3863 		storeValue(value);
   3864 
   3865 		return RValue<UInt>(value);
   3866 	}
   3867 
   3868 	RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
   3869 	{
   3870 		return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
   3871 	}
   3872 
   3873 	RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
   3874 	{
   3875 		return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
   3876 	}
   3877 
   3878 	RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
   3879 	{
   3880 		return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
   3881 	}
   3882 
   3883 	RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
   3884 	{
   3885 		return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
   3886 	}
   3887 
   3888 	RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
   3889 	{
   3890 		return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
   3891 	}
   3892 
   3893 	RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
   3894 	{
   3895 		return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
   3896 	}
   3897 
   3898 	RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
   3899 	{
   3900 		return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
   3901 	}
   3902 
   3903 	RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
   3904 	{
   3905 		return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
   3906 	}
   3907 
   3908 	RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
   3909 	{
   3910 		return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
   3911 	}
   3912 
   3913 	RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
   3914 	{
   3915 		return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
   3916 	}
   3917 
   3918 	RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)
   3919 	{
   3920 		return lhs = lhs + rhs;
   3921 	}
   3922 
   3923 	RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)
   3924 	{
   3925 		return lhs = lhs - rhs;
   3926 	}
   3927 
   3928 	RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)
   3929 	{
   3930 		return lhs = lhs * rhs;
   3931 	}
   3932 
   3933 	RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)
   3934 	{
   3935 		return lhs = lhs / rhs;
   3936 	}
   3937 
   3938 	RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)
   3939 	{
   3940 		return lhs = lhs % rhs;
   3941 	}
   3942 
   3943 	RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)
   3944 	{
   3945 		return lhs = lhs & rhs;
   3946 	}
   3947 
   3948 	RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)
   3949 	{
   3950 		return lhs = lhs | rhs;
   3951 	}
   3952 
   3953 	RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)
   3954 	{
   3955 		return lhs = lhs ^ rhs;
   3956 	}
   3957 
   3958 	RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)
   3959 	{
   3960 		return lhs = lhs << rhs;
   3961 	}
   3962 
   3963 	RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)
   3964 	{
   3965 		return lhs = lhs >> rhs;
   3966 	}
   3967 
   3968 	RValue<UInt> operator+(RValue<UInt> val)
   3969 	{
   3970 		return val;
   3971 	}
   3972 
   3973 	RValue<UInt> operator-(RValue<UInt> val)
   3974 	{
   3975 		return RValue<UInt>(Nucleus::createNeg(val.value));
   3976 	}
   3977 
   3978 	RValue<UInt> operator~(RValue<UInt> val)
   3979 	{
   3980 		return RValue<UInt>(Nucleus::createNot(val.value));
   3981 	}
   3982 
   3983 	RValue<UInt> operator++(UInt &val, int)   // Post-increment
   3984 	{
   3985 		RValue<UInt> res = val;
   3986 
   3987 		Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1)));
   3988 		val.storeValue(inc);
   3989 
   3990 		return res;
   3991 	}
   3992 
   3993 	const UInt &operator++(UInt &val)   // Pre-increment
   3994 	{
   3995 		Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1)));
   3996 		val.storeValue(inc);
   3997 
   3998 		return val;
   3999 	}
   4000 
   4001 	RValue<UInt> operator--(UInt &val, int)   // Post-decrement
   4002 	{
   4003 		RValue<UInt> res = val;
   4004 
   4005 		Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1)));
   4006 		val.storeValue(inc);
   4007 
   4008 		return res;
   4009 	}
   4010 
   4011 	const UInt &operator--(UInt &val)   // Pre-decrement
   4012 	{
   4013 		Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1)));
   4014 		val.storeValue(inc);
   4015 
   4016 		return val;
   4017 	}
   4018 
   4019 	RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
   4020 	{
   4021 		return IfThenElse(x > y, x, y);
   4022 	}
   4023 
   4024 	RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
   4025 	{
   4026 		return IfThenElse(x < y, x, y);
   4027 	}
   4028 
   4029 	RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
   4030 	{
   4031 		return Min(Max(x, min), max);
   4032 	}
   4033 
   4034 	RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
   4035 	{
   4036 		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
   4037 	}
   4038 
   4039 	RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
   4040 	{
   4041 		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
   4042 	}
   4043 
   4044 	RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
   4045 	{
   4046 		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
   4047 	}
   4048 
   4049 	RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
   4050 	{
   4051 		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
   4052 	}
   4053 
   4054 	RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
   4055 	{
   4056 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   4057 	}
   4058 
   4059 	RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
   4060 	{
   4061 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   4062 	}
   4063 
   4064 //	RValue<UInt> RoundUInt(RValue<Float> cast)
   4065 //	{
   4066 //		return x86::cvtss2si(val);   // FIXME: Unsigned
   4067 //
   4068 //	//	return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
   4069 //	}
   4070 
   4071 	Type *UInt::getType()
   4072 	{
   4073 		return T(llvm::Type::getInt32Ty(*::context));
   4074 	}
   4075 
   4076 //	Int2::Int2(RValue<Int> cast)
   4077 //	{
   4078 //		Value *extend = Nucleus::createZExt(cast.value, Long::getType());
   4079 //		Value *vector = Nucleus::createBitCast(extend, Int2::getType());
   4080 //
   4081 //		int shuffle[2] = {0, 0};
   4082 //		Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
   4083 //
   4084 //		storeValue(replicate);
   4085 //	}
   4086 
   4087 	Int2::Int2(RValue<Int4> cast)
   4088 	{
   4089 		storeValue(Nucleus::createBitCast(cast.value, getType()));
   4090 	}
   4091 
   4092 	Int2::Int2(int x, int y)
   4093 	{
   4094 		int64_t constantVector[2] = {x, y};
   4095 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   4096 	}
   4097 
   4098 	Int2::Int2(RValue<Int2> rhs)
   4099 	{
   4100 		storeValue(rhs.value);
   4101 	}
   4102 
   4103 	Int2::Int2(const Int2 &rhs)
   4104 	{
   4105 		Value *value = rhs.loadValue();
   4106 		storeValue(value);
   4107 	}
   4108 
   4109 	Int2::Int2(const Reference<Int2> &rhs)
   4110 	{
   4111 		Value *value = rhs.loadValue();
   4112 		storeValue(value);
   4113 	}
   4114 
   4115 	Int2::Int2(RValue<Int> lo, RValue<Int> hi)
   4116 	{
   4117 		int shuffle[4] = {0, 4, 1, 5};
   4118 		Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);
   4119 
   4120 		storeValue(Nucleus::createBitCast(packed, Int2::getType()));
   4121 	}
   4122 
   4123 	RValue<Int2> Int2::operator=(RValue<Int2> rhs)
   4124 	{
   4125 		storeValue(rhs.value);
   4126 
   4127 		return rhs;
   4128 	}
   4129 
   4130 	RValue<Int2> Int2::operator=(const Int2 &rhs)
   4131 	{
   4132 		Value *value = rhs.loadValue();
   4133 		storeValue(value);
   4134 
   4135 		return RValue<Int2>(value);
   4136 	}
   4137 
   4138 	RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
   4139 	{
   4140 		Value *value = rhs.loadValue();
   4141 		storeValue(value);
   4142 
   4143 		return RValue<Int2>(value);
   4144 	}
   4145 
   4146 	RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
   4147 	{
   4148 		return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
   4149 	}
   4150 
   4151 	RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
   4152 	{
   4153 		return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
   4154 	}
   4155 
   4156 //	RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
   4157 //	{
   4158 //		return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
   4159 //	}
   4160 
   4161 //	RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
   4162 //	{
   4163 //		return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
   4164 //	}
   4165 
   4166 //	RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
   4167 //	{
   4168 //		return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
   4169 //	}
   4170 
   4171 	RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
   4172 	{
   4173 		return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
   4174 	}
   4175 
   4176 	RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
   4177 	{
   4178 		return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
   4179 	}
   4180 
   4181 	RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
   4182 	{
   4183 		return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
   4184 	}
   4185 
   4186 	RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
   4187 	{
   4188 	//	return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
   4189 
   4190 		return x86::pslld(lhs, rhs);
   4191 	}
   4192 
   4193 	RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
   4194 	{
   4195 	//	return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
   4196 
   4197 		return x86::psrad(lhs, rhs);
   4198 	}
   4199 
   4200 	RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
   4201 	{
   4202 		return lhs = lhs + rhs;
   4203 	}
   4204 
   4205 	RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)
   4206 	{
   4207 		return lhs = lhs - rhs;
   4208 	}
   4209 
   4210 //	RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
   4211 //	{
   4212 //		return lhs = lhs * rhs;
   4213 //	}
   4214 
   4215 //	RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
   4216 //	{
   4217 //		return lhs = lhs / rhs;
   4218 //	}
   4219 
   4220 //	RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
   4221 //	{
   4222 //		return lhs = lhs % rhs;
   4223 //	}
   4224 
   4225 	RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)
   4226 	{
   4227 		return lhs = lhs & rhs;
   4228 	}
   4229 
   4230 	RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)
   4231 	{
   4232 		return lhs = lhs | rhs;
   4233 	}
   4234 
   4235 	RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)
   4236 	{
   4237 		return lhs = lhs ^ rhs;
   4238 	}
   4239 
   4240 	RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)
   4241 	{
   4242 		return lhs = lhs << rhs;
   4243 	}
   4244 
   4245 	RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)
   4246 	{
   4247 		return lhs = lhs >> rhs;
   4248 	}
   4249 
   4250 //	RValue<Int2> operator+(RValue<Int2> val)
   4251 //	{
   4252 //		return val;
   4253 //	}
   4254 
   4255 //	RValue<Int2> operator-(RValue<Int2> val)
   4256 //	{
   4257 //		return RValue<Int2>(Nucleus::createNeg(val.value));
   4258 //	}
   4259 
   4260 	RValue<Int2> operator~(RValue<Int2> val)
   4261 	{
   4262 		return RValue<Int2>(Nucleus::createNot(val.value));
   4263 	}
   4264 
   4265 	RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
   4266 	{
   4267 		int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
   4268 		return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   4269 	}
   4270 
   4271 	RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
   4272 	{
   4273 		int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
   4274 		auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   4275 		return As<Short4>(Swizzle(lowHigh, 0xEE));
   4276 	}
   4277 
   4278 	RValue<Int> Extract(RValue<Int2> val, int i)
   4279 	{
   4280 		return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
   4281 	}
   4282 
   4283 	RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
   4284 	{
   4285 		return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
   4286 	}
   4287 
   4288 	Type *Int2::getType()
   4289 	{
   4290 		return T(Type_v2i32);
   4291 	}
   4292 
   4293 	UInt2::UInt2(unsigned int x, unsigned int y)
   4294 	{
   4295 		int64_t constantVector[2] = {x, y};
   4296 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   4297 	}
   4298 
   4299 	UInt2::UInt2(RValue<UInt2> rhs)
   4300 	{
   4301 		storeValue(rhs.value);
   4302 	}
   4303 
   4304 	UInt2::UInt2(const UInt2 &rhs)
   4305 	{
   4306 		Value *value = rhs.loadValue();
   4307 		storeValue(value);
   4308 	}
   4309 
   4310 	UInt2::UInt2(const Reference<UInt2> &rhs)
   4311 	{
   4312 		Value *value = rhs.loadValue();
   4313 		storeValue(value);
   4314 	}
   4315 
   4316 	RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)
   4317 	{
   4318 		storeValue(rhs.value);
   4319 
   4320 		return rhs;
   4321 	}
   4322 
   4323 	RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
   4324 	{
   4325 		Value *value = rhs.loadValue();
   4326 		storeValue(value);
   4327 
   4328 		return RValue<UInt2>(value);
   4329 	}
   4330 
   4331 	RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
   4332 	{
   4333 		Value *value = rhs.loadValue();
   4334 		storeValue(value);
   4335 
   4336 		return RValue<UInt2>(value);
   4337 	}
   4338 
   4339 	RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
   4340 	{
   4341 		return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
   4342 	}
   4343 
   4344 	RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
   4345 	{
   4346 		return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
   4347 	}
   4348 
   4349 //	RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
   4350 //	{
   4351 //		return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
   4352 //	}
   4353 
   4354 //	RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
   4355 //	{
   4356 //		return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
   4357 //	}
   4358 
   4359 //	RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
   4360 //	{
   4361 //		return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
   4362 //	}
   4363 
   4364 	RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
   4365 	{
   4366 		return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
   4367 	}
   4368 
   4369 	RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
   4370 	{
   4371 		return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
   4372 	}
   4373 
   4374 	RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
   4375 	{
   4376 		return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
   4377 	}
   4378 
   4379 	RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
   4380 	{
   4381 	//	return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
   4382 
   4383 		return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
   4384 	}
   4385 
   4386 	RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
   4387 	{
   4388 	//	return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
   4389 
   4390 		return x86::psrld(lhs, rhs);
   4391 	}
   4392 
   4393 	RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
   4394 	{
   4395 		return lhs = lhs + rhs;
   4396 	}
   4397 
   4398 	RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)
   4399 	{
   4400 		return lhs = lhs - rhs;
   4401 	}
   4402 
   4403 //	RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
   4404 //	{
   4405 //		return lhs = lhs * rhs;
   4406 //	}
   4407 
   4408 //	RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
   4409 //	{
   4410 //		return lhs = lhs / rhs;
   4411 //	}
   4412 
   4413 //	RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
   4414 //	{
   4415 //		return lhs = lhs % rhs;
   4416 //	}
   4417 
   4418 	RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)
   4419 	{
   4420 		return lhs = lhs & rhs;
   4421 	}
   4422 
   4423 	RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)
   4424 	{
   4425 		return lhs = lhs | rhs;
   4426 	}
   4427 
   4428 	RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)
   4429 	{
   4430 		return lhs = lhs ^ rhs;
   4431 	}
   4432 
   4433 	RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)
   4434 	{
   4435 		return lhs = lhs << rhs;
   4436 	}
   4437 
   4438 	RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)
   4439 	{
   4440 		return lhs = lhs >> rhs;
   4441 	}
   4442 
   4443 //	RValue<UInt2> operator+(RValue<UInt2> val)
   4444 //	{
   4445 //		return val;
   4446 //	}
   4447 
   4448 //	RValue<UInt2> operator-(RValue<UInt2> val)
   4449 //	{
   4450 //		return RValue<UInt2>(Nucleus::createNeg(val.value));
   4451 //	}
   4452 
   4453 	RValue<UInt2> operator~(RValue<UInt2> val)
   4454 	{
   4455 		return RValue<UInt2>(Nucleus::createNot(val.value));
   4456 	}
   4457 
   4458 	Type *UInt2::getType()
   4459 	{
   4460 		return T(Type_v2i32);
   4461 	}
   4462 
   4463 	Int4::Int4() : XYZW(this)
   4464 	{
   4465 	}
   4466 
   4467 	Int4::Int4(RValue<Byte4> cast) : XYZW(this)
   4468 	{
   4469 		if(CPUID::supportsSSE4_1())
   4470 		{
   4471 			*this = x86::pmovzxbd(As<Byte16>(cast));
   4472 		}
   4473 		else
   4474 		{
   4475 			int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
   4476 			Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
   4477 			Value *b = Nucleus::createShuffleVector(a, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
   4478 
   4479 			int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
   4480 			Value *c = Nucleus::createBitCast(b, Short8::getType());
   4481 			Value *d = Nucleus::createShuffleVector(c, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
   4482 
   4483 			*this = As<Int4>(d);
   4484 		}
   4485 	}
   4486 
   4487 	Int4::Int4(RValue<SByte4> cast) : XYZW(this)
   4488 	{
   4489 		if(CPUID::supportsSSE4_1())
   4490 		{
   4491 			*this = x86::pmovsxbd(As<SByte16>(cast));
   4492 		}
   4493 		else
   4494 		{
   4495 			int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
   4496 			Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
   4497 			Value *b = Nucleus::createShuffleVector(a, a, swizzle);
   4498 
   4499 			int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
   4500 			Value *c = Nucleus::createBitCast(b, Short8::getType());
   4501 			Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
   4502 
   4503 			*this = As<Int4>(d) >> 24;
   4504 		}
   4505 	}
   4506 
   4507 	Int4::Int4(RValue<Float4> cast) : XYZW(this)
   4508 	{
   4509 		Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
   4510 
   4511 		storeValue(xyzw);
   4512 	}
   4513 
   4514 	Int4::Int4(RValue<Short4> cast) : XYZW(this)
   4515 	{
   4516 		if(CPUID::supportsSSE4_1())
   4517 		{
   4518 			*this = x86::pmovsxwd(As<Short8>(cast));
   4519 		}
   4520 		else
   4521 		{
   4522 			int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
   4523 			Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
   4524 			*this = As<Int4>(c) >> 16;
   4525 		}
   4526 	}
   4527 
   4528 	Int4::Int4(RValue<UShort4> cast) : XYZW(this)
   4529 	{
   4530 		if(CPUID::supportsSSE4_1())
   4531 		{
   4532 			*this = x86::pmovzxwd(As<UShort8>(cast));
   4533 		}
   4534 		else
   4535 		{
   4536 			int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
   4537 			Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
   4538 			*this = As<Int4>(c);
   4539 		}
   4540 	}
   4541 
   4542 	Int4::Int4(int xyzw) : XYZW(this)
   4543 	{
   4544 		constant(xyzw, xyzw, xyzw, xyzw);
   4545 	}
   4546 
   4547 	Int4::Int4(int x, int yzw) : XYZW(this)
   4548 	{
   4549 		constant(x, yzw, yzw, yzw);
   4550 	}
   4551 
   4552 	Int4::Int4(int x, int y, int zw) : XYZW(this)
   4553 	{
   4554 		constant(x, y, zw, zw);
   4555 	}
   4556 
   4557 	Int4::Int4(int x, int y, int z, int w) : XYZW(this)
   4558 	{
   4559 		constant(x, y, z, w);
   4560 	}
   4561 
   4562 	void Int4::constant(int x, int y, int z, int w)
   4563 	{
   4564 		int64_t constantVector[4] = {x, y, z, w};
   4565 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   4566 	}
   4567 
   4568 	Int4::Int4(RValue<Int4> rhs) : XYZW(this)
   4569 	{
   4570 		storeValue(rhs.value);
   4571 	}
   4572 
   4573 	Int4::Int4(const Int4 &rhs) : XYZW(this)
   4574 	{
   4575 		Value *value = rhs.loadValue();
   4576 		storeValue(value);
   4577 	}
   4578 
   4579 	Int4::Int4(const Reference<Int4> &rhs) : XYZW(this)
   4580 	{
   4581 		Value *value = rhs.loadValue();
   4582 		storeValue(value);
   4583 	}
   4584 
   4585 	Int4::Int4(RValue<UInt4> rhs) : XYZW(this)
   4586 	{
   4587 		storeValue(rhs.value);
   4588 	}
   4589 
   4590 	Int4::Int4(const UInt4 &rhs) : XYZW(this)
   4591 	{
   4592 		Value *value = rhs.loadValue();
   4593 		storeValue(value);
   4594 	}
   4595 
   4596 	Int4::Int4(const Reference<UInt4> &rhs) : XYZW(this)
   4597 	{
   4598 		Value *value = rhs.loadValue();
   4599 		storeValue(value);
   4600 	}
   4601 
   4602 	Int4::Int4(RValue<Int2> lo, RValue<Int2> hi) : XYZW(this)
   4603 	{
   4604 		int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
   4605 		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
   4606 
   4607 		storeValue(packed);
   4608 	}
   4609 
   4610 	Int4::Int4(RValue<Int> rhs) : XYZW(this)
   4611 	{
   4612 		Value *vector = loadValue();
   4613 		Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
   4614 
   4615 		int swizzle[4] = {0, 0, 0, 0};
   4616 		Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
   4617 
   4618 		storeValue(replicate);
   4619 	}
   4620 
   4621 	Int4::Int4(const Int &rhs) : XYZW(this)
   4622 	{
   4623 		*this = RValue<Int>(rhs.loadValue());
   4624 	}
   4625 
   4626 	Int4::Int4(const Reference<Int> &rhs) : XYZW(this)
   4627 	{
   4628 		*this = RValue<Int>(rhs.loadValue());
   4629 	}
   4630 
   4631 	RValue<Int4> Int4::operator=(RValue<Int4> rhs)
   4632 	{
   4633 		storeValue(rhs.value);
   4634 
   4635 		return rhs;
   4636 	}
   4637 
   4638 	RValue<Int4> Int4::operator=(const Int4 &rhs)
   4639 	{
   4640 		Value *value = rhs.loadValue();
   4641 		storeValue(value);
   4642 
   4643 		return RValue<Int4>(value);
   4644 	}
   4645 
   4646 	RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
   4647 	{
   4648 		Value *value = rhs.loadValue();
   4649 		storeValue(value);
   4650 
   4651 		return RValue<Int4>(value);
   4652 	}
   4653 
   4654 	RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
   4655 	{
   4656 		return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
   4657 	}
   4658 
   4659 	RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
   4660 	{
   4661 		return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
   4662 	}
   4663 
   4664 	RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
   4665 	{
   4666 		return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
   4667 	}
   4668 
   4669 	RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
   4670 	{
   4671 		return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
   4672 	}
   4673 
   4674 	RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
   4675 	{
   4676 		return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
   4677 	}
   4678 
   4679 	RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
   4680 	{
   4681 		return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
   4682 	}
   4683 
   4684 	RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
   4685 	{
   4686 		return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
   4687 	}
   4688 
   4689 	RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
   4690 	{
   4691 		return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
   4692 	}
   4693 
   4694 	RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
   4695 	{
   4696 		return x86::pslld(lhs, rhs);
   4697 	}
   4698 
   4699 	RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
   4700 	{
   4701 		return x86::psrad(lhs, rhs);
   4702 	}
   4703 
   4704 	RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
   4705 	{
   4706 		return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
   4707 	}
   4708 
   4709 	RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
   4710 	{
   4711 		return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
   4712 	}
   4713 
   4714 	RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
   4715 	{
   4716 		return lhs = lhs + rhs;
   4717 	}
   4718 
   4719 	RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
   4720 	{
   4721 		return lhs = lhs - rhs;
   4722 	}
   4723 
   4724 	RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
   4725 	{
   4726 		return lhs = lhs * rhs;
   4727 	}
   4728 
   4729 //	RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
   4730 //	{
   4731 //		return lhs = lhs / rhs;
   4732 //	}
   4733 
   4734 //	RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
   4735 //	{
   4736 //		return lhs = lhs % rhs;
   4737 //	}
   4738 
   4739 	RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
   4740 	{
   4741 		return lhs = lhs & rhs;
   4742 	}
   4743 
   4744 	RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
   4745 	{
   4746 		return lhs = lhs | rhs;
   4747 	}
   4748 
   4749 	RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
   4750 	{
   4751 		return lhs = lhs ^ rhs;
   4752 	}
   4753 
   4754 	RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
   4755 	{
   4756 		return lhs = lhs << rhs;
   4757 	}
   4758 
   4759 	RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
   4760 	{
   4761 		return lhs = lhs >> rhs;
   4762 	}
   4763 
   4764 	RValue<Int4> operator+(RValue<Int4> val)
   4765 	{
   4766 		return val;
   4767 	}
   4768 
   4769 	RValue<Int4> operator-(RValue<Int4> val)
   4770 	{
   4771 		return RValue<Int4>(Nucleus::createNeg(val.value));
   4772 	}
   4773 
   4774 	RValue<Int4> operator~(RValue<Int4> val)
   4775 	{
   4776 		return RValue<Int4>(Nucleus::createNot(val.value));
   4777 	}
   4778 
   4779 	RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
   4780 	{
   4781 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   4782 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   4783 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
   4784 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
   4785 	}
   4786 
   4787 	RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
   4788 	{
   4789 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   4790 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   4791 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
   4792 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
   4793 	}
   4794 
   4795 	RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
   4796 	{
   4797 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   4798 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   4799 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
   4800 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
   4801 	}
   4802 
   4803 	RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
   4804 	{
   4805 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   4806 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   4807 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
   4808 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
   4809 	}
   4810 
   4811 	RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
   4812 	{
   4813 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   4814 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   4815 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
   4816 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
   4817 	}
   4818 
   4819 	RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
   4820 	{
   4821 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   4822 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   4823 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
   4824 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
   4825 	}
   4826 
   4827 	RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
   4828 	{
   4829 		if(CPUID::supportsSSE4_1())
   4830 		{
   4831 			return x86::pmaxsd(x, y);
   4832 		}
   4833 		else
   4834 		{
   4835 			RValue<Int4> greater = CmpNLE(x, y);
   4836 			return (x & greater) | (y & ~greater);
   4837 		}
   4838 	}
   4839 
   4840 	RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
   4841 	{
   4842 		if(CPUID::supportsSSE4_1())
   4843 		{
   4844 			return x86::pminsd(x, y);
   4845 		}
   4846 		else
   4847 		{
   4848 			RValue<Int4> less = CmpLT(x, y);
   4849 			return (x & less) | (y & ~less);
   4850 		}
   4851 	}
   4852 
   4853 	RValue<Int4> RoundInt(RValue<Float4> cast)
   4854 	{
   4855 		return x86::cvtps2dq(cast);
   4856 	}
   4857 
   4858 	RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
   4859 	{
   4860 		return x86::packssdw(x, y);
   4861 	}
   4862 
   4863 	RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
   4864 	{
   4865 		return x86::packusdw(x, y);
   4866 	}
   4867 
   4868 	RValue<Int> Extract(RValue<Int4> x, int i)
   4869 	{
   4870 		return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
   4871 	}
   4872 
   4873 	RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
   4874 	{
   4875 		return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
   4876 	}
   4877 
   4878 	RValue<Int> SignMask(RValue<Int4> x)
   4879 	{
   4880 		return x86::movmskps(As<Float4>(x));
   4881 	}
   4882 
   4883 	RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
   4884 	{
   4885 		return RValue<Int4>(createSwizzle4(x.value, select));
   4886 	}
   4887 
   4888 	Type *Int4::getType()
   4889 	{
   4890 		return T(llvm::VectorType::get(T(Int::getType()), 4));
   4891 	}
   4892 
   4893 	UInt4::UInt4() : XYZW(this)
   4894 	{
   4895 	}
   4896 
   4897 	UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
   4898 	{
   4899 		// Note: createFPToUI is broken, must perform conversion using createFPtoSI
   4900 		// Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
   4901 
   4902 		// Smallest positive value representable in UInt, but not in Int
   4903 		const unsigned int ustart = 0x80000000u;
   4904 		const float ustartf = float(ustart);
   4905 
   4906 		// Check if the value can be represented as an Int
   4907 		Int4 uiValue = CmpNLT(cast, Float4(ustartf));
   4908 		// If the value is too large, subtract ustart and re-add it after conversion.
   4909 		uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
   4910 		// Otherwise, just convert normally
   4911 		          (~uiValue & Int4(cast));
   4912 		// If the value is negative, store 0, otherwise store the result of the conversion
   4913 		storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
   4914 	}
   4915 
   4916 	UInt4::UInt4(int xyzw) : XYZW(this)
   4917 	{
   4918 		constant(xyzw, xyzw, xyzw, xyzw);
   4919 	}
   4920 
   4921 	UInt4::UInt4(int x, int yzw) : XYZW(this)
   4922 	{
   4923 		constant(x, yzw, yzw, yzw);
   4924 	}
   4925 
   4926 	UInt4::UInt4(int x, int y, int zw) : XYZW(this)
   4927 	{
   4928 		constant(x, y, zw, zw);
   4929 	}
   4930 
   4931 	UInt4::UInt4(int x, int y, int z, int w) : XYZW(this)
   4932 	{
   4933 		constant(x, y, z, w);
   4934 	}
   4935 
   4936 	void UInt4::constant(int x, int y, int z, int w)
   4937 	{
   4938 		int64_t constantVector[4] = {x, y, z, w};
   4939 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   4940 	}
   4941 
   4942 	UInt4::UInt4(RValue<UInt4> rhs) : XYZW(this)
   4943 	{
   4944 		storeValue(rhs.value);
   4945 	}
   4946 
   4947 	UInt4::UInt4(const UInt4 &rhs) : XYZW(this)
   4948 	{
   4949 		Value *value = rhs.loadValue();
   4950 		storeValue(value);
   4951 	}
   4952 
   4953 	UInt4::UInt4(const Reference<UInt4> &rhs) : XYZW(this)
   4954 	{
   4955 		Value *value = rhs.loadValue();
   4956 		storeValue(value);
   4957 	}
   4958 
   4959 	UInt4::UInt4(RValue<Int4> rhs) : XYZW(this)
   4960 	{
   4961 		storeValue(rhs.value);
   4962 	}
   4963 
   4964 	UInt4::UInt4(const Int4 &rhs) : XYZW(this)
   4965 	{
   4966 		Value *value = rhs.loadValue();
   4967 		storeValue(value);
   4968 	}
   4969 
   4970 	UInt4::UInt4(const Reference<Int4> &rhs) : XYZW(this)
   4971 	{
   4972 		Value *value = rhs.loadValue();
   4973 		storeValue(value);
   4974 	}
   4975 
   4976 	UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi) : XYZW(this)
   4977 	{
   4978 		int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
   4979 		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
   4980 
   4981 		storeValue(packed);
   4982 	}
   4983 
   4984 	RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
   4985 	{
   4986 		storeValue(rhs.value);
   4987 
   4988 		return rhs;
   4989 	}
   4990 
   4991 	RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
   4992 	{
   4993 		Value *value = rhs.loadValue();
   4994 		storeValue(value);
   4995 
   4996 		return RValue<UInt4>(value);
   4997 	}
   4998 
   4999 	RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
   5000 	{
   5001 		Value *value = rhs.loadValue();
   5002 		storeValue(value);
   5003 
   5004 		return RValue<UInt4>(value);
   5005 	}
   5006 
   5007 	RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5008 	{
   5009 		return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
   5010 	}
   5011 
   5012 	RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5013 	{
   5014 		return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
   5015 	}
   5016 
   5017 	RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5018 	{
   5019 		return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
   5020 	}
   5021 
   5022 	RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5023 	{
   5024 		return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
   5025 	}
   5026 
   5027 	RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5028 	{
   5029 		return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
   5030 	}
   5031 
   5032 	RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5033 	{
   5034 		return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
   5035 	}
   5036 
   5037 	RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5038 	{
   5039 		return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
   5040 	}
   5041 
   5042 	RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5043 	{
   5044 		return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
   5045 	}
   5046 
   5047 	RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
   5048 	{
   5049 		return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
   5050 	}
   5051 
   5052 	RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
   5053 	{
   5054 		return x86::psrld(lhs, rhs);
   5055 	}
   5056 
   5057 	RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5058 	{
   5059 		return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
   5060 	}
   5061 
   5062 	RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5063 	{
   5064 		return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
   5065 	}
   5066 
   5067 	RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
   5068 	{
   5069 		return lhs = lhs + rhs;
   5070 	}
   5071 
   5072 	RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
   5073 	{
   5074 		return lhs = lhs - rhs;
   5075 	}
   5076 
   5077 	RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
   5078 	{
   5079 		return lhs = lhs * rhs;
   5080 	}
   5081 
   5082 //	RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
   5083 //	{
   5084 //		return lhs = lhs / rhs;
   5085 //	}
   5086 
   5087 //	RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
   5088 //	{
   5089 //		return lhs = lhs % rhs;
   5090 //	}
   5091 
   5092 	RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
   5093 	{
   5094 		return lhs = lhs & rhs;
   5095 	}
   5096 
   5097 	RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
   5098 	{
   5099 		return lhs = lhs | rhs;
   5100 	}
   5101 
   5102 	RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
   5103 	{
   5104 		return lhs = lhs ^ rhs;
   5105 	}
   5106 
   5107 	RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
   5108 	{
   5109 		return lhs = lhs << rhs;
   5110 	}
   5111 
   5112 	RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
   5113 	{
   5114 		return lhs = lhs >> rhs;
   5115 	}
   5116 
   5117 	RValue<UInt4> operator+(RValue<UInt4> val)
   5118 	{
   5119 		return val;
   5120 	}
   5121 
   5122 	RValue<UInt4> operator-(RValue<UInt4> val)
   5123 	{
   5124 		return RValue<UInt4>(Nucleus::createNeg(val.value));
   5125 	}
   5126 
   5127 	RValue<UInt4> operator~(RValue<UInt4> val)
   5128 	{
   5129 		return RValue<UInt4>(Nucleus::createNot(val.value));
   5130 	}
   5131 
   5132 	RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
   5133 	{
   5134 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5135 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5136 		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
   5137 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
   5138 	}
   5139 
   5140 	RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
   5141 	{
   5142 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
   5143 	}
   5144 
   5145 	RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
   5146 	{
   5147 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5148 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5149 		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
   5150 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
   5151 	}
   5152 
   5153 	RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
   5154 	{
   5155 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
   5156 	}
   5157 
   5158 	RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
   5159 	{
   5160 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5161 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5162 		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
   5163 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
   5164 	}
   5165 
   5166 	RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
   5167 	{
   5168 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
   5169 	}
   5170 
   5171 	RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
   5172 	{
   5173 		if(CPUID::supportsSSE4_1())
   5174 		{
   5175 			return x86::pmaxud(x, y);
   5176 		}
   5177 		else
   5178 		{
   5179 			RValue<UInt4> greater = CmpNLE(x, y);
   5180 			return (x & greater) | (y & ~greater);
   5181 		}
   5182 	}
   5183 
   5184 	RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
   5185 	{
   5186 		if(CPUID::supportsSSE4_1())
   5187 		{
   5188 			return x86::pminud(x, y);
   5189 		}
   5190 		else
   5191 		{
   5192 			RValue<UInt4> less = CmpLT(x, y);
   5193 			return (x & less) | (y & ~less);
   5194 		}
   5195 	}
   5196 
   5197 	Type *UInt4::getType()
   5198 	{
   5199 		return T(llvm::VectorType::get(T(UInt::getType()), 4));
   5200 	}
   5201 
   5202 	Float::Float(RValue<Int> cast)
   5203 	{
   5204 		Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
   5205 
   5206 		storeValue(integer);
   5207 	}
   5208 
   5209 	Float::Float(RValue<UInt> cast)
   5210 	{
   5211 		RValue<Float> result = Float(Int(cast & UInt(0x7FFFFFFF))) +
   5212 		                       As<Float>((As<Int>(cast) >> 31) & As<Int>(Float(0x80000000u)));
   5213 
   5214 		storeValue(result.value);
   5215 	}
   5216 
   5217 	Float::Float(float x)
   5218 	{
   5219 		storeValue(Nucleus::createConstantFloat(x));
   5220 	}
   5221 
   5222 	Float::Float(RValue<Float> rhs)
   5223 	{
   5224 		storeValue(rhs.value);
   5225 	}
   5226 
   5227 	Float::Float(const Float &rhs)
   5228 	{
   5229 		Value *value = rhs.loadValue();
   5230 		storeValue(value);
   5231 	}
   5232 
   5233 	Float::Float(const Reference<Float> &rhs)
   5234 	{
   5235 		Value *value = rhs.loadValue();
   5236 		storeValue(value);
   5237 	}
   5238 
   5239 	RValue<Float> Float::operator=(RValue<Float> rhs)
   5240 	{
   5241 		storeValue(rhs.value);
   5242 
   5243 		return rhs;
   5244 	}
   5245 
   5246 	RValue<Float> Float::operator=(const Float &rhs)
   5247 	{
   5248 		Value *value = rhs.loadValue();
   5249 		storeValue(value);
   5250 
   5251 		return RValue<Float>(value);
   5252 	}
   5253 
   5254 	RValue<Float> Float::operator=(const Reference<Float> &rhs)
   5255 	{
   5256 		Value *value = rhs.loadValue();
   5257 		storeValue(value);
   5258 
   5259 		return RValue<Float>(value);
   5260 	}
   5261 
   5262 	RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
   5263 	{
   5264 		return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
   5265 	}
   5266 
   5267 	RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
   5268 	{
   5269 		return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
   5270 	}
   5271 
   5272 	RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
   5273 	{
   5274 		return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
   5275 	}
   5276 
   5277 	RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
   5278 	{
   5279 		return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
   5280 	}
   5281 
   5282 	RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
   5283 	{
   5284 		return lhs = lhs + rhs;
   5285 	}
   5286 
   5287 	RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
   5288 	{
   5289 		return lhs = lhs - rhs;
   5290 	}
   5291 
   5292 	RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
   5293 	{
   5294 		return lhs = lhs * rhs;
   5295 	}
   5296 
   5297 	RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
   5298 	{
   5299 		return lhs = lhs / rhs;
   5300 	}
   5301 
   5302 	RValue<Float> operator+(RValue<Float> val)
   5303 	{
   5304 		return val;
   5305 	}
   5306 
   5307 	RValue<Float> operator-(RValue<Float> val)
   5308 	{
   5309 		return RValue<Float>(Nucleus::createFNeg(val.value));
   5310 	}
   5311 
   5312 	RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
   5313 	{
   5314 		return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
   5315 	}
   5316 
   5317 	RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
   5318 	{
   5319 		return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
   5320 	}
   5321 
   5322 	RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
   5323 	{
   5324 		return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
   5325 	}
   5326 
   5327 	RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
   5328 	{
   5329 		return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
   5330 	}
   5331 
   5332 	RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
   5333 	{
   5334 		return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
   5335 	}
   5336 
   5337 	RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
   5338 	{
   5339 		return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
   5340 	}
   5341 
   5342 	RValue<Float> Abs(RValue<Float> x)
   5343 	{
   5344 		return IfThenElse(x > 0.0f, x, -x);
   5345 	}
   5346 
   5347 	RValue<Float> Max(RValue<Float> x, RValue<Float> y)
   5348 	{
   5349 		return IfThenElse(x > y, x, y);
   5350 	}
   5351 
   5352 	RValue<Float> Min(RValue<Float> x, RValue<Float> y)
   5353 	{
   5354 		return IfThenElse(x < y, x, y);
   5355 	}
   5356 
   5357 	RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
   5358 	{
   5359 		#if defined(__i386__) || defined(__x86_64__)
   5360 			if(exactAtPow2)
   5361 			{
   5362 				// rcpss uses a piecewise-linear approximation which minimizes the relative error
   5363 				// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
   5364 				return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
   5365 			}
   5366 		#endif
   5367 
   5368 		return x86::rcpss(x);
   5369 	}
   5370 
   5371 	RValue<Float> RcpSqrt_pp(RValue<Float> x)
   5372 	{
   5373 		return x86::rsqrtss(x);
   5374 	}
   5375 
   5376 	RValue<Float> Sqrt(RValue<Float> x)
   5377 	{
   5378 		return x86::sqrtss(x);
   5379 	}
   5380 
   5381 	RValue<Float> Round(RValue<Float> x)
   5382 	{
   5383 		if(CPUID::supportsSSE4_1())
   5384 		{
   5385 			return x86::roundss(x, 0);
   5386 		}
   5387 		else
   5388 		{
   5389 			return Float4(Round(Float4(x))).x;
   5390 		}
   5391 	}
   5392 
   5393 	RValue<Float> Trunc(RValue<Float> x)
   5394 	{
   5395 		if(CPUID::supportsSSE4_1())
   5396 		{
   5397 			return x86::roundss(x, 3);
   5398 		}
   5399 		else
   5400 		{
   5401 			return Float(Int(x));   // Rounded toward zero
   5402 		}
   5403 	}
   5404 
   5405 	RValue<Float> Frac(RValue<Float> x)
   5406 	{
   5407 		if(CPUID::supportsSSE4_1())
   5408 		{
   5409 			return x - x86::floorss(x);
   5410 		}
   5411 		else
   5412 		{
   5413 			return Float4(Frac(Float4(x))).x;
   5414 		}
   5415 	}
   5416 
   5417 	RValue<Float> Floor(RValue<Float> x)
   5418 	{
   5419 		if(CPUID::supportsSSE4_1())
   5420 		{
   5421 			return x86::floorss(x);
   5422 		}
   5423 		else
   5424 		{
   5425 			return Float4(Floor(Float4(x))).x;
   5426 		}
   5427 	}
   5428 
   5429 	RValue<Float> Ceil(RValue<Float> x)
   5430 	{
   5431 		if(CPUID::supportsSSE4_1())
   5432 		{
   5433 			return x86::ceilss(x);
   5434 		}
   5435 		else
   5436 		{
   5437 			return Float4(Ceil(Float4(x))).x;
   5438 		}
   5439 	}
   5440 
   5441 	Type *Float::getType()
   5442 	{
   5443 		return T(llvm::Type::getFloatTy(*::context));
   5444 	}
   5445 
   5446 	Float2::Float2(RValue<Float4> cast)
   5447 	{
   5448 		storeValue(Nucleus::createBitCast(cast.value, getType()));
   5449 	}
   5450 
   5451 	Type *Float2::getType()
   5452 	{
   5453 		return T(Type_v2f32);
   5454 	}
   5455 
   5456 	Float4::Float4(RValue<Byte4> cast) : XYZW(this)
   5457 	{
   5458 		Value *a = Int4(cast).loadValue();
   5459 		Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
   5460 
   5461 		storeValue(xyzw);
   5462 	}
   5463 
   5464 	Float4::Float4(RValue<SByte4> cast) : XYZW(this)
   5465 	{
   5466 		Value *a = Int4(cast).loadValue();
   5467 		Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
   5468 
   5469 		storeValue(xyzw);
   5470 	}
   5471 
   5472 	Float4::Float4(RValue<Short4> cast) : XYZW(this)
   5473 	{
   5474 		Int4 c(cast);
   5475 		storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
   5476 	}
   5477 
   5478 	Float4::Float4(RValue<UShort4> cast) : XYZW(this)
   5479 	{
   5480 		Int4 c(cast);
   5481 		storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
   5482 	}
   5483 
   5484 	Float4::Float4(RValue<Int4> cast) : XYZW(this)
   5485 	{
   5486 		Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
   5487 
   5488 		storeValue(xyzw);
   5489 	}
   5490 
   5491 	Float4::Float4(RValue<UInt4> cast) : XYZW(this)
   5492 	{
   5493 		RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
   5494 		                        As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));
   5495 
   5496 		storeValue(result.value);
   5497 	}
   5498 
   5499 	Float4::Float4() : XYZW(this)
   5500 	{
   5501 	}
   5502 
   5503 	Float4::Float4(float xyzw) : XYZW(this)
   5504 	{
   5505 		constant(xyzw, xyzw, xyzw, xyzw);
   5506 	}
   5507 
   5508 	Float4::Float4(float x, float yzw) : XYZW(this)
   5509 	{
   5510 		constant(x, yzw, yzw, yzw);
   5511 	}
   5512 
   5513 	Float4::Float4(float x, float y, float zw) : XYZW(this)
   5514 	{
   5515 		constant(x, y, zw, zw);
   5516 	}
   5517 
   5518 	Float4::Float4(float x, float y, float z, float w) : XYZW(this)
   5519 	{
   5520 		constant(x, y, z, w);
   5521 	}
   5522 
   5523 	void Float4::constant(float x, float y, float z, float w)
   5524 	{
   5525 		double constantVector[4] = {x, y, z, w};
   5526 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
   5527 	}
   5528 
   5529 	Float4::Float4(RValue<Float4> rhs) : XYZW(this)
   5530 	{
   5531 		storeValue(rhs.value);
   5532 	}
   5533 
   5534 	Float4::Float4(const Float4 &rhs) : XYZW(this)
   5535 	{
   5536 		Value *value = rhs.loadValue();
   5537 		storeValue(value);
   5538 	}
   5539 
   5540 	Float4::Float4(const Reference<Float4> &rhs) : XYZW(this)
   5541 	{
   5542 		Value *value = rhs.loadValue();
   5543 		storeValue(value);
   5544 	}
   5545 
   5546 	Float4::Float4(RValue<Float> rhs) : XYZW(this)
   5547 	{
   5548 		Value *vector = loadValue();
   5549 		Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
   5550 
   5551 		int swizzle[4] = {0, 0, 0, 0};
   5552 		Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
   5553 
   5554 		storeValue(replicate);
   5555 	}
   5556 
   5557 	Float4::Float4(const Float &rhs) : XYZW(this)
   5558 	{
   5559 		*this = RValue<Float>(rhs.loadValue());
   5560 	}
   5561 
   5562 	Float4::Float4(const Reference<Float> &rhs) : XYZW(this)
   5563 	{
   5564 		*this = RValue<Float>(rhs.loadValue());
   5565 	}
   5566 
   5567 	RValue<Float4> Float4::operator=(float x)
   5568 	{
   5569 		return *this = Float4(x, x, x, x);
   5570 	}
   5571 
   5572 	RValue<Float4> Float4::operator=(RValue<Float4> rhs)
   5573 	{
   5574 		storeValue(rhs.value);
   5575 
   5576 		return rhs;
   5577 	}
   5578 
   5579 	RValue<Float4> Float4::operator=(const Float4 &rhs)
   5580 	{
   5581 		Value *value = rhs.loadValue();
   5582 		storeValue(value);
   5583 
   5584 		return RValue<Float4>(value);
   5585 	}
   5586 
   5587 	RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
   5588 	{
   5589 		Value *value = rhs.loadValue();
   5590 		storeValue(value);
   5591 
   5592 		return RValue<Float4>(value);
   5593 	}
   5594 
   5595 	RValue<Float4> Float4::operator=(RValue<Float> rhs)
   5596 	{
   5597 		return *this = Float4(rhs);
   5598 	}
   5599 
   5600 	RValue<Float4> Float4::operator=(const Float &rhs)
   5601 	{
   5602 		return *this = Float4(rhs);
   5603 	}
   5604 
   5605 	RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
   5606 	{
   5607 		return *this = Float4(rhs);
   5608 	}
   5609 
   5610 	RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
   5611 	{
   5612 		return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
   5613 	}
   5614 
   5615 	RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
   5616 	{
   5617 		return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
   5618 	}
   5619 
   5620 	RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
   5621 	{
   5622 		return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
   5623 	}
   5624 
   5625 	RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
   5626 	{
   5627 		return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
   5628 	}
   5629 
   5630 	RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
   5631 	{
   5632 		return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
   5633 	}
   5634 
   5635 	RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
   5636 	{
   5637 		return lhs = lhs + rhs;
   5638 	}
   5639 
   5640 	RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
   5641 	{
   5642 		return lhs = lhs - rhs;
   5643 	}
   5644 
   5645 	RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
   5646 	{
   5647 		return lhs = lhs * rhs;
   5648 	}
   5649 
   5650 	RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
   5651 	{
   5652 		return lhs = lhs / rhs;
   5653 	}
   5654 
   5655 	RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
   5656 	{
   5657 		return lhs = lhs % rhs;
   5658 	}
   5659 
   5660 	RValue<Float4> operator+(RValue<Float4> val)
   5661 	{
   5662 		return val;
   5663 	}
   5664 
   5665 	RValue<Float4> operator-(RValue<Float4> val)
   5666 	{
   5667 		return RValue<Float4>(Nucleus::createFNeg(val.value));
   5668 	}
   5669 
   5670 	RValue<Float4> Abs(RValue<Float4> x)
   5671 	{
   5672 		Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
   5673 		int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
   5674 		Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
   5675 
   5676 		return As<Float4>(result);
   5677 	}
   5678 
   5679 	RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
   5680 	{
   5681 		return x86::maxps(x, y);
   5682 	}
   5683 
   5684 	RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
   5685 	{
   5686 		return x86::minps(x, y);
   5687 	}
   5688 
   5689 	RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
   5690 	{
   5691 		#if defined(__i386__) || defined(__x86_64__)
   5692 			if(exactAtPow2)
   5693 			{
   5694 				// rcpps uses a piecewise-linear approximation which minimizes the relative error
   5695 				// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
   5696 				return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
   5697 			}
   5698 		#endif
   5699 
   5700 		return x86::rcpps(x);
   5701 	}
   5702 
   5703 	RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
   5704 	{
   5705 		return x86::rsqrtps(x);
   5706 	}
   5707 
   5708 	RValue<Float4> Sqrt(RValue<Float4> x)
   5709 	{
   5710 		return x86::sqrtps(x);
   5711 	}
   5712 
   5713 	RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
   5714 	{
   5715 		return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
   5716 	}
   5717 
   5718 	RValue<Float> Extract(RValue<Float4> x, int i)
   5719 	{
   5720 		return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
   5721 	}
   5722 
   5723 	RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
   5724 	{
   5725 		return RValue<Float4>(createSwizzle4(x.value, select));
   5726 	}
   5727 
   5728 	RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
   5729 	{
   5730 		int shuffle[4] =
   5731 		{
   5732 			((imm >> 0) & 0x03) + 0,
   5733 			((imm >> 2) & 0x03) + 0,
   5734 			((imm >> 4) & 0x03) + 4,
   5735 			((imm >> 6) & 0x03) + 4,
   5736 		};
   5737 
   5738 		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   5739 	}
   5740 
   5741 	RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
   5742 	{
   5743 		int shuffle[4] = {0, 4, 1, 5};
   5744 		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   5745 	}
   5746 
   5747 	RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
   5748 	{
   5749 		int shuffle[4] = {2, 6, 3, 7};
   5750 		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
   5751 	}
   5752 
   5753 	RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
   5754 	{
   5755 		Value *vector = lhs.loadValue();
   5756 		Value *result = createMask4(vector, rhs.value, select);
   5757 		lhs.storeValue(result);
   5758 
   5759 		return RValue<Float4>(result);
   5760 	}
   5761 
   5762 	RValue<Int> SignMask(RValue<Float4> x)
   5763 	{
   5764 		return x86::movmskps(x);
   5765 	}
   5766 
   5767 	RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
   5768 	{
   5769 	//	return As<Int4>(x86::cmpeqps(x, y));
   5770 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
   5771 	}
   5772 
   5773 	RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
   5774 	{
   5775 	//	return As<Int4>(x86::cmpltps(x, y));
   5776 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
   5777 	}
   5778 
   5779 	RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
   5780 	{
   5781 	//	return As<Int4>(x86::cmpleps(x, y));
   5782 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
   5783 	}
   5784 
   5785 	RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
   5786 	{
   5787 	//	return As<Int4>(x86::cmpneqps(x, y));
   5788 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
   5789 	}
   5790 
   5791 	RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
   5792 	{
   5793 	//	return As<Int4>(x86::cmpnltps(x, y));
   5794 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
   5795 	}
   5796 
   5797 	RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
   5798 	{
   5799 	//	return As<Int4>(x86::cmpnleps(x, y));
   5800 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
   5801 	}
   5802 
   5803 	RValue<Int4> IsInf(RValue<Float4> x)
   5804 	{
   5805 		return CmpEQ(As<Int4>(x) & Int4(0x7FFFFFFF), Int4(0x7F800000));
   5806 	}
   5807 
   5808 	RValue<Int4> IsNan(RValue<Float4> x)
   5809 	{
   5810 		return ~CmpEQ(x, x);
   5811 	}
   5812 
   5813 	RValue<Float4> Round(RValue<Float4> x)
   5814 	{
   5815 		if(CPUID::supportsSSE4_1())
   5816 		{
   5817 			return x86::roundps(x, 0);
   5818 		}
   5819 		else
   5820 		{
   5821 			return Float4(RoundInt(x));
   5822 		}
   5823 	}
   5824 
   5825 	RValue<Float4> Trunc(RValue<Float4> x)
   5826 	{
   5827 		if(CPUID::supportsSSE4_1())
   5828 		{
   5829 			return x86::roundps(x, 3);
   5830 		}
   5831 		else
   5832 		{
   5833 			return Float4(Int4(x));
   5834 		}
   5835 	}
   5836 
   5837 	RValue<Float4> Frac(RValue<Float4> x)
   5838 	{
   5839 		Float4 frc;
   5840 
   5841 		if(CPUID::supportsSSE4_1())
   5842 		{
   5843 			frc = x - Floor(x);
   5844 		}
   5845 		else
   5846 		{
   5847 			frc = x - Float4(Int4(x));   // Signed fractional part.
   5848 
   5849 			frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f)));   // Add 1.0 if negative.
   5850 		}
   5851 
   5852 		// x - floor(x) can be 1.0 for very small negative x.
   5853 		// Clamp against the value just below 1.0.
   5854 		return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
   5855 	}
   5856 
   5857 	RValue<Float4> Floor(RValue<Float4> x)
   5858 	{
   5859 		if(CPUID::supportsSSE4_1())
   5860 		{
   5861 			return x86::floorps(x);
   5862 		}
   5863 		else
   5864 		{
   5865 			return x - Frac(x);
   5866 		}
   5867 	}
   5868 
   5869 	RValue<Float4> Ceil(RValue<Float4> x)
   5870 	{
   5871 		if(CPUID::supportsSSE4_1())
   5872 		{
   5873 			return x86::ceilps(x);
   5874 		}
   5875 		else
   5876 		{
   5877 			return -Floor(-x);
   5878 		}
   5879 	}
   5880 
   5881 	Type *Float4::getType()
   5882 	{
   5883 		return T(llvm::VectorType::get(T(Float::getType()), 4));
   5884 	}
   5885 
   5886 	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
   5887 	{
   5888 		return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
   5889 	}
   5890 
   5891 	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
   5892 	{
   5893 		return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false));
   5894 	}
   5895 
   5896 	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
   5897 	{
   5898 		return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true));
   5899 	}
   5900 
   5901 	RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
   5902 	{
   5903 		return lhs = lhs + offset;
   5904 	}
   5905 
   5906 	RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
   5907 	{
   5908 		return lhs = lhs + offset;
   5909 	}
   5910 
   5911 	RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
   5912 	{
   5913 		return lhs = lhs + offset;
   5914 	}
   5915 
   5916 	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
   5917 	{
   5918 		return lhs + -offset;
   5919 	}
   5920 
   5921 	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
   5922 	{
   5923 		return lhs + -offset;
   5924 	}
   5925 
   5926 	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
   5927 	{
   5928 		return lhs + -offset;
   5929 	}
   5930 
   5931 	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
   5932 	{
   5933 		return lhs = lhs - offset;
   5934 	}
   5935 
   5936 	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
   5937 	{
   5938 		return lhs = lhs - offset;
   5939 	}
   5940 
   5941 	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
   5942 	{
   5943 		return lhs = lhs - offset;
   5944 	}
   5945 
   5946 	void Return()
   5947 	{
   5948 		Nucleus::createRetVoid();
   5949 		Nucleus::setInsertBlock(Nucleus::createBasicBlock());
   5950 		Nucleus::createUnreachable();
   5951 	}
   5952 
   5953 	void Return(RValue<Int> ret)
   5954 	{
   5955 		Nucleus::createRet(ret.value);
   5956 		Nucleus::setInsertBlock(Nucleus::createBasicBlock());
   5957 		Nucleus::createUnreachable();
   5958 	}
   5959 
   5960 	void branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
   5961 	{
   5962 		Nucleus::createCondBr(cmp.value, bodyBB, endBB);
   5963 		Nucleus::setInsertBlock(bodyBB);
   5964 	}
   5965 
   5966 	RValue<Long> Ticks()
   5967 	{
   5968 		llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter);
   5969 
   5970 		return RValue<Long>(V(::builder->CreateCall(rdtsc)));
   5971 	}
   5972 }
   5973 
   5974 namespace sw
   5975 {
   5976 	namespace x86
   5977 	{
   5978 		RValue<Int> cvtss2si(RValue<Float> val)
   5979 		{
   5980 			llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si);
   5981 
   5982 			Float4 vector;
   5983 			vector.x = val;
   5984 
   5985 			return RValue<Int>(V(::builder->CreateCall(cvtss2si, RValue<Float4>(vector).value)));
   5986 		}
   5987 
   5988 		RValue<Int4> cvtps2dq(RValue<Float4> val)
   5989 		{
   5990 			llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
   5991 
   5992 			return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value)));
   5993 		}
   5994 
   5995 		RValue<Float> rcpss(RValue<Float> val)
   5996 		{
   5997 			llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss);
   5998 
   5999 			Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
   6000 
   6001 			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, vector)), Float::getType(), 0));
   6002 		}
   6003 
   6004 		RValue<Float> sqrtss(RValue<Float> val)
   6005 		{
   6006 			llvm::Function *sqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ss);
   6007 
   6008 			Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
   6009 
   6010 			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, vector)), Float::getType(), 0));
   6011 		}
   6012 
   6013 		RValue<Float> rsqrtss(RValue<Float> val)
   6014 		{
   6015 			llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss);
   6016 
   6017 			Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
   6018 
   6019 			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, vector)), Float::getType(), 0));
   6020 		}
   6021 
   6022 		RValue<Float4> rcpps(RValue<Float4> val)
   6023 		{
   6024 			llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps);
   6025 
   6026 			return RValue<Float4>(V(::builder->CreateCall(rcpps, val.value)));
   6027 		}
   6028 
   6029 		RValue<Float4> sqrtps(RValue<Float4> val)
   6030 		{
   6031 			llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ps);
   6032 
   6033 			return RValue<Float4>(V(::builder->CreateCall(sqrtps, val.value)));
   6034 		}
   6035 
   6036 		RValue<Float4> rsqrtps(RValue<Float4> val)
   6037 		{
   6038 			llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps);
   6039 
   6040 			return RValue<Float4>(V(::builder->CreateCall(rsqrtps, val.value)));
   6041 		}
   6042 
   6043 		RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
   6044 		{
   6045 			llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps);
   6046 
   6047 			return RValue<Float4>(V(::builder->CreateCall2(maxps, x.value, y.value)));
   6048 		}
   6049 
   6050 		RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
   6051 		{
   6052 			llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps);
   6053 
   6054 			return RValue<Float4>(V(::builder->CreateCall2(minps, x.value, y.value)));
   6055 		}
   6056 
   6057 		RValue<Float> roundss(RValue<Float> val, unsigned char imm)
   6058 		{
   6059 			llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss);
   6060 
   6061 			Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
   6062 			Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
   6063 
   6064 			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, undef, vector, V(Nucleus::createConstantInt(imm)))), Float::getType(), 0));
   6065 		}
   6066 
   6067 		RValue<Float> floorss(RValue<Float> val)
   6068 		{
   6069 			return roundss(val, 1);
   6070 		}
   6071 
   6072 		RValue<Float> ceilss(RValue<Float> val)
   6073 		{
   6074 			return roundss(val, 2);
   6075 		}
   6076 
   6077 		RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
   6078 		{
   6079 			llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps);
   6080 
   6081 			return RValue<Float4>(V(::builder->CreateCall2(roundps, val.value, V(Nucleus::createConstantInt(imm)))));
   6082 		}
   6083 
   6084 		RValue<Float4> floorps(RValue<Float4> val)
   6085 		{
   6086 			return roundps(val, 1);
   6087 		}
   6088 
   6089 		RValue<Float4> ceilps(RValue<Float4> val)
   6090 		{
   6091 			return roundps(val, 2);
   6092 		}
   6093 
   6094 		RValue<Int4> pabsd(RValue<Int4> x)
   6095 		{
   6096 			llvm::Function *pabsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_ssse3_pabs_d_128);
   6097 
   6098 			return RValue<Int4>(V(::builder->CreateCall(pabsd, x.value)));
   6099 		}
   6100 
   6101 		RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
   6102 		{
   6103 			llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
   6104 
   6105 			return As<Short4>(V(::builder->CreateCall2(paddsw, x.value, y.value)));
   6106 		}
   6107 
   6108 		RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
   6109 		{
   6110 			llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
   6111 
   6112 			return As<Short4>(V(::builder->CreateCall2(psubsw, x.value, y.value)));
   6113 		}
   6114 
   6115 		RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
   6116 		{
   6117 			llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
   6118 
   6119 			return As<UShort4>(V(::builder->CreateCall2(paddusw, x.value, y.value)));
   6120 		}
   6121 
   6122 		RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
   6123 		{
   6124 			llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
   6125 
   6126 			return As<UShort4>(V(::builder->CreateCall2(psubusw, x.value, y.value)));
   6127 		}
   6128 
   6129 		RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
   6130 		{
   6131 			llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
   6132 
   6133 			return As<SByte8>(V(::builder->CreateCall2(paddsb, x.value, y.value)));
   6134 		}
   6135 
   6136 		RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
   6137 		{
   6138 			llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
   6139 
   6140 			return As<SByte8>(V(::builder->CreateCall2(psubsb, x.value, y.value)));
   6141 		}
   6142 
   6143 		RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
   6144 		{
   6145 			llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
   6146 
   6147 			return As<Byte8>(V(::builder->CreateCall2(paddusb, x.value, y.value)));
   6148 		}
   6149 
   6150 		RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
   6151 		{
   6152 			llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
   6153 
   6154 			return As<Byte8>(V(::builder->CreateCall2(psubusb, x.value, y.value)));
   6155 		}
   6156 
   6157 		RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
   6158 		{
   6159 			llvm::Function *pavgw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pavg_w);
   6160 
   6161 			return As<UShort4>(V(::builder->CreateCall2(pavgw, x.value, y.value)));
   6162 		}
   6163 
   6164 		RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
   6165 		{
   6166 			llvm::Function *pmaxsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmaxs_w);
   6167 
   6168 			return As<Short4>(V(::builder->CreateCall2(pmaxsw, x.value, y.value)));
   6169 		}
   6170 
   6171 		RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
   6172 		{
   6173 			llvm::Function *pminsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmins_w);
   6174 
   6175 			return As<Short4>(V(::builder->CreateCall2(pminsw, x.value, y.value)));
   6176 		}
   6177 
   6178 		RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
   6179 		{
   6180 			llvm::Function *pcmpgtw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_w);
   6181 
   6182 			return As<Short4>(V(::builder->CreateCall2(pcmpgtw, x.value, y.value)));
   6183 		}
   6184 
   6185 		RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
   6186 		{
   6187 			llvm::Function *pcmpeqw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_w);
   6188 
   6189 			return As<Short4>(V(::builder->CreateCall2(pcmpeqw, x.value, y.value)));
   6190 		}
   6191 
   6192 		RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
   6193 		{
   6194 			llvm::Function *pcmpgtb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_b);
   6195 
   6196 			return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, x.value, y.value)));
   6197 		}
   6198 
   6199 		RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
   6200 		{
   6201 			llvm::Function *pcmpeqb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_b);
   6202 
   6203 			return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, x.value, y.value)));
   6204 		}
   6205 
   6206 		RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
   6207 		{
   6208 			llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
   6209 
   6210 			return As<Short4>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
   6211 		}
   6212 
   6213 		RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
   6214 		{
   6215 			llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
   6216 
   6217 			return RValue<Short8>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
   6218 		}
   6219 
   6220 		RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
   6221 		{
   6222 			llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
   6223 
   6224 			return As<SByte8>(V(::builder->CreateCall2(packsswb, x.value, y.value)));
   6225 		}
   6226 
   6227 		RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
   6228 		{
   6229 			llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
   6230 
   6231 			return As<Byte8>(V(::builder->CreateCall2(packuswb, x.value, y.value)));
   6232 		}
   6233 
   6234 		RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
   6235 		{
   6236 			if(CPUID::supportsSSE4_1())
   6237 			{
   6238 				llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
   6239 
   6240 				return RValue<UShort8>(V(::builder->CreateCall2(packusdw, x.value, y.value)));
   6241 			}
   6242 			else
   6243 			{
   6244 				RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
   6245 				RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
   6246 
   6247 				return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
   6248 			}
   6249 		}
   6250 
   6251 		RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
   6252 		{
   6253 			llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
   6254 
   6255 			return As<UShort4>(V(::builder->CreateCall2(psrlw, x.value, V(Nucleus::createConstantInt(y)))));
   6256 		}
   6257 
   6258 		RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
   6259 		{
   6260 			llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
   6261 
   6262 			return RValue<UShort8>(V(::builder->CreateCall2(psrlw, x.value, V(Nucleus::createConstantInt(y)))));
   6263 		}
   6264 
   6265 		RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
   6266 		{
   6267 			llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
   6268 
   6269 			return As<Short4>(V(::builder->CreateCall2(psraw, x.value, V(Nucleus::createConstantInt(y)))));
   6270 		}
   6271 
   6272 		RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
   6273 		{
   6274 			llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
   6275 
   6276 			return RValue<Short8>(V(::builder->CreateCall2(psraw, x.value, V(Nucleus::createConstantInt(y)))));
   6277 		}
   6278 
   6279 		RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
   6280 		{
   6281 			llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
   6282 
   6283 			return As<Short4>(V(::builder->CreateCall2(psllw, x.value, V(Nucleus::createConstantInt(y)))));
   6284 		}
   6285 
   6286 		RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
   6287 		{
   6288 			llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
   6289 
   6290 			return RValue<Short8>(V(::builder->CreateCall2(psllw, x.value, V(Nucleus::createConstantInt(y)))));
   6291 		}
   6292 
   6293 		RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
   6294 		{
   6295 			llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
   6296 
   6297 			return As<Int2>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
   6298 		}
   6299 
   6300 		RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
   6301 		{
   6302 			llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
   6303 
   6304 			return RValue<Int4>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
   6305 		}
   6306 
   6307 		RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
   6308 		{
   6309 			llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
   6310 
   6311 			return As<Int2>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
   6312 		}
   6313 
   6314 		RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
   6315 		{
   6316 			llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
   6317 
   6318 			return RValue<Int4>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
   6319 		}
   6320 
   6321 		RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
   6322 		{
   6323 			llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
   6324 
   6325 			return As<UInt2>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
   6326 		}
   6327 
   6328 		RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
   6329 		{
   6330 			llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
   6331 
   6332 			return RValue<UInt4>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
   6333 		}
   6334 
   6335 		RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
   6336 		{
   6337 			llvm::Function *pmaxsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxsd);
   6338 
   6339 			return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, x.value, y.value)));
   6340 		}
   6341 
   6342 		RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
   6343 		{
   6344 			llvm::Function *pminsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminsd);
   6345 
   6346 			return RValue<Int4>(V(::builder->CreateCall2(pminsd, x.value, y.value)));
   6347 		}
   6348 
   6349 		RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
   6350 		{
   6351 			llvm::Function *pmaxud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxud);
   6352 
   6353 			return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, x.value, y.value)));
   6354 		}
   6355 
   6356 		RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
   6357 		{
   6358 			llvm::Function *pminud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminud);
   6359 
   6360 			return RValue<UInt4>(V(::builder->CreateCall2(pminud, x.value, y.value)));
   6361 		}
   6362 
   6363 		RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
   6364 		{
   6365 			llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
   6366 
   6367 			return As<Short4>(V(::builder->CreateCall2(pmulhw, x.value, y.value)));
   6368 		}
   6369 
   6370 		RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
   6371 		{
   6372 			llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
   6373 
   6374 			return As<UShort4>(V(::builder->CreateCall2(pmulhuw, x.value, y.value)));
   6375 		}
   6376 
   6377 		RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
   6378 		{
   6379 			llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
   6380 
   6381 			return As<Int2>(V(::builder->CreateCall2(pmaddwd, x.value, y.value)));
   6382 		}
   6383 
   6384 		RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
   6385 		{
   6386 			llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
   6387 
   6388 			return RValue<Short8>(V(::builder->CreateCall2(pmulhw, x.value, y.value)));
   6389 		}
   6390 
   6391 		RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
   6392 		{
   6393 			llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
   6394 
   6395 			return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, x.value, y.value)));
   6396 		}
   6397 
   6398 		RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
   6399 		{
   6400 			llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
   6401 
   6402 			return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, x.value, y.value)));
   6403 		}
   6404 
   6405 		RValue<Int> movmskps(RValue<Float4> x)
   6406 		{
   6407 			llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps);
   6408 
   6409 			return RValue<Int>(V(::builder->CreateCall(movmskps, x.value)));
   6410 		}
   6411 
   6412 		RValue<Int> pmovmskb(RValue<Byte8> x)
   6413 		{
   6414 			llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
   6415 
   6416 			return RValue<Int>(V(::builder->CreateCall(pmovmskb, x.value))) & 0xFF;
   6417 		}
   6418 
   6419 		RValue<Int4> pmovzxbd(RValue<Byte16> x)
   6420 		{
   6421 			llvm::Function *pmovzxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxbd);
   6422 
   6423 			return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, x.value)));
   6424 		}
   6425 
   6426 		RValue<Int4> pmovsxbd(RValue<SByte16> x)
   6427 		{
   6428 			llvm::Function *pmovsxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxbd);
   6429 
   6430 			return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, x.value)));
   6431 		}
   6432 
   6433 		RValue<Int4> pmovzxwd(RValue<UShort8> x)
   6434 		{
   6435 			llvm::Function *pmovzxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxwd);
   6436 
   6437 			return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, x.value)));
   6438 		}
   6439 
   6440 		RValue<Int4> pmovsxwd(RValue<Short8> x)
   6441 		{
   6442 			llvm::Function *pmovsxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxwd);
   6443 
   6444 			return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, x.value)));
   6445 		}
   6446 	}
   6447 }
   6448