Home | History | Annotate | Download | only in Reactor
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "Nucleus.hpp"
     16 
     17 #include "llvm/Support/IRBuilder.h"
     18 #include "llvm/Function.h"
     19 #include "llvm/GlobalVariable.h"
     20 #include "llvm/Module.h"
     21 #include "llvm/LLVMContext.h"
     22 #include "llvm/Constants.h"
     23 #include "llvm/Intrinsics.h"
     24 #include "llvm/PassManager.h"
     25 #include "llvm/Analysis/LoopPass.h"
     26 #include "llvm/Transforms/Scalar.h"
     27 #include "llvm/Target/TargetData.h"
     28 #include "llvm/Target/TargetOptions.h"
     29 #include "llvm/Support/TargetSelect.h"
     30 #include "../lib/ExecutionEngine/JIT/JIT.h"
     31 
     32 #include "Routine.hpp"
     33 #include "RoutineManager.hpp"
     34 #include "x86.hpp"
     35 #include "CPUID.hpp"
     36 #include "Thread.hpp"
     37 #include "Memory.hpp"
     38 
     39 #include <xmmintrin.h>
     40 #include <fstream>
     41 
     42 #if defined(__x86_64__) && defined(_WIN32)
     43 extern "C" void X86CompilationCallback()
     44 {
     45 	assert(false);   // UNIMPLEMENTED
     46 }
     47 #endif
     48 
     49 extern "C"
     50 {
     51 	bool (*CodeAnalystInitialize)() = 0;
     52 	void (*CodeAnalystCompleteJITLog)() = 0;
     53 	bool (*CodeAnalystLogJITCode)(const void *jitCodeStartAddr, unsigned int jitCodeSize, const wchar_t *functionName) = 0;
     54 }
     55 
     56 namespace llvm
     57 {
     58 	extern bool JITEmitDebugInfo;
     59 }
     60 
     61 namespace sw
     62 {
     63 	Optimization optimization[10] = {InstructionCombining, Disabled};
     64 
     65 	using namespace llvm;
     66 
     67 	RoutineManager *Nucleus::routineManager = 0;
     68 	ExecutionEngine *Nucleus::executionEngine = 0;
     69 	Builder *Nucleus::builder = 0;
     70 	LLVMContext *Nucleus::context = 0;
     71 	Module *Nucleus::module = 0;
     72 	llvm::Function *Nucleus::function = 0;
     73 	BackoffLock Nucleus::codegenMutex;
     74 
     75 	class Builder : public IRBuilder<>
     76 	{
     77 	};
     78 
     79 	Nucleus::Nucleus()
     80 	{
     81 		codegenMutex.lock();   // Reactor and LLVM are currently not thread safe
     82 
     83 		InitializeNativeTarget();
     84 		JITEmitDebugInfo = false;
     85 
     86 		if(!context)
     87 		{
     88 			context = new LLVMContext();
     89 		}
     90 
     91 		module = new Module("", *context);
     92 		routineManager = new RoutineManager();
     93 
     94 		#if defined(__x86_64__)
     95 			const char *architecture = "x86-64";
     96 		#else
     97 			const char *architecture = "x86";
     98 		#endif
     99 
    100 		SmallVector<std::string, 1> MAttrs;
    101 		MAttrs.push_back(CPUID::supportsMMX()    ? "+mmx"   : "-mmx");
    102 		MAttrs.push_back(CPUID::supportsCMOV()   ? "+cmov"  : "-cmov");
    103 		MAttrs.push_back(CPUID::supportsSSE()    ? "+sse"   : "-sse");
    104 		MAttrs.push_back(CPUID::supportsSSE2()   ? "+sse2"  : "-sse2");
    105 		MAttrs.push_back(CPUID::supportsSSE3()   ? "+sse3"  : "-sse3");
    106 		MAttrs.push_back(CPUID::supportsSSSE3()  ? "+ssse3" : "-ssse3");
    107 		MAttrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41");
    108 
    109 		std::string error;
    110 		TargetMachine *targetMachine = EngineBuilder::selectTarget(module, architecture, "", MAttrs, Reloc::Default, CodeModel::JITDefault, &error);
    111 		executionEngine = JIT::createJIT(module, 0, routineManager, CodeGenOpt::Aggressive, true, targetMachine);
    112 
    113 		if(!builder)
    114 		{
    115 			builder = static_cast<Builder*>(new IRBuilder<>(*context));
    116 
    117 			#if defined(_WIN32)
    118 				HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll");
    119 				if(CodeAnalyst)
    120 				{
    121 					CodeAnalystInitialize = (bool(*)())GetProcAddress(CodeAnalyst, "CAJIT_Initialize");
    122 					CodeAnalystCompleteJITLog = (void(*)())GetProcAddress(CodeAnalyst, "CAJIT_CompleteJITLog");
    123 					CodeAnalystLogJITCode = (bool(*)(const void*, unsigned int, const wchar_t*))GetProcAddress(CodeAnalyst, "CAJIT_LogJITCode");
    124 
    125 					CodeAnalystInitialize();
    126 				}
    127 			#endif
    128 		}
    129 	}
    130 
    131 	Nucleus::~Nucleus()
    132 	{
    133 		delete executionEngine;
    134 		executionEngine = 0;
    135 
    136 		routineManager = 0;
    137 		function = 0;
    138 		module = 0;
    139 
    140 		codegenMutex.unlock();
    141 	}
    142 
    143 	Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
    144 	{
    145 		if(builder->GetInsertBlock()->empty() || !builder->GetInsertBlock()->back().isTerminator())
    146 		{
    147 			Type *type = function->getReturnType();
    148 
    149 			if(type->isVoidTy())
    150 			{
    151 				createRetVoid();
    152 			}
    153 			else
    154 			{
    155 				createRet(UndefValue::get(type));
    156 			}
    157 		}
    158 
    159 		if(false)
    160 		{
    161 			std::string error;
    162 			raw_fd_ostream file("llvm-dump-unopt.txt", error);
    163 			module->print(file, 0);
    164 		}
    165 
    166 		if(runOptimizations)
    167 		{
    168 			optimize();
    169 		}
    170 
    171 		if(false)
    172 		{
    173 			std::string error;
    174 			raw_fd_ostream file("llvm-dump-opt.txt", error);
    175 			module->print(file, 0);
    176 		}
    177 
    178 		void *entry = executionEngine->getPointerToFunction(function);
    179 		Routine *routine = routineManager->acquireRoutine(entry);
    180 
    181 		if(CodeAnalystLogJITCode)
    182 		{
    183 			CodeAnalystLogJITCode(routine->getEntry(), routine->getCodeSize(), name);
    184 		}
    185 
    186 		return routine;
    187 	}
    188 
    189 	void Nucleus::optimize()
    190 	{
    191 		static PassManager *passManager = 0;
    192 
    193 		if(!passManager)
    194 		{
    195 			passManager = new PassManager();
    196 
    197 			UnsafeFPMath = true;
    198 		//	NoInfsFPMath = true;
    199 		//	NoNaNsFPMath = true;
    200 
    201 			passManager->add(new TargetData(*executionEngine->getTargetData()));
    202 			passManager->add(createScalarReplAggregatesPass());
    203 
    204 			for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
    205 			{
    206 				switch(optimization[pass])
    207 				{
    208 				case Disabled:                                                                 break;
    209 				case CFGSimplification:    passManager->add(createCFGSimplificationPass());    break;
    210 				case LICM:                 passManager->add(createLICMPass());                 break;
    211 				case AggressiveDCE:        passManager->add(createAggressiveDCEPass());        break;
    212 				case GVN:                  passManager->add(createGVNPass());                  break;
    213 				case InstructionCombining: passManager->add(createInstructionCombiningPass()); break;
    214 				case Reassociate:          passManager->add(createReassociatePass());          break;
    215 				case DeadStoreElimination: passManager->add(createDeadStoreEliminationPass()); break;
    216 				case SCCP:                 passManager->add(createSCCPPass());                 break;
    217 				case ScalarReplAggregates: passManager->add(createScalarReplAggregatesPass()); break;
    218 				default:
    219 					assert(false);
    220 				}
    221 			}
    222 		}
    223 
    224 		passManager->run(*module);
    225 	}
    226 
    227 	void Nucleus::setFunction(llvm::Function *function)
    228 	{
    229 		Nucleus::function = function;
    230 
    231 		builder->SetInsertPoint(BasicBlock::Create(*context, "", function));
    232 	}
    233 
    234 	Module *Nucleus::getModule()
    235 	{
    236 		return module;
    237 	}
    238 
    239 	llvm::Function *Nucleus::getFunction()
    240 	{
    241 		return function;
    242 	}
    243 
    244 	llvm::LLVMContext *Nucleus::getContext()
    245 	{
    246 		return context;
    247 	}
    248 
    249 	Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
    250 	{
    251 		// Need to allocate it in the entry block for mem2reg to work
    252 		llvm::Function *function = getFunction();
    253 		BasicBlock &entryBlock = function->getEntryBlock();
    254 
    255 		Instruction *declaration;
    256 
    257 		if(arraySize)
    258 		{
    259 			declaration = new AllocaInst(type, Nucleus::createConstantInt(arraySize));
    260 		}
    261 		else
    262 		{
    263 			declaration = new AllocaInst(type, (Value*)0);
    264 		}
    265 
    266 		entryBlock.getInstList().push_front(declaration);
    267 
    268 		return declaration;
    269 	}
    270 
    271 	BasicBlock *Nucleus::createBasicBlock()
    272 	{
    273 		return BasicBlock::Create(*context, "", Nucleus::getFunction());
    274 	}
    275 
    276 	BasicBlock *Nucleus::getInsertBlock()
    277 	{
    278 		return builder->GetInsertBlock();
    279 	}
    280 
    281 	void Nucleus::setInsertBlock(BasicBlock *basicBlock)
    282 	{
    283 	//	assert(builder->GetInsertBlock()->back().isTerminator());
    284 		return builder->SetInsertPoint(basicBlock);
    285 	}
    286 
    287 	BasicBlock *Nucleus::getPredecessor(BasicBlock *basicBlock)
    288 	{
    289 		return *pred_begin(basicBlock);
    290 	}
    291 
    292 	llvm::Function *Nucleus::createFunction(llvm::Type *ReturnType, std::vector<llvm::Type*> &Params)
    293 	{
    294 		llvm::FunctionType *functionType = llvm::FunctionType::get(ReturnType, Params, false);
    295 		llvm::Function *function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", Nucleus::getModule());
    296 		function->setCallingConv(llvm::CallingConv::C);
    297 
    298 		return function;
    299 	}
    300 
    301 	llvm::Value *Nucleus::getArgument(llvm::Function *function, unsigned int index)
    302 	{
    303 		llvm::Function::arg_iterator args = function->arg_begin();
    304 
    305 		while(index)
    306 		{
    307 			args++;
    308 			index--;
    309 		}
    310 
    311 		return &*args;
    312 	}
    313 
    314 	Value *Nucleus::createRetVoid()
    315 	{
    316 		x86::emms();
    317 
    318 		return builder->CreateRetVoid();
    319 	}
    320 
    321 	Value *Nucleus::createRet(Value *V)
    322 	{
    323 		x86::emms();
    324 
    325 		return builder->CreateRet(V);
    326 	}
    327 
    328 	Value *Nucleus::createBr(BasicBlock *dest)
    329 	{
    330 		return builder->CreateBr(dest);
    331 	}
    332 
    333 	Value *Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
    334 	{
    335 		return builder->CreateCondBr(cond, ifTrue, ifFalse);
    336 	}
    337 
    338 	Value *Nucleus::createAdd(Value *lhs, Value *rhs)
    339 	{
    340 		return builder->CreateAdd(lhs, rhs);
    341 	}
    342 
    343 	Value *Nucleus::createSub(Value *lhs, Value *rhs)
    344 	{
    345 		return builder->CreateSub(lhs, rhs);
    346 	}
    347 
    348 	Value *Nucleus::createMul(Value *lhs, Value *rhs)
    349 	{
    350 		return builder->CreateMul(lhs, rhs);
    351 	}
    352 
    353 	Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
    354 	{
    355 		return builder->CreateUDiv(lhs, rhs);
    356 	}
    357 
    358 	Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
    359 	{
    360 		return builder->CreateSDiv(lhs, rhs);
    361 	}
    362 
    363 	Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
    364 	{
    365 		return builder->CreateFAdd(lhs, rhs);
    366 	}
    367 
    368 	Value *Nucleus::createFSub(Value *lhs, Value *rhs)
    369 	{
    370 		return builder->CreateFSub(lhs, rhs);
    371 	}
    372 
    373 	Value *Nucleus::createFMul(Value *lhs, Value *rhs)
    374 	{
    375 		return builder->CreateFMul(lhs, rhs);
    376 	}
    377 
    378 	Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
    379 	{
    380 		return builder->CreateFDiv(lhs, rhs);
    381 	}
    382 
    383 	Value *Nucleus::createURem(Value *lhs, Value *rhs)
    384 	{
    385 		return builder->CreateURem(lhs, rhs);
    386 	}
    387 
    388 	Value *Nucleus::createSRem(Value *lhs, Value *rhs)
    389 	{
    390 		return builder->CreateSRem(lhs, rhs);
    391 	}
    392 
    393 	Value *Nucleus::createFRem(Value *lhs, Value *rhs)
    394 	{
    395 		return builder->CreateFRem(lhs, rhs);
    396 	}
    397 
    398 	Value *Nucleus::createShl(Value *lhs, Value *rhs)
    399 	{
    400 		return builder->CreateShl(lhs, rhs);
    401 	}
    402 
    403 	Value *Nucleus::createLShr(Value *lhs, Value *rhs)
    404 	{
    405 		return builder->CreateLShr(lhs, rhs);
    406 	}
    407 
    408 	Value *Nucleus::createAShr(Value *lhs, Value *rhs)
    409 	{
    410 		return builder->CreateAShr(lhs, rhs);
    411 	}
    412 
    413 	Value *Nucleus::createAnd(Value *lhs, Value *rhs)
    414 	{
    415 		return builder->CreateAnd(lhs, rhs);
    416 	}
    417 
    418 	Value *Nucleus::createOr(Value *lhs, Value *rhs)
    419 	{
    420 		return builder->CreateOr(lhs, rhs);
    421 	}
    422 
    423 	Value *Nucleus::createXor(Value *lhs, Value *rhs)
    424 	{
    425 		return builder->CreateXor(lhs, rhs);
    426 	}
    427 
    428 	Value *Nucleus::createNeg(Value *V)
    429 	{
    430 		return builder->CreateNeg(V);
    431 	}
    432 
    433 	Value *Nucleus::createFNeg(Value *V)
    434 	{
    435 		return builder->CreateFNeg(V);
    436 	}
    437 
    438 	Value *Nucleus::createNot(Value *V)
    439 	{
    440 		return builder->CreateNot(V);
    441 	}
    442 
    443 	Value *Nucleus::createLoad(Value *ptr, bool isVolatile, unsigned int align)
    444 	{
    445 		return builder->Insert(new LoadInst(ptr, "", isVolatile, align));
    446 	}
    447 
    448 	Value *Nucleus::createStore(Value *value, Value *ptr, bool isVolatile, unsigned int align)
    449 	{
    450 		return builder->Insert(new StoreInst(value, ptr, isVolatile, align));
    451 	}
    452 
    453 	Value *Nucleus::createGEP(Value *ptr, Value *index)
    454 	{
    455 		return builder->CreateGEP(ptr, index);
    456 	}
    457 
    458 	Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
    459 	{
    460 		return builder->CreateAtomicRMW(AtomicRMWInst::Add, ptr, value, SequentiallyConsistent);
    461 	}
    462 
    463 	Value *Nucleus::createTrunc(Value *V, Type *destType)
    464 	{
    465 		return builder->CreateTrunc(V, destType);
    466 	}
    467 
    468 	Value *Nucleus::createZExt(Value *V, Type *destType)
    469 	{
    470 		return builder->CreateZExt(V, destType);
    471 	}
    472 
    473 	Value *Nucleus::createSExt(Value *V, Type *destType)
    474 	{
    475 		return builder->CreateSExt(V, destType);
    476 	}
    477 
    478 	Value *Nucleus::createFPToUI(Value *V, Type *destType)
    479 	{
    480 		return builder->CreateFPToUI(V, destType);
    481 	}
    482 
    483 	Value *Nucleus::createFPToSI(Value *V, Type *destType)
    484 	{
    485 		return builder->CreateFPToSI(V, destType);
    486 	}
    487 
    488 	Value *Nucleus::createUIToFP(Value *V, Type *destType)
    489 	{
    490 		return builder->CreateUIToFP(V, destType);
    491 	}
    492 
    493 	Value *Nucleus::createSIToFP(Value *V, Type *destType)
    494 	{
    495 		return builder->CreateSIToFP(V, destType);
    496 	}
    497 
    498 	Value *Nucleus::createFPTrunc(Value *V, Type *destType)
    499 	{
    500 		return builder->CreateFPTrunc(V, destType);
    501 	}
    502 
    503 	Value *Nucleus::createFPExt(Value *V, Type *destType)
    504 	{
    505 		return builder->CreateFPExt(V, destType);
    506 	}
    507 
    508 	Value *Nucleus::createPtrToInt(Value *V, Type *destType)
    509 	{
    510 		return builder->CreatePtrToInt(V, destType);
    511 	}
    512 
    513 	Value *Nucleus::createIntToPtr(Value *V, Type *destType)
    514 	{
    515 		return builder->CreateIntToPtr(V, destType);
    516 	}
    517 
    518 	Value *Nucleus::createBitCast(Value *V, Type *destType)
    519 	{
    520 		return builder->CreateBitCast(V, destType);
    521 	}
    522 
    523 	Value *Nucleus::createIntCast(Value *V, Type *destType, bool isSigned)
    524 	{
    525 		return builder->CreateIntCast(V, destType, isSigned);
    526 	}
    527 
    528 	Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
    529 	{
    530 		return builder->CreateICmpEQ(lhs, rhs);
    531 	}
    532 
    533 	Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
    534 	{
    535 		return builder->CreateICmpNE(lhs, rhs);
    536 	}
    537 
    538 	Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
    539 	{
    540 		return builder->CreateICmpUGT(lhs, rhs);
    541 	}
    542 
    543 	Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
    544 	{
    545 		return builder->CreateICmpUGE(lhs, rhs);
    546 	}
    547 
    548 	Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
    549 	{
    550 		return builder->CreateICmpULT(lhs, rhs);
    551 	}
    552 
    553 	Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
    554 	{
    555 		return builder->CreateICmpULE(lhs, rhs);
    556 	}
    557 
    558 	Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
    559 	{
    560 		return builder->CreateICmpSGT(lhs, rhs);
    561 	}
    562 
    563 	Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
    564 	{
    565 		return builder->CreateICmpSGE(lhs, rhs);
    566 	}
    567 
    568 	Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
    569 	{
    570 		return builder->CreateICmpSLT(lhs, rhs);
    571 	}
    572 
    573 	Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
    574 	{
    575 		return builder->CreateICmpSLE(lhs, rhs);
    576 	}
    577 
    578 	Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
    579 	{
    580 		return builder->CreateFCmpOEQ(lhs, rhs);
    581 	}
    582 
    583 	Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
    584 	{
    585 		return builder->CreateFCmpOGT(lhs, rhs);
    586 	}
    587 
    588 	Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
    589 	{
    590 		return builder->CreateFCmpOGE(lhs, rhs);
    591 	}
    592 
    593 	Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
    594 	{
    595 		return builder->CreateFCmpOLT(lhs, rhs);
    596 	}
    597 
    598 	Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
    599 	{
    600 		return builder->CreateFCmpOLE(lhs, rhs);
    601 	}
    602 
    603 	Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
    604 	{
    605 		return builder->CreateFCmpONE(lhs, rhs);
    606 	}
    607 
    608 	Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
    609 	{
    610 		return builder->CreateFCmpORD(lhs, rhs);
    611 	}
    612 
    613 	Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
    614 	{
    615 		return builder->CreateFCmpUNO(lhs, rhs);
    616 	}
    617 
    618 	Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
    619 	{
    620 		return builder->CreateFCmpUEQ(lhs, rhs);
    621 	}
    622 
    623 	Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
    624 	{
    625 		return builder->CreateFCmpUGT(lhs, rhs);
    626 	}
    627 
    628 	Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
    629 	{
    630 		return builder->CreateFCmpUGE(lhs, rhs);
    631 	}
    632 
    633 	Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
    634 	{
    635 		return builder->CreateFCmpULT(lhs, rhs);
    636 	}
    637 
    638 	Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
    639 	{
    640 		return builder->CreateFCmpULE(lhs, rhs);
    641 	}
    642 
    643 	Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
    644 	{
    645 		return builder->CreateFCmpULE(lhs, rhs);
    646 	}
    647 
    648 	Value *Nucleus::createCall(Value *callee)
    649 	{
    650 		return builder->CreateCall(callee);
    651 	}
    652 
    653 	Value *Nucleus::createCall(Value *callee, Value *arg)
    654 	{
    655 		return builder->CreateCall(callee, arg);
    656 	}
    657 
    658 	Value *Nucleus::createCall(Value *callee, Value *arg1, Value *arg2)
    659 	{
    660 		return builder->CreateCall2(callee, arg1, arg2);
    661 	}
    662 
    663 	Value *Nucleus::createCall(Value *callee, Value *arg1, Value *arg2, Value *arg3)
    664 	{
    665 		return builder->CreateCall3(callee, arg1, arg2, arg3);
    666 	}
    667 
    668 	Value *Nucleus::createCall(Value *callee, Value *arg1, Value *arg2, Value *arg3, Value *arg4)
    669 	{
    670 		return builder->CreateCall4(callee, arg1, arg2, arg3, arg4);
    671 	}
    672 
    673 	Value *Nucleus::createExtractElement(Value *vector, int index)
    674 	{
    675 		return builder->CreateExtractElement(vector, createConstantInt(index));
    676 	}
    677 
    678 	Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
    679 	{
    680 		return builder->CreateInsertElement(vector, element, createConstantInt(index));
    681 	}
    682 
    683 	Value *Nucleus::createShuffleVector(Value *V1, Value *V2, Value *mask)
    684 	{
    685 		return builder->CreateShuffleVector(V1, V2, mask);
    686 	}
    687 
    688 	Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
    689 	{
    690 		return builder->CreateSelect(C, ifTrue, ifFalse);
    691 	}
    692 
    693 	Value *Nucleus::createSwitch(llvm::Value *V, llvm::BasicBlock *Dest, unsigned NumCases)
    694 	{
    695 		return builder->CreateSwitch(V, Dest, NumCases);
    696 	}
    697 
    698 	void Nucleus::addSwitchCase(llvm::Value *Switch, int Case, llvm::BasicBlock *Branch)
    699 	{
    700 		static_cast<SwitchInst*>(Switch)->addCase(Nucleus::createConstantInt(Case), Branch);
    701 	}
    702 
    703 	Value *Nucleus::createUnreachable()
    704 	{
    705 		return builder->CreateUnreachable();
    706 	}
    707 
    708 	Value *Nucleus::createSwizzle(Value *val, unsigned char select)
    709 	{
    710 		Constant *swizzle[4];
    711 		swizzle[0] = Nucleus::createConstantInt((select >> 0) & 0x03);
    712 		swizzle[1] = Nucleus::createConstantInt((select >> 2) & 0x03);
    713 		swizzle[2] = Nucleus::createConstantInt((select >> 4) & 0x03);
    714 		swizzle[3] = Nucleus::createConstantInt((select >> 6) & 0x03);
    715 
    716 		Value *shuffle = Nucleus::createShuffleVector(val, UndefValue::get(val->getType()), Nucleus::createConstantVector(swizzle, 4));
    717 
    718 		return shuffle;
    719 	}
    720 
    721 	Value *Nucleus::createMask(Value *lhs, Value *rhs, unsigned char select)
    722 	{
    723 		bool mask[4] = {false, false, false, false};
    724 
    725 		mask[(select >> 0) & 0x03] = true;
    726 		mask[(select >> 2) & 0x03] = true;
    727 		mask[(select >> 4) & 0x03] = true;
    728 		mask[(select >> 6) & 0x03] = true;
    729 
    730 		Constant *swizzle[4];
    731 		swizzle[0] = Nucleus::createConstantInt(mask[0] ? 4 : 0);
    732 		swizzle[1] = Nucleus::createConstantInt(mask[1] ? 5 : 1);
    733 		swizzle[2] = Nucleus::createConstantInt(mask[2] ? 6 : 2);
    734 		swizzle[3] = Nucleus::createConstantInt(mask[3] ? 7 : 3);
    735 
    736 		Value *shuffle = Nucleus::createShuffleVector(lhs, rhs, Nucleus::createConstantVector(swizzle, 4));
    737 
    738 		return shuffle;
    739 	}
    740 
    741 	const llvm::GlobalValue *Nucleus::getGlobalValueAtAddress(void *Addr)
    742 	{
    743 		return executionEngine->getGlobalValueAtAddress(Addr);
    744 	}
    745 
    746 	void Nucleus::addGlobalMapping(const llvm::GlobalValue *GV, void *Addr)
    747 	{
    748 		executionEngine->addGlobalMapping(GV, Addr);
    749 	}
    750 
    751 	llvm::GlobalValue *Nucleus::createGlobalValue(llvm::Type *Ty, bool isConstant, unsigned int Align)
    752 	{
    753 		llvm::GlobalValue *global = new llvm::GlobalVariable(*Nucleus::getModule(), Ty, isConstant, llvm::GlobalValue::ExternalLinkage, 0, "");
    754 		global->setAlignment(Align);
    755 
    756 		return global;
    757 	}
    758 
    759 	llvm::Type *Nucleus::getPointerType(llvm::Type *ElementType)
    760 	{
    761 		return llvm::PointerType::get(ElementType, 0);
    762 	}
    763 
    764 	llvm::Constant *Nucleus::createNullValue(llvm::Type *Ty)
    765 	{
    766 		return llvm::Constant::getNullValue(Ty);
    767 	}
    768 
    769 	llvm::ConstantInt *Nucleus::createConstantInt(int64_t i)
    770 	{
    771 		return llvm::ConstantInt::get(Type::getInt64Ty(*context), i, true);
    772 	}
    773 
    774 	llvm::ConstantInt *Nucleus::createConstantInt(int i)
    775 	{
    776 		return llvm::ConstantInt::get(Type::getInt32Ty(*context), i, true);
    777 	}
    778 
    779 	llvm::ConstantInt *Nucleus::createConstantInt(unsigned int i)
    780 	{
    781 		return llvm::ConstantInt::get(Type::getInt32Ty(*context), i, false);
    782 	}
    783 
    784 	llvm::ConstantInt *Nucleus::createConstantBool(bool b)
    785 	{
    786 		return llvm::ConstantInt::get(Type::getInt1Ty(*context), b);
    787 	}
    788 
    789 	llvm::ConstantInt *Nucleus::createConstantByte(signed char i)
    790 	{
    791 		return llvm::ConstantInt::get(Type::getInt8Ty(*context), i, true);
    792 	}
    793 
    794 	llvm::ConstantInt *Nucleus::createConstantByte(unsigned char i)
    795 	{
    796 		return llvm::ConstantInt::get(Type::getInt8Ty(*context), i, false);
    797 	}
    798 
    799 	llvm::ConstantInt *Nucleus::createConstantShort(short i)
    800 	{
    801 		return llvm::ConstantInt::get(Type::getInt16Ty(*context), i, true);
    802 	}
    803 
    804 	llvm::ConstantInt *Nucleus::createConstantShort(unsigned short i)
    805 	{
    806 		return llvm::ConstantInt::get(Type::getInt16Ty(*context), i, false);
    807 	}
    808 
    809 	llvm::Constant *Nucleus::createConstantFloat(float x)
    810 	{
    811 		return ConstantFP::get(Float::getType(), x);
    812 	}
    813 
    814 	llvm::Value *Nucleus::createNullPointer(llvm::Type *Ty)
    815 	{
    816 		return llvm::ConstantPointerNull::get(llvm::PointerType::get(Ty, 0));
    817 	}
    818 
    819 	llvm::Value *Nucleus::createConstantVector(llvm::Constant *const *Vals, unsigned NumVals)
    820 	{
    821 		return llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(Vals, NumVals));
    822 	}
    823 
    824 	Type *Void::getType()
    825 	{
    826 		return Type::getVoidTy(*Nucleus::getContext());
    827 	}
    828 
    829 	LValue::LValue(llvm::Type *type, int arraySize)
    830 	{
    831 		address = Nucleus::allocateStackVariable(type, arraySize);
    832 	}
    833 
    834 	llvm::Value *LValue::loadValue(unsigned int alignment) const
    835 	{
    836 		return Nucleus::createLoad(address, false, alignment);
    837 	}
    838 
    839 	llvm::Value *LValue::storeValue(llvm::Value *value, unsigned int alignment) const
    840 	{
    841 		return Nucleus::createStore(value, address, false, alignment);
    842 	}
    843 
    844 	llvm::Value *LValue::getAddress(llvm::Value *index) const
    845 	{
    846 		return Nucleus::createGEP(address, index);
    847 	}
    848 
    849 	Type *MMX::getType()
    850 	{
    851 		return Type::getX86_MMXTy(*Nucleus::getContext());
    852 	}
    853 
    854 	Bool::Bool(Argument<Bool> argument)
    855 	{
    856 		storeValue(argument.value);
    857 	}
    858 
    859 	Bool::Bool()
    860 	{
    861 	}
    862 
    863 	Bool::Bool(bool x)
    864 	{
    865 		storeValue(Nucleus::createConstantBool(x));
    866 	}
    867 
    868 	Bool::Bool(RValue<Bool> rhs)
    869 	{
    870 		storeValue(rhs.value);
    871 	}
    872 
    873 	Bool::Bool(const Bool &rhs)
    874 	{
    875 		Value *value = rhs.loadValue();
    876 		storeValue(value);
    877 	}
    878 
    879 	Bool::Bool(const Reference<Bool> &rhs)
    880 	{
    881 		Value *value = rhs.loadValue();
    882 		storeValue(value);
    883 	}
    884 
    885 	RValue<Bool> Bool::operator=(RValue<Bool> rhs) const
    886 	{
    887 		storeValue(rhs.value);
    888 
    889 		return rhs;
    890 	}
    891 
    892 	RValue<Bool> Bool::operator=(const Bool &rhs) const
    893 	{
    894 		Value *value = rhs.loadValue();
    895 		storeValue(value);
    896 
    897 		return RValue<Bool>(value);
    898 	}
    899 
    900 	RValue<Bool> Bool::operator=(const Reference<Bool> &rhs) const
    901 	{
    902 		Value *value = rhs.loadValue();
    903 		storeValue(value);
    904 
    905 		return RValue<Bool>(value);
    906 	}
    907 
    908 	RValue<Bool> operator!(RValue<Bool> val)
    909 	{
    910 		return RValue<Bool>(Nucleus::createNot(val.value));
    911 	}
    912 
    913 	RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
    914 	{
    915 		return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
    916 	}
    917 
    918 	RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
    919 	{
    920 		return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
    921 	}
    922 
    923 	Type *Bool::getType()
    924 	{
    925 		return Type::getInt1Ty(*Nucleus::getContext());
    926 	}
    927 
    928 	Byte::Byte(Argument<Byte> argument)
    929 	{
    930 		storeValue(argument.value);
    931 	}
    932 
    933 	Byte::Byte(RValue<Int> cast)
    934 	{
    935 		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
    936 
    937 		storeValue(integer);
    938 	}
    939 
    940 	Byte::Byte(RValue<UInt> cast)
    941 	{
    942 		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
    943 
    944 		storeValue(integer);
    945 	}
    946 
    947 	Byte::Byte(RValue<UShort> cast)
    948 	{
    949 		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
    950 
    951 		storeValue(integer);
    952 	}
    953 
    954 	Byte::Byte()
    955 	{
    956 	}
    957 
    958 	Byte::Byte(int x)
    959 	{
    960 		storeValue(Nucleus::createConstantByte((unsigned char)x));
    961 	}
    962 
    963 	Byte::Byte(unsigned char x)
    964 	{
    965 		storeValue(Nucleus::createConstantByte(x));
    966 	}
    967 
    968 	Byte::Byte(RValue<Byte> rhs)
    969 	{
    970 		storeValue(rhs.value);
    971 	}
    972 
    973 	Byte::Byte(const Byte &rhs)
    974 	{
    975 		Value *value = rhs.loadValue();
    976 		storeValue(value);
    977 	}
    978 
    979 	Byte::Byte(const Reference<Byte> &rhs)
    980 	{
    981 		Value *value = rhs.loadValue();
    982 		storeValue(value);
    983 	}
    984 
    985 	RValue<Byte> Byte::operator=(RValue<Byte> rhs) const
    986 	{
    987 		storeValue(rhs.value);
    988 
    989 		return rhs;
    990 	}
    991 
    992 	RValue<Byte> Byte::operator=(const Byte &rhs) const
    993 	{
    994 		Value *value = rhs.loadValue();
    995 		storeValue(value);
    996 
    997 		return RValue<Byte>(value);
    998 	}
    999 
   1000 	RValue<Byte> Byte::operator=(const Reference<Byte> &rhs) const
   1001 	{
   1002 		Value *value = rhs.loadValue();
   1003 		storeValue(value);
   1004 
   1005 		return RValue<Byte>(value);
   1006 	}
   1007 
   1008 	RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
   1009 	{
   1010 		return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
   1011 	}
   1012 
   1013 	RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
   1014 	{
   1015 		return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
   1016 	}
   1017 
   1018 	RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
   1019 	{
   1020 		return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
   1021 	}
   1022 
   1023 	RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
   1024 	{
   1025 		return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
   1026 	}
   1027 
   1028 	RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
   1029 	{
   1030 		return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
   1031 	}
   1032 
   1033 	RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
   1034 	{
   1035 		return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
   1036 	}
   1037 
   1038 	RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
   1039 	{
   1040 		return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
   1041 	}
   1042 
   1043 	RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
   1044 	{
   1045 		return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
   1046 	}
   1047 
   1048 	RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
   1049 	{
   1050 		return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
   1051 	}
   1052 
   1053 	RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
   1054 	{
   1055 		return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
   1056 	}
   1057 
   1058 	RValue<Byte> operator+=(const Byte &lhs, RValue<Byte> rhs)
   1059 	{
   1060 		return lhs = lhs + rhs;
   1061 	}
   1062 
   1063 	RValue<Byte> operator-=(const Byte &lhs, RValue<Byte> rhs)
   1064 	{
   1065 		return lhs = lhs - rhs;
   1066 	}
   1067 
   1068 	RValue<Byte> operator*=(const Byte &lhs, RValue<Byte> rhs)
   1069 	{
   1070 		return lhs = lhs * rhs;
   1071 	}
   1072 
   1073 	RValue<Byte> operator/=(const Byte &lhs, RValue<Byte> rhs)
   1074 	{
   1075 		return lhs = lhs / rhs;
   1076 	}
   1077 
   1078 	RValue<Byte> operator%=(const Byte &lhs, RValue<Byte> rhs)
   1079 	{
   1080 		return lhs = lhs % rhs;
   1081 	}
   1082 
   1083 	RValue<Byte> operator&=(const Byte &lhs, RValue<Byte> rhs)
   1084 	{
   1085 		return lhs = lhs & rhs;
   1086 	}
   1087 
   1088 	RValue<Byte> operator|=(const Byte &lhs, RValue<Byte> rhs)
   1089 	{
   1090 		return lhs = lhs | rhs;
   1091 	}
   1092 
   1093 	RValue<Byte> operator^=(const Byte &lhs, RValue<Byte> rhs)
   1094 	{
   1095 		return lhs = lhs ^ rhs;
   1096 	}
   1097 
   1098 	RValue<Byte> operator<<=(const Byte &lhs, RValue<Byte> rhs)
   1099 	{
   1100 		return lhs = lhs << rhs;
   1101 	}
   1102 
   1103 	RValue<Byte> operator>>=(const Byte &lhs, RValue<Byte> rhs)
   1104 	{
   1105 		return lhs = lhs >> rhs;
   1106 	}
   1107 
   1108 	RValue<Byte> operator+(RValue<Byte> val)
   1109 	{
   1110 		return val;
   1111 	}
   1112 
   1113 	RValue<Byte> operator-(RValue<Byte> val)
   1114 	{
   1115 		return RValue<Byte>(Nucleus::createNeg(val.value));
   1116 	}
   1117 
   1118 	RValue<Byte> operator~(RValue<Byte> val)
   1119 	{
   1120 		return RValue<Byte>(Nucleus::createNot(val.value));
   1121 	}
   1122 
   1123 	RValue<Byte> operator++(const Byte &val, int)   // Post-increment
   1124 	{
   1125 		RValue<Byte> res = val;
   1126 
   1127 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((unsigned char)1));
   1128 		val.storeValue(inc);
   1129 
   1130 		return res;
   1131 	}
   1132 
   1133 	const Byte &operator++(const Byte &val)   // Pre-increment
   1134 	{
   1135 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantByte((unsigned char)1));
   1136 		val.storeValue(inc);
   1137 
   1138 		return val;
   1139 	}
   1140 
   1141 	RValue<Byte> operator--(const Byte &val, int)   // Post-decrement
   1142 	{
   1143 		RValue<Byte> res = val;
   1144 
   1145 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((unsigned char)1));
   1146 		val.storeValue(inc);
   1147 
   1148 		return res;
   1149 	}
   1150 
   1151 	const Byte &operator--(const Byte &val)   // Pre-decrement
   1152 	{
   1153 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantByte((unsigned char)1));
   1154 		val.storeValue(inc);
   1155 
   1156 		return val;
   1157 	}
   1158 
   1159 	RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
   1160 	{
   1161 		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
   1162 	}
   1163 
   1164 	RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
   1165 	{
   1166 		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
   1167 	}
   1168 
   1169 	RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
   1170 	{
   1171 		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
   1172 	}
   1173 
   1174 	RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
   1175 	{
   1176 		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
   1177 	}
   1178 
   1179 	RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
   1180 	{
   1181 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   1182 	}
   1183 
   1184 	RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
   1185 	{
   1186 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   1187 	}
   1188 
   1189 	Type *Byte::getType()
   1190 	{
   1191 		return Type::getInt8Ty(*Nucleus::getContext());
   1192 	}
   1193 
   1194 	SByte::SByte(Argument<SByte> argument)
   1195 	{
   1196 		storeValue(argument.value);
   1197 	}
   1198 
   1199 	SByte::SByte(RValue<Int> cast)
   1200 	{
   1201 		Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
   1202 
   1203 		storeValue(integer);
   1204 	}
   1205 
   1206 	SByte::SByte(RValue<Short> cast)
   1207 	{
   1208 		Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
   1209 
   1210 		storeValue(integer);
   1211 	}
   1212 
   1213 	SByte::SByte()
   1214 	{
   1215 	}
   1216 
   1217 	SByte::SByte(signed char x)
   1218 	{
   1219 		storeValue(Nucleus::createConstantByte(x));
   1220 	}
   1221 
   1222 	SByte::SByte(RValue<SByte> rhs)
   1223 	{
   1224 		storeValue(rhs.value);
   1225 	}
   1226 
   1227 	SByte::SByte(const SByte &rhs)
   1228 	{
   1229 		Value *value = rhs.loadValue();
   1230 		storeValue(value);
   1231 	}
   1232 
   1233 	SByte::SByte(const Reference<SByte> &rhs)
   1234 	{
   1235 		Value *value = rhs.loadValue();
   1236 		storeValue(value);
   1237 	}
   1238 
   1239 	RValue<SByte> SByte::operator=(RValue<SByte> rhs) const
   1240 	{
   1241 		storeValue(rhs.value);
   1242 
   1243 		return rhs;
   1244 	}
   1245 
   1246 	RValue<SByte> SByte::operator=(const SByte &rhs) const
   1247 	{
   1248 		Value *value = rhs.loadValue();
   1249 		storeValue(value);
   1250 
   1251 		return RValue<SByte>(value);
   1252 	}
   1253 
   1254 	RValue<SByte> SByte::operator=(const Reference<SByte> &rhs) const
   1255 	{
   1256 		Value *value = rhs.loadValue();
   1257 		storeValue(value);
   1258 
   1259 		return RValue<SByte>(value);
   1260 	}
   1261 
   1262 	RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
   1263 	{
   1264 		return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
   1265 	}
   1266 
   1267 	RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
   1268 	{
   1269 		return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
   1270 	}
   1271 
   1272 	RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
   1273 	{
   1274 		return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
   1275 	}
   1276 
   1277 	RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
   1278 	{
   1279 		return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
   1280 	}
   1281 
   1282 	RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
   1283 	{
   1284 		return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
   1285 	}
   1286 
   1287 	RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
   1288 	{
   1289 		return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
   1290 	}
   1291 
   1292 	RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
   1293 	{
   1294 		return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
   1295 	}
   1296 
   1297 	RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
   1298 	{
   1299 		return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
   1300 	}
   1301 
   1302 	RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
   1303 	{
   1304 		return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
   1305 	}
   1306 
   1307 	RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
   1308 	{
   1309 		return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
   1310 	}
   1311 
   1312 	RValue<SByte> operator+=(const SByte &lhs, RValue<SByte> rhs)
   1313 	{
   1314 		return lhs = lhs + rhs;
   1315 	}
   1316 
   1317 	RValue<SByte> operator-=(const SByte &lhs, RValue<SByte> rhs)
   1318 	{
   1319 		return lhs = lhs - rhs;
   1320 	}
   1321 
   1322 	RValue<SByte> operator*=(const SByte &lhs, RValue<SByte> rhs)
   1323 	{
   1324 		return lhs = lhs * rhs;
   1325 	}
   1326 
   1327 	RValue<SByte> operator/=(const SByte &lhs, RValue<SByte> rhs)
   1328 	{
   1329 		return lhs = lhs / rhs;
   1330 	}
   1331 
   1332 	RValue<SByte> operator%=(const SByte &lhs, RValue<SByte> rhs)
   1333 	{
   1334 		return lhs = lhs % rhs;
   1335 	}
   1336 
   1337 	RValue<SByte> operator&=(const SByte &lhs, RValue<SByte> rhs)
   1338 	{
   1339 		return lhs = lhs & rhs;
   1340 	}
   1341 
   1342 	RValue<SByte> operator|=(const SByte &lhs, RValue<SByte> rhs)
   1343 	{
   1344 		return lhs = lhs | rhs;
   1345 	}
   1346 
   1347 	RValue<SByte> operator^=(const SByte &lhs, RValue<SByte> rhs)
   1348 	{
   1349 		return lhs = lhs ^ rhs;
   1350 	}
   1351 
   1352 	RValue<SByte> operator<<=(const SByte &lhs, RValue<SByte> rhs)
   1353 	{
   1354 		return lhs = lhs << rhs;
   1355 	}
   1356 
   1357 	RValue<SByte> operator>>=(const SByte &lhs, RValue<SByte> rhs)
   1358 	{
   1359 		return lhs = lhs >> rhs;
   1360 	}
   1361 
   1362 	RValue<SByte> operator+(RValue<SByte> val)
   1363 	{
   1364 		return val;
   1365 	}
   1366 
   1367 	RValue<SByte> operator-(RValue<SByte> val)
   1368 	{
   1369 		return RValue<SByte>(Nucleus::createNeg(val.value));
   1370 	}
   1371 
   1372 	RValue<SByte> operator~(RValue<SByte> val)
   1373 	{
   1374 		return RValue<SByte>(Nucleus::createNot(val.value));
   1375 	}
   1376 
   1377 	RValue<SByte> operator++(const SByte &val, int)   // Post-increment
   1378 	{
   1379 		RValue<SByte> res = val;
   1380 
   1381 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((signed char)1));
   1382 		val.storeValue(inc);
   1383 
   1384 		return res;
   1385 	}
   1386 
   1387 	const SByte &operator++(const SByte &val)   // Pre-increment
   1388 	{
   1389 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantByte((signed char)1));
   1390 		val.storeValue(inc);
   1391 
   1392 		return val;
   1393 	}
   1394 
   1395 	RValue<SByte> operator--(const SByte &val, int)   // Post-decrement
   1396 	{
   1397 		RValue<SByte> res = val;
   1398 
   1399 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((signed char)1));
   1400 		val.storeValue(inc);
   1401 
   1402 		return res;
   1403 	}
   1404 
   1405 	const SByte &operator--(const SByte &val)   // Pre-decrement
   1406 	{
   1407 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantByte((signed char)1));
   1408 		val.storeValue(inc);
   1409 
   1410 		return val;
   1411 	}
   1412 
   1413 	RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
   1414 	{
   1415 		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
   1416 	}
   1417 
   1418 	RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
   1419 	{
   1420 		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
   1421 	}
   1422 
   1423 	RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
   1424 	{
   1425 		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
   1426 	}
   1427 
   1428 	RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
   1429 	{
   1430 		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
   1431 	}
   1432 
   1433 	RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
   1434 	{
   1435 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   1436 	}
   1437 
   1438 	RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
   1439 	{
   1440 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   1441 	}
   1442 
   1443 	Type *SByte::getType()
   1444 	{
   1445 		return Type::getInt8Ty(*Nucleus::getContext());
   1446 	}
   1447 
   1448 	Short::Short(Argument<Short> argument)
   1449 	{
   1450 		storeValue(argument.value);
   1451 	}
   1452 
   1453 	Short::Short(RValue<Int> cast)
   1454 	{
   1455 		Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
   1456 
   1457 		storeValue(integer);
   1458 	}
   1459 
   1460 	Short::Short()
   1461 	{
   1462 	}
   1463 
   1464 	Short::Short(short x)
   1465 	{
   1466 		storeValue(Nucleus::createConstantShort(x));
   1467 	}
   1468 
   1469 	Short::Short(RValue<Short> rhs)
   1470 	{
   1471 		storeValue(rhs.value);
   1472 	}
   1473 
   1474 	Short::Short(const Short &rhs)
   1475 	{
   1476 		Value *value = rhs.loadValue();
   1477 		storeValue(value);
   1478 	}
   1479 
   1480 	Short::Short(const Reference<Short> &rhs)
   1481 	{
   1482 		Value *value = rhs.loadValue();
   1483 		storeValue(value);
   1484 	}
   1485 
   1486 	RValue<Short> Short::operator=(RValue<Short> rhs) const
   1487 	{
   1488 		storeValue(rhs.value);
   1489 
   1490 		return rhs;
   1491 	}
   1492 
   1493 	RValue<Short> Short::operator=(const Short &rhs) const
   1494 	{
   1495 		Value *value = rhs.loadValue();
   1496 		storeValue(value);
   1497 
   1498 		return RValue<Short>(value);
   1499 	}
   1500 
   1501 	RValue<Short> Short::operator=(const Reference<Short> &rhs) const
   1502 	{
   1503 		Value *value = rhs.loadValue();
   1504 		storeValue(value);
   1505 
   1506 		return RValue<Short>(value);
   1507 	}
   1508 
   1509 	RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
   1510 	{
   1511 		return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
   1512 	}
   1513 
   1514 	RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
   1515 	{
   1516 		return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
   1517 	}
   1518 
   1519 	RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
   1520 	{
   1521 		return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
   1522 	}
   1523 
   1524 	RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
   1525 	{
   1526 		return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
   1527 	}
   1528 
   1529 	RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
   1530 	{
   1531 		return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
   1532 	}
   1533 
   1534 	RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
   1535 	{
   1536 		return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
   1537 	}
   1538 
   1539 	RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
   1540 	{
   1541 		return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
   1542 	}
   1543 
   1544 	RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
   1545 	{
   1546 		return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
   1547 	}
   1548 
   1549 	RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
   1550 	{
   1551 		return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
   1552 	}
   1553 
   1554 	RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
   1555 	{
   1556 		return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
   1557 	}
   1558 
   1559 	RValue<Short> operator+=(const Short &lhs, RValue<Short> rhs)
   1560 	{
   1561 		return lhs = lhs + rhs;
   1562 	}
   1563 
   1564 	RValue<Short> operator-=(const Short &lhs, RValue<Short> rhs)
   1565 	{
   1566 		return lhs = lhs - rhs;
   1567 	}
   1568 
   1569 	RValue<Short> operator*=(const Short &lhs, RValue<Short> rhs)
   1570 	{
   1571 		return lhs = lhs * rhs;
   1572 	}
   1573 
   1574 	RValue<Short> operator/=(const Short &lhs, RValue<Short> rhs)
   1575 	{
   1576 		return lhs = lhs / rhs;
   1577 	}
   1578 
   1579 	RValue<Short> operator%=(const Short &lhs, RValue<Short> rhs)
   1580 	{
   1581 		return lhs = lhs % rhs;
   1582 	}
   1583 
   1584 	RValue<Short> operator&=(const Short &lhs, RValue<Short> rhs)
   1585 	{
   1586 		return lhs = lhs & rhs;
   1587 	}
   1588 
   1589 	RValue<Short> operator|=(const Short &lhs, RValue<Short> rhs)
   1590 	{
   1591 		return lhs = lhs | rhs;
   1592 	}
   1593 
   1594 	RValue<Short> operator^=(const Short &lhs, RValue<Short> rhs)
   1595 	{
   1596 		return lhs = lhs ^ rhs;
   1597 	}
   1598 
   1599 	RValue<Short> operator<<=(const Short &lhs, RValue<Short> rhs)
   1600 	{
   1601 		return lhs = lhs << rhs;
   1602 	}
   1603 
   1604 	RValue<Short> operator>>=(const Short &lhs, RValue<Short> rhs)
   1605 	{
   1606 		return lhs = lhs >> rhs;
   1607 	}
   1608 
   1609 	RValue<Short> operator+(RValue<Short> val)
   1610 	{
   1611 		return val;
   1612 	}
   1613 
   1614 	RValue<Short> operator-(RValue<Short> val)
   1615 	{
   1616 		return RValue<Short>(Nucleus::createNeg(val.value));
   1617 	}
   1618 
   1619 	RValue<Short> operator~(RValue<Short> val)
   1620 	{
   1621 		return RValue<Short>(Nucleus::createNot(val.value));
   1622 	}
   1623 
   1624 	RValue<Short> operator++(const Short &val, int)   // Post-increment
   1625 	{
   1626 		RValue<Short> res = val;
   1627 
   1628 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((short)1));
   1629 		val.storeValue(inc);
   1630 
   1631 		return res;
   1632 	}
   1633 
   1634 	const Short &operator++(const Short &val)   // Pre-increment
   1635 	{
   1636 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantShort((short)1));
   1637 		val.storeValue(inc);
   1638 
   1639 		return val;
   1640 	}
   1641 
   1642 	RValue<Short> operator--(const Short &val, int)   // Post-decrement
   1643 	{
   1644 		RValue<Short> res = val;
   1645 
   1646 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((short)1));
   1647 		val.storeValue(inc);
   1648 
   1649 		return res;
   1650 	}
   1651 
   1652 	const Short &operator--(const Short &val)   // Pre-decrement
   1653 	{
   1654 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantShort((short)1));
   1655 		val.storeValue(inc);
   1656 
   1657 		return val;
   1658 	}
   1659 
   1660 	RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
   1661 	{
   1662 		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
   1663 	}
   1664 
   1665 	RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
   1666 	{
   1667 		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
   1668 	}
   1669 
   1670 	RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
   1671 	{
   1672 		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
   1673 	}
   1674 
   1675 	RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
   1676 	{
   1677 		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
   1678 	}
   1679 
   1680 	RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
   1681 	{
   1682 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   1683 	}
   1684 
   1685 	RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
   1686 	{
   1687 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   1688 	}
   1689 
   1690 	Type *Short::getType()
   1691 	{
   1692 		return Type::getInt16Ty(*Nucleus::getContext());
   1693 	}
   1694 
   1695 	UShort::UShort(Argument<UShort> argument)
   1696 	{
   1697 		storeValue(argument.value);
   1698 	}
   1699 
   1700 	UShort::UShort(RValue<UInt> cast)
   1701 	{
   1702 		Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
   1703 
   1704 		storeValue(integer);
   1705 	}
   1706 
   1707 	UShort::UShort(RValue<Int> cast)
   1708 	{
   1709 		Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
   1710 
   1711 		storeValue(integer);
   1712 	}
   1713 
   1714 	UShort::UShort()
   1715 	{
   1716 	}
   1717 
   1718 	UShort::UShort(unsigned short x)
   1719 	{
   1720 		storeValue(Nucleus::createConstantShort(x));
   1721 	}
   1722 
   1723 	UShort::UShort(RValue<UShort> rhs)
   1724 	{
   1725 		storeValue(rhs.value);
   1726 	}
   1727 
   1728 	UShort::UShort(const UShort &rhs)
   1729 	{
   1730 		Value *value = rhs.loadValue();
   1731 		storeValue(value);
   1732 	}
   1733 
   1734 	UShort::UShort(const Reference<UShort> &rhs)
   1735 	{
   1736 		Value *value = rhs.loadValue();
   1737 		storeValue(value);
   1738 	}
   1739 
   1740 	RValue<UShort> UShort::operator=(RValue<UShort> rhs) const
   1741 	{
   1742 		storeValue(rhs.value);
   1743 
   1744 		return rhs;
   1745 	}
   1746 
   1747 	RValue<UShort> UShort::operator=(const UShort &rhs) const
   1748 	{
   1749 		Value *value = rhs.loadValue();
   1750 		storeValue(value);
   1751 
   1752 		return RValue<UShort>(value);
   1753 	}
   1754 
   1755 	RValue<UShort> UShort::operator=(const Reference<UShort> &rhs) const
   1756 	{
   1757 		Value *value = rhs.loadValue();
   1758 		storeValue(value);
   1759 
   1760 		return RValue<UShort>(value);
   1761 	}
   1762 
   1763 	RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
   1764 	{
   1765 		return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
   1766 	}
   1767 
   1768 	RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
   1769 	{
   1770 		return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
   1771 	}
   1772 
   1773 	RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
   1774 	{
   1775 		return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
   1776 	}
   1777 
   1778 	RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
   1779 	{
   1780 		return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
   1781 	}
   1782 
   1783 	RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
   1784 	{
   1785 		return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
   1786 	}
   1787 
   1788 	RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
   1789 	{
   1790 		return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
   1791 	}
   1792 
   1793 	RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
   1794 	{
   1795 		return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
   1796 	}
   1797 
   1798 	RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
   1799 	{
   1800 		return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
   1801 	}
   1802 
   1803 	RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
   1804 	{
   1805 		return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
   1806 	}
   1807 
   1808 	RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
   1809 	{
   1810 		return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
   1811 	}
   1812 
   1813 	RValue<UShort> operator+=(const UShort &lhs, RValue<UShort> rhs)
   1814 	{
   1815 		return lhs = lhs + rhs;
   1816 	}
   1817 
   1818 	RValue<UShort> operator-=(const UShort &lhs, RValue<UShort> rhs)
   1819 	{
   1820 		return lhs = lhs - rhs;
   1821 	}
   1822 
   1823 	RValue<UShort> operator*=(const UShort &lhs, RValue<UShort> rhs)
   1824 	{
   1825 		return lhs = lhs * rhs;
   1826 	}
   1827 
   1828 	RValue<UShort> operator/=(const UShort &lhs, RValue<UShort> rhs)
   1829 	{
   1830 		return lhs = lhs / rhs;
   1831 	}
   1832 
   1833 	RValue<UShort> operator%=(const UShort &lhs, RValue<UShort> rhs)
   1834 	{
   1835 		return lhs = lhs % rhs;
   1836 	}
   1837 
   1838 	RValue<UShort> operator&=(const UShort &lhs, RValue<UShort> rhs)
   1839 	{
   1840 		return lhs = lhs & rhs;
   1841 	}
   1842 
   1843 	RValue<UShort> operator|=(const UShort &lhs, RValue<UShort> rhs)
   1844 	{
   1845 		return lhs = lhs | rhs;
   1846 	}
   1847 
   1848 	RValue<UShort> operator^=(const UShort &lhs, RValue<UShort> rhs)
   1849 	{
   1850 		return lhs = lhs ^ rhs;
   1851 	}
   1852 
   1853 	RValue<UShort> operator<<=(const UShort &lhs, RValue<UShort> rhs)
   1854 	{
   1855 		return lhs = lhs << rhs;
   1856 	}
   1857 
   1858 	RValue<UShort> operator>>=(const UShort &lhs, RValue<UShort> rhs)
   1859 	{
   1860 		return lhs = lhs >> rhs;
   1861 	}
   1862 
   1863 	RValue<UShort> operator+(RValue<UShort> val)
   1864 	{
   1865 		return val;
   1866 	}
   1867 
   1868 	RValue<UShort> operator-(RValue<UShort> val)
   1869 	{
   1870 		return RValue<UShort>(Nucleus::createNeg(val.value));
   1871 	}
   1872 
   1873 	RValue<UShort> operator~(RValue<UShort> val)
   1874 	{
   1875 		return RValue<UShort>(Nucleus::createNot(val.value));
   1876 	}
   1877 
   1878 	RValue<UShort> operator++(const UShort &val, int)   // Post-increment
   1879 	{
   1880 		RValue<UShort> res = val;
   1881 
   1882 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((unsigned short)1));
   1883 		val.storeValue(inc);
   1884 
   1885 		return res;
   1886 	}
   1887 
   1888 	const UShort &operator++(const UShort &val)   // Pre-increment
   1889 	{
   1890 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantShort((unsigned short)1));
   1891 		val.storeValue(inc);
   1892 
   1893 		return val;
   1894 	}
   1895 
   1896 	RValue<UShort> operator--(const UShort &val, int)   // Post-decrement
   1897 	{
   1898 		RValue<UShort> res = val;
   1899 
   1900 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((unsigned short)1));
   1901 		val.storeValue(inc);
   1902 
   1903 		return res;
   1904 	}
   1905 
   1906 	const UShort &operator--(const UShort &val)   // Pre-decrement
   1907 	{
   1908 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantShort((unsigned short)1));
   1909 		val.storeValue(inc);
   1910 
   1911 		return val;
   1912 	}
   1913 
   1914 	RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
   1915 	{
   1916 		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
   1917 	}
   1918 
   1919 	RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
   1920 	{
   1921 		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
   1922 	}
   1923 
   1924 	RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
   1925 	{
   1926 		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
   1927 	}
   1928 
   1929 	RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
   1930 	{
   1931 		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
   1932 	}
   1933 
   1934 	RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
   1935 	{
   1936 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   1937 	}
   1938 
   1939 	RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
   1940 	{
   1941 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   1942 	}
   1943 
   1944 	Type *UShort::getType()
   1945 	{
   1946 		return Type::getInt16Ty(*Nucleus::getContext());
   1947 	}
   1948 
   1949 	Type *Byte4::getType()
   1950 	{
   1951 		#if 0
   1952 			return VectorType::get(Byte::getType(), 4);
   1953 		#else
   1954 			return UInt::getType();   // FIXME: LLVM doesn't manipulate it as one 32-bit block
   1955 		#endif
   1956 	}
   1957 
   1958 	Type *SByte4::getType()
   1959 	{
   1960 		#if 0
   1961 			return VectorType::get(SByte::getType(), 4);
   1962 		#else
   1963 			return Int::getType();   // FIXME: LLVM doesn't manipulate it as one 32-bit block
   1964 		#endif
   1965 	}
   1966 
   1967 	Byte8::Byte8()
   1968 	{
   1969 	//	xyzw.parent = this;
   1970 	}
   1971 
   1972 	Byte8::Byte8(byte x0, byte x1, byte x2, byte x3, byte x4, byte x5, byte x6, byte x7)
   1973 	{
   1974 	//	xyzw.parent = this;
   1975 
   1976 		Constant *constantVector[8];
   1977 		constantVector[0] = Nucleus::createConstantByte(x0);
   1978 		constantVector[1] = Nucleus::createConstantByte(x1);
   1979 		constantVector[2] = Nucleus::createConstantByte(x2);
   1980 		constantVector[3] = Nucleus::createConstantByte(x3);
   1981 		constantVector[4] = Nucleus::createConstantByte(x4);
   1982 		constantVector[5] = Nucleus::createConstantByte(x5);
   1983 		constantVector[6] = Nucleus::createConstantByte(x6);
   1984 		constantVector[7] = Nucleus::createConstantByte(x7);
   1985 		Value *vector = Nucleus::createConstantVector(constantVector, 8);
   1986 
   1987 		storeValue(Nucleus::createBitCast(vector, getType()));
   1988 	}
   1989 
   1990 	Byte8::Byte8(int64_t x)
   1991 	{
   1992 	//	xyzw.parent = this;
   1993 
   1994 		Constant *constantVector[8];
   1995 		constantVector[0] = Nucleus::createConstantByte((unsigned char)(x >>  0));
   1996 		constantVector[1] = Nucleus::createConstantByte((unsigned char)(x >>  8));
   1997 		constantVector[2] = Nucleus::createConstantByte((unsigned char)(x >> 16));
   1998 		constantVector[3] = Nucleus::createConstantByte((unsigned char)(x >> 24));
   1999 		constantVector[4] = Nucleus::createConstantByte((unsigned char)(x >> 32));
   2000 		constantVector[5] = Nucleus::createConstantByte((unsigned char)(x >> 40));
   2001 		constantVector[6] = Nucleus::createConstantByte((unsigned char)(x >> 48));
   2002 		constantVector[7] = Nucleus::createConstantByte((unsigned char)(x >> 56));
   2003 		Value *vector = Nucleus::createConstantVector(constantVector, 8);
   2004 
   2005 		storeValue(Nucleus::createBitCast(vector, getType()));
   2006 	}
   2007 
   2008 	Byte8::Byte8(RValue<Byte8> rhs)
   2009 	{
   2010 	//	xyzw.parent = this;
   2011 
   2012 		storeValue(rhs.value);
   2013 	}
   2014 
   2015 	Byte8::Byte8(const Byte8 &rhs)
   2016 	{
   2017 	//	xyzw.parent = this;
   2018 
   2019 		Value *value = rhs.loadValue();
   2020 		storeValue(value);
   2021 	}
   2022 
   2023 	Byte8::Byte8(const Reference<Byte8> &rhs)
   2024 	{
   2025 	//	xyzw.parent = this;
   2026 
   2027 		Value *value = rhs.loadValue();
   2028 		storeValue(value);
   2029 	}
   2030 
   2031 	RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs) const
   2032 	{
   2033 		storeValue(rhs.value);
   2034 
   2035 		return rhs;
   2036 	}
   2037 
   2038 	RValue<Byte8> Byte8::operator=(const Byte8 &rhs) const
   2039 	{
   2040 		Value *value = rhs.loadValue();
   2041 		storeValue(value);
   2042 
   2043 		return RValue<Byte8>(value);
   2044 	}
   2045 
   2046 	RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs) const
   2047 	{
   2048 		Value *value = rhs.loadValue();
   2049 		storeValue(value);
   2050 
   2051 		return RValue<Byte8>(value);
   2052 	}
   2053 
   2054 	RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2055 	{
   2056 		if(CPUID::supportsMMX2())
   2057 		{
   2058 			return x86::paddb(lhs, rhs);
   2059 		}
   2060 		else
   2061 		{
   2062 			return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
   2063 		}
   2064 	}
   2065 
   2066 	RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2067 	{
   2068 		if(CPUID::supportsMMX2())
   2069 		{
   2070 			return x86::psubb(lhs, rhs);
   2071 		}
   2072 		else
   2073 		{
   2074 			return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
   2075 		}
   2076 	}
   2077 
   2078 //	RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2079 //	{
   2080 //		return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
   2081 //	}
   2082 
   2083 //	RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2084 //	{
   2085 //		return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
   2086 //	}
   2087 
   2088 //	RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2089 //	{
   2090 //		return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
   2091 //	}
   2092 
   2093 	RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2094 	{
   2095 		if(CPUID::supportsMMX2())
   2096 		{
   2097 			return As<Byte8>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
   2098 		}
   2099 		else
   2100 		{
   2101 			return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
   2102 		}
   2103 	}
   2104 
   2105 	RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2106 	{
   2107 		if(CPUID::supportsMMX2())
   2108 		{
   2109 			return As<Byte8>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
   2110 		}
   2111 		else
   2112 		{
   2113 			return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
   2114 		}
   2115 	}
   2116 
   2117 	RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
   2118 	{
   2119 		if(CPUID::supportsMMX2())
   2120 		{
   2121 			return As<Byte8>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
   2122 		}
   2123 		else
   2124 		{
   2125 			return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
   2126 		}
   2127 	}
   2128 
   2129 //	RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
   2130 //	{
   2131 //		return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value));
   2132 //	}
   2133 
   2134 //	RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
   2135 //	{
   2136 //		return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value));
   2137 //	}
   2138 
   2139 	RValue<Byte8> operator+=(const Byte8 &lhs, RValue<Byte8> rhs)
   2140 	{
   2141 		return lhs = lhs + rhs;
   2142 	}
   2143 
   2144 	RValue<Byte8> operator-=(const Byte8 &lhs, RValue<Byte8> rhs)
   2145 	{
   2146 		return lhs = lhs - rhs;
   2147 	}
   2148 
   2149 //	RValue<Byte8> operator*=(const Byte8 &lhs, RValue<Byte8> rhs)
   2150 //	{
   2151 //		return lhs = lhs * rhs;
   2152 //	}
   2153 
   2154 //	RValue<Byte8> operator/=(const Byte8 &lhs, RValue<Byte8> rhs)
   2155 //	{
   2156 //		return lhs = lhs / rhs;
   2157 //	}
   2158 
   2159 //	RValue<Byte8> operator%=(const Byte8 &lhs, RValue<Byte8> rhs)
   2160 //	{
   2161 //		return lhs = lhs % rhs;
   2162 //	}
   2163 
   2164 	RValue<Byte8> operator&=(const Byte8 &lhs, RValue<Byte8> rhs)
   2165 	{
   2166 		return lhs = lhs & rhs;
   2167 	}
   2168 
   2169 	RValue<Byte8> operator|=(const Byte8 &lhs, RValue<Byte8> rhs)
   2170 	{
   2171 		return lhs = lhs | rhs;
   2172 	}
   2173 
   2174 	RValue<Byte8> operator^=(const Byte8 &lhs, RValue<Byte8> rhs)
   2175 	{
   2176 		return lhs = lhs ^ rhs;
   2177 	}
   2178 
   2179 //	RValue<Byte8> operator<<=(const Byte8 &lhs, RValue<Byte8> rhs)
   2180 //	{
   2181 //		return lhs = lhs << rhs;
   2182 //	}
   2183 
   2184 //	RValue<Byte8> operator>>=(const Byte8 &lhs, RValue<Byte8> rhs)
   2185 //	{
   2186 //		return lhs = lhs >> rhs;
   2187 //	}
   2188 
   2189 //	RValue<Byte8> operator+(RValue<Byte8> val)
   2190 //	{
   2191 //		return val;
   2192 //	}
   2193 
   2194 //	RValue<Byte8> operator-(RValue<Byte8> val)
   2195 //	{
   2196 //		return RValue<Byte8>(Nucleus::createNeg(val.value));
   2197 //	}
   2198 
   2199 	RValue<Byte8> operator~(RValue<Byte8> val)
   2200 	{
   2201 		if(CPUID::supportsMMX2())
   2202 		{
   2203 			return val ^ Byte8(0xFFFFFFFFFFFFFFFF);
   2204 		}
   2205 		else
   2206 		{
   2207 			return RValue<Byte8>(Nucleus::createNot(val.value));
   2208 		}
   2209 	}
   2210 
   2211 	RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
   2212 	{
   2213 		return x86::paddusb(x, y);
   2214 	}
   2215 
   2216 	RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
   2217 	{
   2218 		return x86::psubusb(x, y);
   2219 	}
   2220 
   2221 	RValue<Short4> Unpack(RValue<Byte4> x)
   2222 	{
   2223 		Value *int2 = Nucleus::createInsertElement(UndefValue::get(VectorType::get(Int::getType(), 2)), x.value, 0);
   2224 		Value *byte8 = Nucleus::createBitCast(int2, Byte8::getType());
   2225 
   2226 		return UnpackLow(RValue<Byte8>(byte8), RValue<Byte8>(byte8));
   2227 	}
   2228 
   2229 	RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
   2230 	{
   2231 		if(CPUID::supportsMMX2())
   2232 		{
   2233 			return x86::punpcklbw(x, y);
   2234 		}
   2235 		else
   2236 		{
   2237 			Constant *shuffle[8];
   2238 			shuffle[0] = Nucleus::createConstantInt(0);
   2239 			shuffle[1] = Nucleus::createConstantInt(8);
   2240 			shuffle[2] = Nucleus::createConstantInt(1);
   2241 			shuffle[3] = Nucleus::createConstantInt(9);
   2242 			shuffle[4] = Nucleus::createConstantInt(2);
   2243 			shuffle[5] = Nucleus::createConstantInt(10);
   2244 			shuffle[6] = Nucleus::createConstantInt(3);
   2245 			shuffle[7] = Nucleus::createConstantInt(11);
   2246 
   2247 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
   2248 
   2249 			return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
   2250 		}
   2251 	}
   2252 
   2253 	RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
   2254 	{
   2255 		if(CPUID::supportsMMX2())
   2256 		{
   2257 			return x86::punpckhbw(x, y);
   2258 		}
   2259 		else
   2260 		{
   2261 			Constant *shuffle[8];
   2262 			shuffle[0] = Nucleus::createConstantInt(4);
   2263 			shuffle[1] = Nucleus::createConstantInt(12);
   2264 			shuffle[2] = Nucleus::createConstantInt(5);
   2265 			shuffle[3] = Nucleus::createConstantInt(13);
   2266 			shuffle[4] = Nucleus::createConstantInt(6);
   2267 			shuffle[5] = Nucleus::createConstantInt(14);
   2268 			shuffle[6] = Nucleus::createConstantInt(7);
   2269 			shuffle[7] = Nucleus::createConstantInt(15);
   2270 
   2271 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
   2272 
   2273 			return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
   2274 		}
   2275 	}
   2276 
   2277 	RValue<Int> SignMask(RValue<Byte8> x)
   2278 	{
   2279 		return x86::pmovmskb(x);
   2280 	}
   2281 
   2282 //	RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
   2283 //	{
   2284 //		return x86::pcmpgtb(x, y);   // FIXME: Signedness
   2285 //	}
   2286 
   2287 	RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
   2288 	{
   2289 		return x86::pcmpeqb(x, y);
   2290 	}
   2291 
   2292 	Type *Byte8::getType()
   2293 	{
   2294 		if(CPUID::supportsMMX2())
   2295 		{
   2296 			return MMX::getType();
   2297 		}
   2298 		else
   2299 		{
   2300 			return VectorType::get(Byte::getType(), 8);
   2301 		}
   2302 	}
   2303 
   2304 	SByte8::SByte8()
   2305 	{
   2306 	//	xyzw.parent = this;
   2307 	}
   2308 
   2309 	SByte8::SByte8(byte x0, byte x1, byte x2, byte x3, byte x4, byte x5, byte x6, byte x7)
   2310 	{
   2311 	//	xyzw.parent = this;
   2312 
   2313 		Constant *constantVector[8];
   2314 		constantVector[0] = Nucleus::createConstantByte(x0);
   2315 		constantVector[1] = Nucleus::createConstantByte(x1);
   2316 		constantVector[2] = Nucleus::createConstantByte(x2);
   2317 		constantVector[3] = Nucleus::createConstantByte(x3);
   2318 		constantVector[4] = Nucleus::createConstantByte(x4);
   2319 		constantVector[5] = Nucleus::createConstantByte(x5);
   2320 		constantVector[6] = Nucleus::createConstantByte(x6);
   2321 		constantVector[7] = Nucleus::createConstantByte(x7);
   2322 		Value *vector = Nucleus::createConstantVector(constantVector, 8);
   2323 
   2324 		storeValue(Nucleus::createBitCast(vector, getType()));
   2325 	}
   2326 
   2327 	SByte8::SByte8(int64_t x)
   2328 	{
   2329 	//	xyzw.parent = this;
   2330 
   2331 		Constant *constantVector[8];
   2332 		constantVector[0] = Nucleus::createConstantByte((unsigned char)(x >>  0));
   2333 		constantVector[1] = Nucleus::createConstantByte((unsigned char)(x >>  8));
   2334 		constantVector[2] = Nucleus::createConstantByte((unsigned char)(x >> 16));
   2335 		constantVector[3] = Nucleus::createConstantByte((unsigned char)(x >> 24));
   2336 		constantVector[4] = Nucleus::createConstantByte((unsigned char)(x >> 32));
   2337 		constantVector[5] = Nucleus::createConstantByte((unsigned char)(x >> 40));
   2338 		constantVector[6] = Nucleus::createConstantByte((unsigned char)(x >> 48));
   2339 		constantVector[7] = Nucleus::createConstantByte((unsigned char)(x >> 56));
   2340 		Value *vector = Nucleus::createConstantVector(constantVector, 8);
   2341 
   2342 		storeValue(Nucleus::createBitCast(vector, getType()));
   2343 	}
   2344 
   2345 	SByte8::SByte8(RValue<SByte8> rhs)
   2346 	{
   2347 	//	xyzw.parent = this;
   2348 
   2349 		storeValue(rhs.value);
   2350 	}
   2351 
   2352 	SByte8::SByte8(const SByte8 &rhs)
   2353 	{
   2354 	//	xyzw.parent = this;
   2355 
   2356 		Value *value = rhs.loadValue();
   2357 		storeValue(value);
   2358 	}
   2359 
   2360 	SByte8::SByte8(const Reference<SByte8> &rhs)
   2361 	{
   2362 	//	xyzw.parent = this;
   2363 
   2364 		Value *value = rhs.loadValue();
   2365 		storeValue(value);
   2366 	}
   2367 
   2368 	RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs) const
   2369 	{
   2370 		storeValue(rhs.value);
   2371 
   2372 		return rhs;
   2373 	}
   2374 
   2375 	RValue<SByte8> SByte8::operator=(const SByte8 &rhs) const
   2376 	{
   2377 		Value *value = rhs.loadValue();
   2378 		storeValue(value);
   2379 
   2380 		return RValue<SByte8>(value);
   2381 	}
   2382 
   2383 	RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs) const
   2384 	{
   2385 		Value *value = rhs.loadValue();
   2386 		storeValue(value);
   2387 
   2388 		return RValue<SByte8>(value);
   2389 	}
   2390 
   2391 	RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2392 	{
   2393 		if(CPUID::supportsMMX2())
   2394 		{
   2395 			return As<SByte8>(x86::paddb(As<Byte8>(lhs), As<Byte8>(rhs)));
   2396 		}
   2397 		else
   2398 		{
   2399 			return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
   2400 		}
   2401 	}
   2402 
   2403 	RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2404 	{
   2405 		if(CPUID::supportsMMX2())
   2406 		{
   2407 			return As<SByte8>(x86::psubb(As<Byte8>(lhs), As<Byte8>(rhs)));
   2408 		}
   2409 		else
   2410 		{
   2411 			return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
   2412 		}
   2413 	}
   2414 
   2415 //	RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2416 //	{
   2417 //		return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
   2418 //	}
   2419 
   2420 //	RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2421 //	{
   2422 //		return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
   2423 //	}
   2424 
   2425 //	RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2426 //	{
   2427 //		return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
   2428 //	}
   2429 
   2430 	RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2431 	{
   2432 		return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
   2433 	}
   2434 
   2435 	RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2436 	{
   2437 		return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
   2438 	}
   2439 
   2440 	RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
   2441 	{
   2442 		return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
   2443 	}
   2444 
   2445 //	RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
   2446 //	{
   2447 //		return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value));
   2448 //	}
   2449 
   2450 //	RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
   2451 //	{
   2452 //		return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value));
   2453 //	}
   2454 
   2455 	RValue<SByte8> operator+=(const SByte8 &lhs, RValue<SByte8> rhs)
   2456 	{
   2457 		return lhs = lhs + rhs;
   2458 	}
   2459 
   2460 	RValue<SByte8> operator-=(const SByte8 &lhs, RValue<SByte8> rhs)
   2461 	{
   2462 		return lhs = lhs - rhs;
   2463 	}
   2464 
   2465 //	RValue<SByte8> operator*=(const SByte8 &lhs, RValue<SByte8> rhs)
   2466 //	{
   2467 //		return lhs = lhs * rhs;
   2468 //	}
   2469 
   2470 //	RValue<SByte8> operator/=(const SByte8 &lhs, RValue<SByte8> rhs)
   2471 //	{
   2472 //		return lhs = lhs / rhs;
   2473 //	}
   2474 
   2475 //	RValue<SByte8> operator%=(const SByte8 &lhs, RValue<SByte8> rhs)
   2476 //	{
   2477 //		return lhs = lhs % rhs;
   2478 //	}
   2479 
   2480 	RValue<SByte8> operator&=(const SByte8 &lhs, RValue<SByte8> rhs)
   2481 	{
   2482 		return lhs = lhs & rhs;
   2483 	}
   2484 
   2485 	RValue<SByte8> operator|=(const SByte8 &lhs, RValue<SByte8> rhs)
   2486 	{
   2487 		return lhs = lhs | rhs;
   2488 	}
   2489 
   2490 	RValue<SByte8> operator^=(const SByte8 &lhs, RValue<SByte8> rhs)
   2491 	{
   2492 		return lhs = lhs ^ rhs;
   2493 	}
   2494 
   2495 //	RValue<SByte8> operator<<=(const SByte8 &lhs, RValue<SByte8> rhs)
   2496 //	{
   2497 //		return lhs = lhs << rhs;
   2498 //	}
   2499 
   2500 //	RValue<SByte8> operator>>=(const SByte8 &lhs, RValue<SByte8> rhs)
   2501 //	{
   2502 //		return lhs = lhs >> rhs;
   2503 //	}
   2504 
   2505 //	RValue<SByte8> operator+(RValue<SByte8> val)
   2506 //	{
   2507 //		return val;
   2508 //	}
   2509 
   2510 //	RValue<SByte8> operator-(RValue<SByte8> val)
   2511 //	{
   2512 //		return RValue<SByte8>(Nucleus::createNeg(val.value));
   2513 //	}
   2514 
   2515 	RValue<SByte8> operator~(RValue<SByte8> val)
   2516 	{
   2517 		if(CPUID::supportsMMX2())
   2518 		{
   2519 			return val ^ SByte8(0xFFFFFFFFFFFFFFFF);
   2520 		}
   2521 		else
   2522 		{
   2523 			return RValue<SByte8>(Nucleus::createNot(val.value));
   2524 		}
   2525 	}
   2526 
   2527 	RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
   2528 	{
   2529 		return x86::paddsb(x, y);
   2530 	}
   2531 
   2532 	RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
   2533 	{
   2534 		return x86::psubsb(x, y);
   2535 	}
   2536 
   2537 	RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
   2538 	{
   2539 		if(CPUID::supportsMMX2())
   2540 		{
   2541 			return As<Short4>(x86::punpcklbw(As<Byte8>(x), As<Byte8>(y)));
   2542 		}
   2543 		else
   2544 		{
   2545 			Constant *shuffle[8];
   2546 			shuffle[0] = Nucleus::createConstantInt(0);
   2547 			shuffle[1] = Nucleus::createConstantInt(8);
   2548 			shuffle[2] = Nucleus::createConstantInt(1);
   2549 			shuffle[3] = Nucleus::createConstantInt(9);
   2550 			shuffle[4] = Nucleus::createConstantInt(2);
   2551 			shuffle[5] = Nucleus::createConstantInt(10);
   2552 			shuffle[6] = Nucleus::createConstantInt(3);
   2553 			shuffle[7] = Nucleus::createConstantInt(11);
   2554 
   2555 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
   2556 
   2557 			return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
   2558 		}
   2559 	}
   2560 
   2561 	RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
   2562 	{
   2563 		if(CPUID::supportsMMX2())
   2564 		{
   2565 			return As<Short4>(x86::punpckhbw(As<Byte8>(x), As<Byte8>(y)));
   2566 		}
   2567 		else
   2568 		{
   2569 			Constant *shuffle[8];
   2570 			shuffle[0] = Nucleus::createConstantInt(4);
   2571 			shuffle[1] = Nucleus::createConstantInt(12);
   2572 			shuffle[2] = Nucleus::createConstantInt(5);
   2573 			shuffle[3] = Nucleus::createConstantInt(13);
   2574 			shuffle[4] = Nucleus::createConstantInt(6);
   2575 			shuffle[5] = Nucleus::createConstantInt(14);
   2576 			shuffle[6] = Nucleus::createConstantInt(7);
   2577 			shuffle[7] = Nucleus::createConstantInt(15);
   2578 
   2579 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
   2580 
   2581 			return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
   2582 		}
   2583 	}
   2584 
   2585 	RValue<Int> SignMask(RValue<SByte8> x)
   2586 	{
   2587 		return x86::pmovmskb(As<Byte8>(x));
   2588 	}
   2589 
   2590 	RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
   2591 	{
   2592 		return x86::pcmpgtb(x, y);
   2593 	}
   2594 
   2595 	RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
   2596 	{
   2597 		return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
   2598 	}
   2599 
   2600 	Type *SByte8::getType()
   2601 	{
   2602 		if(CPUID::supportsMMX2())
   2603 		{
   2604 			return MMX::getType();
   2605 		}
   2606 		else
   2607 		{
   2608 			return VectorType::get(SByte::getType(), 8);
   2609 		}
   2610 	}
   2611 
   2612 	Byte16::Byte16(RValue<Byte16> rhs)
   2613 	{
   2614 	//	xyzw.parent = this;
   2615 
   2616 		storeValue(rhs.value);
   2617 	}
   2618 
   2619 	Byte16::Byte16(const Byte16 &rhs)
   2620 	{
   2621 	//	xyzw.parent = this;
   2622 
   2623 		Value *value = rhs.loadValue();
   2624 		storeValue(value);
   2625 	}
   2626 
   2627 	Byte16::Byte16(const Reference<Byte16> &rhs)
   2628 	{
   2629 	//	xyzw.parent = this;
   2630 
   2631 		Value *value = rhs.loadValue();
   2632 		storeValue(value);
   2633 	}
   2634 
   2635 	RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs) const
   2636 	{
   2637 		storeValue(rhs.value);
   2638 
   2639 		return rhs;
   2640 	}
   2641 
   2642 	RValue<Byte16> Byte16::operator=(const Byte16 &rhs) const
   2643 	{
   2644 		Value *value = rhs.loadValue();
   2645 		storeValue(value);
   2646 
   2647 		return RValue<Byte16>(value);
   2648 	}
   2649 
   2650 	RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs) const
   2651 	{
   2652 		Value *value = rhs.loadValue();
   2653 		storeValue(value);
   2654 
   2655 		return RValue<Byte16>(value);
   2656 	}
   2657 
   2658 	Type *Byte16::getType()
   2659 	{
   2660 		return VectorType::get(Byte::getType(), 16);
   2661 	}
   2662 
   2663 	Type *SByte16::getType()
   2664 	{
   2665 		return VectorType::get(SByte::getType(), 16);
   2666 	}
   2667 
   2668 	Short4::Short4(RValue<Int> cast)
   2669 	{
   2670 		Value *extend = Nucleus::createZExt(cast.value, Long::getType());
   2671 		Value *swizzle = Swizzle(RValue<Short4>(extend), 0x00).value;
   2672 
   2673 		storeValue(swizzle);
   2674 	}
   2675 
   2676 	Short4::Short4(RValue<Int4> cast)
   2677 	{
   2678 		Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
   2679 
   2680 		#if 0   // FIXME: Check codegen (pshuflw phshufhw pshufd)
   2681 			Constant *pack[8];
   2682 			pack[0] = Nucleus::createConstantInt(0);
   2683 			pack[1] = Nucleus::createConstantInt(2);
   2684 			pack[2] = Nucleus::createConstantInt(4);
   2685 			pack[3] = Nucleus::createConstantInt(6);
   2686 
   2687 			Value *short4 = Nucleus::createShuffleVector(short8, short8, Nucleus::createConstantVector(pack, 4));
   2688 		#else
   2689 			Value *packed;
   2690 
   2691 			// FIXME: Use Swizzle<Short8>
   2692 			if(!CPUID::supportsSSSE3())
   2693 			{
   2694 				Constant *pshuflw[8];
   2695 				pshuflw[0] = Nucleus::createConstantInt(0);
   2696 				pshuflw[1] = Nucleus::createConstantInt(2);
   2697 				pshuflw[2] = Nucleus::createConstantInt(0);
   2698 				pshuflw[3] = Nucleus::createConstantInt(2);
   2699 				pshuflw[4] = Nucleus::createConstantInt(4);
   2700 				pshuflw[5] = Nucleus::createConstantInt(5);
   2701 				pshuflw[6] = Nucleus::createConstantInt(6);
   2702 				pshuflw[7] = Nucleus::createConstantInt(7);
   2703 
   2704 				Constant *pshufhw[8];
   2705 				pshufhw[0] = Nucleus::createConstantInt(0);
   2706 				pshufhw[1] = Nucleus::createConstantInt(1);
   2707 				pshufhw[2] = Nucleus::createConstantInt(2);
   2708 				pshufhw[3] = Nucleus::createConstantInt(3);
   2709 				pshufhw[4] = Nucleus::createConstantInt(4);
   2710 				pshufhw[5] = Nucleus::createConstantInt(6);
   2711 				pshufhw[6] = Nucleus::createConstantInt(4);
   2712 				pshufhw[7] = Nucleus::createConstantInt(6);
   2713 
   2714 				Value *shuffle1 = Nucleus::createShuffleVector(short8, UndefValue::get(Short8::getType()), Nucleus::createConstantVector(pshuflw, 8));
   2715 				Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, UndefValue::get(Short8::getType()), Nucleus::createConstantVector(pshufhw, 8));
   2716 				Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType());
   2717 				packed = Nucleus::createSwizzle(int4, 0x88);
   2718 			}
   2719 			else
   2720 			{
   2721 				Constant *pshufb[16];
   2722 				pshufb[0] = Nucleus::createConstantInt(0);
   2723 				pshufb[1] = Nucleus::createConstantInt(1);
   2724 				pshufb[2] = Nucleus::createConstantInt(4);
   2725 				pshufb[3] = Nucleus::createConstantInt(5);
   2726 				pshufb[4] = Nucleus::createConstantInt(8);
   2727 				pshufb[5] = Nucleus::createConstantInt(9);
   2728 				pshufb[6] = Nucleus::createConstantInt(12);
   2729 				pshufb[7] = Nucleus::createConstantInt(13);
   2730 				pshufb[8] = Nucleus::createConstantInt(0);
   2731 				pshufb[9] = Nucleus::createConstantInt(1);
   2732 				pshufb[10] = Nucleus::createConstantInt(4);
   2733 				pshufb[11] = Nucleus::createConstantInt(5);
   2734 				pshufb[12] = Nucleus::createConstantInt(8);
   2735 				pshufb[13] = Nucleus::createConstantInt(9);
   2736 				pshufb[14] = Nucleus::createConstantInt(12);
   2737 				pshufb[15] = Nucleus::createConstantInt(13);
   2738 
   2739 				Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());
   2740 				packed = Nucleus::createShuffleVector(byte16, UndefValue::get(Byte16::getType()), Nucleus::createConstantVector(pshufb, 16));
   2741 			}
   2742 
   2743 			#if 0   // FIXME: No optimal instruction selection
   2744 				Value *qword2 = Nucleus::createBitCast(packed, Long2::getType());
   2745 				Value *element = Nucleus::createExtractElement(qword2, 0);
   2746 				Value *short4 = Nucleus::createBitCast(element, Short4::getType());
   2747 			#else   // FIXME: Requires SSE
   2748 				Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;
   2749 				Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
   2750 			#endif
   2751 		#endif
   2752 
   2753 		storeValue(short4);
   2754 	}
   2755 
   2756 //	Short4::Short4(RValue<Float> cast)
   2757 //	{
   2758 //	}
   2759 
   2760 	Short4::Short4(RValue<Float4> cast)
   2761 	{
   2762 		Int4 v4i32 = Int4(cast);
   2763 		v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
   2764 
   2765 		storeValue(As<Short4>(Int2(v4i32)).value);
   2766 	}
   2767 
   2768 	Short4::Short4()
   2769 	{
   2770 	//	xyzw.parent = this;
   2771 	}
   2772 
   2773 	Short4::Short4(short xyzw)
   2774 	{
   2775 		//	xyzw.parent = this;
   2776 
   2777 		Constant *constantVector[4];
   2778 		constantVector[0] = Nucleus::createConstantShort(xyzw);
   2779 		constantVector[1] = Nucleus::createConstantShort(xyzw);
   2780 		constantVector[2] = Nucleus::createConstantShort(xyzw);
   2781 		constantVector[3] = Nucleus::createConstantShort(xyzw);
   2782 		Value *vector = Nucleus::createConstantVector(constantVector, 4);
   2783 
   2784 		storeValue(Nucleus::createBitCast(vector, getType()));
   2785 	}
   2786 
   2787 	Short4::Short4(short x, short y, short z, short w)
   2788 	{
   2789 	//	xyzw.parent = this;
   2790 
   2791 		Constant *constantVector[4];
   2792 		constantVector[0] = Nucleus::createConstantShort(x);
   2793 		constantVector[1] = Nucleus::createConstantShort(y);
   2794 		constantVector[2] = Nucleus::createConstantShort(z);
   2795 		constantVector[3] = Nucleus::createConstantShort(w);
   2796 		Value *vector = Nucleus::createConstantVector(constantVector, 4);
   2797 
   2798 		storeValue(Nucleus::createBitCast(vector, getType()));
   2799 	}
   2800 
   2801 	Short4::Short4(RValue<Short4> rhs)
   2802 	{
   2803 	//	xyzw.parent = this;
   2804 
   2805 		storeValue(rhs.value);
   2806 	}
   2807 
   2808 	Short4::Short4(const Short4 &rhs)
   2809 	{
   2810 	//	xyzw.parent = this;
   2811 
   2812 		Value *value = rhs.loadValue();
   2813 		storeValue(value);
   2814 	}
   2815 
   2816 	Short4::Short4(const Reference<Short4> &rhs)
   2817 	{
   2818 	//	xyzw.parent = this;
   2819 
   2820 		Value *value = rhs.loadValue();
   2821 		storeValue(value);
   2822 	}
   2823 
   2824 	Short4::Short4(RValue<UShort4> rhs)
   2825 	{
   2826 	//	xyzw.parent = this;
   2827 
   2828 		storeValue(rhs.value);
   2829 	}
   2830 
   2831 	Short4::Short4(const UShort4 &rhs)
   2832 	{
   2833 	//	xyzw.parent = this;
   2834 
   2835 		storeValue(rhs.loadValue());
   2836 	}
   2837 
   2838 	Short4::Short4(const Reference<UShort4> &rhs)
   2839 	{
   2840 	//	xyzw.parent = this;
   2841 
   2842 		storeValue(rhs.loadValue());
   2843 	}
   2844 
   2845 	RValue<Short4> Short4::operator=(RValue<Short4> rhs) const
   2846 	{
   2847 		storeValue(rhs.value);
   2848 
   2849 		return rhs;
   2850 	}
   2851 
   2852 	RValue<Short4> Short4::operator=(const Short4 &rhs) const
   2853 	{
   2854 		Value *value = rhs.loadValue();
   2855 		storeValue(value);
   2856 
   2857 		return RValue<Short4>(value);
   2858 	}
   2859 
   2860 	RValue<Short4> Short4::operator=(const Reference<Short4> &rhs) const
   2861 	{
   2862 		Value *value = rhs.loadValue();
   2863 		storeValue(value);
   2864 
   2865 		return RValue<Short4>(value);
   2866 	}
   2867 
   2868 	RValue<Short4> Short4::operator=(RValue<UShort4> rhs) const
   2869 	{
   2870 		storeValue(rhs.value);
   2871 
   2872 		return RValue<Short4>(rhs);
   2873 	}
   2874 
   2875 	RValue<Short4> Short4::operator=(const UShort4 &rhs) const
   2876 	{
   2877 		Value *value = rhs.loadValue();
   2878 		storeValue(value);
   2879 
   2880 		return RValue<Short4>(value);
   2881 	}
   2882 
   2883 	RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs) const
   2884 	{
   2885 		Value *value = rhs.loadValue();
   2886 		storeValue(value);
   2887 
   2888 		return RValue<Short4>(value);
   2889 	}
   2890 
   2891 	RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
   2892 	{
   2893 		if(CPUID::supportsMMX2())
   2894 		{
   2895 			return x86::paddw(lhs, rhs);
   2896 		}
   2897 		else
   2898 		{
   2899 			return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
   2900 		}
   2901 	}
   2902 
   2903 	RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
   2904 	{
   2905 		if(CPUID::supportsMMX2())
   2906 		{
   2907 			return x86::psubw(lhs, rhs);
   2908 		}
   2909 		else
   2910 		{
   2911 			return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
   2912 		}
   2913 	}
   2914 
   2915 	RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
   2916 	{
   2917 		if(CPUID::supportsMMX2())
   2918 		{
   2919 			return x86::pmullw(lhs, rhs);
   2920 		}
   2921 		else
   2922 		{
   2923 			return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
   2924 		}
   2925 	}
   2926 
   2927 //	RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
   2928 //	{
   2929 //		return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
   2930 //	}
   2931 
   2932 //	RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
   2933 //	{
   2934 //		return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
   2935 //	}
   2936 
   2937 	RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
   2938 	{
   2939 		if(CPUID::supportsMMX2())
   2940 		{
   2941 			return x86::pand(lhs, rhs);
   2942 		}
   2943 		else
   2944 		{
   2945 			return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
   2946 		}
   2947 	}
   2948 
   2949 	RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
   2950 	{
   2951 		if(CPUID::supportsMMX2())
   2952 		{
   2953 			return x86::por(lhs, rhs);
   2954 		}
   2955 		else
   2956 		{
   2957 			return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
   2958 		}
   2959 	}
   2960 
   2961 	RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
   2962 	{
   2963 		if(CPUID::supportsMMX2())
   2964 		{
   2965 			return x86::pxor(lhs, rhs);
   2966 		}
   2967 		else
   2968 		{
   2969 			return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
   2970 		}
   2971 	}
   2972 
   2973 	RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
   2974 	{
   2975 	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
   2976 
   2977 		return x86::psllw(lhs, rhs);
   2978 	}
   2979 
   2980 	RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
   2981 	{
   2982 	//	return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value));
   2983 
   2984 		return x86::psraw(lhs, rhs);
   2985 	}
   2986 
   2987 	RValue<Short4> operator<<(RValue<Short4> lhs, RValue<Long1> rhs)
   2988 	{
   2989 	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
   2990 
   2991 		return x86::psllw(lhs, rhs);
   2992 	}
   2993 
   2994 	RValue<Short4> operator>>(RValue<Short4> lhs, RValue<Long1> rhs)
   2995 	{
   2996 	//	return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value));
   2997 
   2998 		return x86::psraw(lhs, rhs);
   2999 	}
   3000 
   3001 	RValue<Short4> operator+=(const Short4 &lhs, RValue<Short4> rhs)
   3002 	{
   3003 		return lhs = lhs + rhs;
   3004 	}
   3005 
   3006 	RValue<Short4> operator-=(const Short4 &lhs, RValue<Short4> rhs)
   3007 	{
   3008 		return lhs = lhs - rhs;
   3009 	}
   3010 
   3011 	RValue<Short4> operator*=(const Short4 &lhs, RValue<Short4> rhs)
   3012 	{
   3013 		return lhs = lhs * rhs;
   3014 	}
   3015 
   3016 //	RValue<Short4> operator/=(const Short4 &lhs, RValue<Short4> rhs)
   3017 //	{
   3018 //		return lhs = lhs / rhs;
   3019 //	}
   3020 
   3021 //	RValue<Short4> operator%=(const Short4 &lhs, RValue<Short4> rhs)
   3022 //	{
   3023 //		return lhs = lhs % rhs;
   3024 //	}
   3025 
   3026 	RValue<Short4> operator&=(const Short4 &lhs, RValue<Short4> rhs)
   3027 	{
   3028 		return lhs = lhs & rhs;
   3029 	}
   3030 
   3031 	RValue<Short4> operator|=(const Short4 &lhs, RValue<Short4> rhs)
   3032 	{
   3033 		return lhs = lhs | rhs;
   3034 	}
   3035 
   3036 	RValue<Short4> operator^=(const Short4 &lhs, RValue<Short4> rhs)
   3037 	{
   3038 		return lhs = lhs ^ rhs;
   3039 	}
   3040 
   3041 	RValue<Short4> operator<<=(const Short4 &lhs, unsigned char rhs)
   3042 	{
   3043 		return lhs = lhs << rhs;
   3044 	}
   3045 
   3046 	RValue<Short4> operator>>=(const Short4 &lhs, unsigned char rhs)
   3047 	{
   3048 		return lhs = lhs >> rhs;
   3049 	}
   3050 
   3051 	RValue<Short4> operator<<=(const Short4 &lhs, RValue<Long1> rhs)
   3052 	{
   3053 		return lhs = lhs << rhs;
   3054 	}
   3055 
   3056 	RValue<Short4> operator>>=(const Short4 &lhs, RValue<Long1> rhs)
   3057 	{
   3058 		return lhs = lhs >> rhs;
   3059 	}
   3060 
   3061 //	RValue<Short4> operator+(RValue<Short4> val)
   3062 //	{
   3063 //		return val;
   3064 //	}
   3065 
   3066 	RValue<Short4> operator-(RValue<Short4> val)
   3067 	{
   3068 		if(CPUID::supportsMMX2())
   3069 		{
   3070 			return Short4(0, 0, 0, 0) - val;
   3071 		}
   3072 		else
   3073 		{
   3074 			return RValue<Short4>(Nucleus::createNeg(val.value));
   3075 		}
   3076 	}
   3077 
   3078 	RValue<Short4> operator~(RValue<Short4> val)
   3079 	{
   3080 		if(CPUID::supportsMMX2())
   3081 		{
   3082 			return val ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu);
   3083 		}
   3084 		else
   3085 		{
   3086 			return RValue<Short4>(Nucleus::createNot(val.value));
   3087 		}
   3088 	}
   3089 
   3090 	RValue<Short4> RoundShort4(RValue<Float4> cast)
   3091 	{
   3092 		RValue<Int4> v4i32 = x86::cvtps2dq(cast);
   3093 		RValue<Short8> v8i16 = x86::packssdw(v4i32, v4i32);
   3094 
   3095 		return As<Short4>(Int2(As<Int4>(v8i16)));
   3096 	}
   3097 
   3098 	RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
   3099 	{
   3100 		return x86::pmaxsw(x, y);
   3101 	}
   3102 
   3103 	RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
   3104 	{
   3105 		return x86::pminsw(x, y);
   3106 	}
   3107 
   3108 	RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
   3109 	{
   3110 		return x86::paddsw(x, y);
   3111 	}
   3112 
   3113 	RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
   3114 	{
   3115 		return x86::psubsw(x, y);
   3116 	}
   3117 
   3118 	RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
   3119 	{
   3120 		return x86::pmulhw(x, y);
   3121 	}
   3122 
   3123 	RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
   3124 	{
   3125 		return x86::pmaddwd(x, y);
   3126 	}
   3127 
   3128 	RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
   3129 	{
   3130 		return x86::packsswb(x, y);
   3131 	}
   3132 
   3133 	RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
   3134 	{
   3135 		if(CPUID::supportsMMX2())
   3136 		{
   3137 			return x86::punpcklwd(x, y);
   3138 		}
   3139 		else
   3140 		{
   3141 			Constant *shuffle[4];
   3142 			shuffle[0] = Nucleus::createConstantInt(0);
   3143 			shuffle[1] = Nucleus::createConstantInt(4);
   3144 			shuffle[2] = Nucleus::createConstantInt(1);
   3145 			shuffle[3] = Nucleus::createConstantInt(5);
   3146 
   3147 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4));
   3148 
   3149 			return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
   3150 		}
   3151 	}
   3152 
   3153 	RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
   3154 	{
   3155 		if(CPUID::supportsMMX2())
   3156 		{
   3157 			return x86::punpckhwd(x, y);
   3158 		}
   3159 		else
   3160 		{
   3161 			Constant *shuffle[4];
   3162 			shuffle[0] = Nucleus::createConstantInt(2);
   3163 			shuffle[1] = Nucleus::createConstantInt(6);
   3164 			shuffle[2] = Nucleus::createConstantInt(3);
   3165 			shuffle[3] = Nucleus::createConstantInt(7);
   3166 
   3167 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4));
   3168 
   3169 			return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
   3170 		}
   3171 	}
   3172 
   3173 	RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
   3174 	{
   3175 		if(CPUID::supportsMMX2())
   3176 		{
   3177 			return x86::pshufw(x, select);
   3178 		}
   3179 		else
   3180 		{
   3181 			return RValue<Short4>(Nucleus::createSwizzle(x.value, select));
   3182 		}
   3183 	}
   3184 
   3185 	RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
   3186 	{
   3187 		if(CPUID::supportsMMX2())
   3188 		{
   3189 			return x86::pinsrw(val, Int(element), i);
   3190 		}
   3191 		else
   3192 		{
   3193 			return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
   3194 		}
   3195 	}
   3196 
   3197 	RValue<Short> Extract(RValue<Short4> val, int i)
   3198 	{
   3199 		if(CPUID::supportsMMX2())
   3200 		{
   3201 			return Short(x86::pextrw(val, i));
   3202 		}
   3203 		else
   3204 		{
   3205 			return RValue<Short>(Nucleus::createExtractElement(val.value, i));
   3206 		}
   3207 	}
   3208 
   3209 	RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
   3210 	{
   3211 		return x86::pcmpgtw(x, y);
   3212 	}
   3213 
   3214 	RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
   3215 	{
   3216 		return x86::pcmpeqw(x, y);
   3217 	}
   3218 
   3219 	Type *Short4::getType()
   3220 	{
   3221 		if(CPUID::supportsMMX2())
   3222 		{
   3223 			return MMX::getType();
   3224 		}
   3225 		else
   3226 		{
   3227 			return VectorType::get(Short::getType(), 4);
   3228 		}
   3229 	}
   3230 
   3231 	UShort4::UShort4(RValue<Int4> cast)
   3232 	{
   3233 		*this = Short4(cast);
   3234 	}
   3235 
   3236 	UShort4::UShort4(RValue<Float4> cast, bool saturate)
   3237 	{
   3238 		Float4 sat;
   3239 
   3240 		if(saturate)
   3241 		{
   3242 			if(CPUID::supportsSSE4_1())
   3243 			{
   3244 				sat = Min(cast, Float4(0xFFFF));   // packusdw takes care of 0x0000 saturation
   3245 			}
   3246 			else
   3247 			{
   3248 				sat = Max(Min(cast, Float4(0xFFFF)), Float4(0x0000));
   3249 			}
   3250 		}
   3251 		else
   3252 		{
   3253 			sat = cast;
   3254 		}
   3255 
   3256 		Int4 int4(sat);
   3257 
   3258 		if(!saturate || !CPUID::supportsSSE4_1())
   3259 		{
   3260 			*this = Short4(Int4(int4));
   3261 		}
   3262 		else
   3263 		{
   3264 			*this = As<Short4>(Int2(As<Int4>(x86::packusdw(As<UInt4>(int4), As<UInt4>(int4)))));
   3265 		}
   3266 	}
   3267 
   3268 	UShort4::UShort4()
   3269 	{
   3270 	//	xyzw.parent = this;
   3271 	}
   3272 
   3273 	UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
   3274 	{
   3275 	//	xyzw.parent = this;
   3276 
   3277 		Constant *constantVector[4];
   3278 		constantVector[0] = Nucleus::createConstantShort(x);
   3279 		constantVector[1] = Nucleus::createConstantShort(y);
   3280 		constantVector[2] = Nucleus::createConstantShort(z);
   3281 		constantVector[3] = Nucleus::createConstantShort(w);
   3282 		Value *vector = Nucleus::createConstantVector(constantVector, 4);
   3283 
   3284 		storeValue(Nucleus::createBitCast(vector, getType()));
   3285 	}
   3286 
   3287 	UShort4::UShort4(RValue<UShort4> rhs)
   3288 	{
   3289 	//	xyzw.parent = this;
   3290 
   3291 		storeValue(rhs.value);
   3292 	}
   3293 
   3294 	UShort4::UShort4(const UShort4 &rhs)
   3295 	{
   3296 	//	xyzw.parent = this;
   3297 
   3298 		Value *value = rhs.loadValue();
   3299 		storeValue(value);
   3300 	}
   3301 
   3302 	UShort4::UShort4(const Reference<UShort4> &rhs)
   3303 	{
   3304 	//	xyzw.parent = this;
   3305 
   3306 		Value *value = rhs.loadValue();
   3307 		storeValue(value);
   3308 	}
   3309 
   3310 	UShort4::UShort4(RValue<Short4> rhs)
   3311 	{
   3312 	//	xyzw.parent = this;
   3313 
   3314 		storeValue(rhs.value);
   3315 	}
   3316 
   3317 	UShort4::UShort4(const Short4 &rhs)
   3318 	{
   3319 	//	xyzw.parent = this;
   3320 
   3321 		Value *value = rhs.loadValue();
   3322 		storeValue(value);
   3323 	}
   3324 
   3325 	UShort4::UShort4(const Reference<Short4> &rhs)
   3326 	{
   3327 	//	xyzw.parent = this;
   3328 
   3329 		Value *value = rhs.loadValue();
   3330 		storeValue(value);
   3331 	}
   3332 
   3333 	RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs) const
   3334 	{
   3335 		storeValue(rhs.value);
   3336 
   3337 		return rhs;
   3338 	}
   3339 
   3340 	RValue<UShort4> UShort4::operator=(const UShort4 &rhs) const
   3341 	{
   3342 		Value *value = rhs.loadValue();
   3343 		storeValue(value);
   3344 
   3345 		return RValue<UShort4>(value);
   3346 	}
   3347 
   3348 	RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs) const
   3349 	{
   3350 		Value *value = rhs.loadValue();
   3351 		storeValue(value);
   3352 
   3353 		return RValue<UShort4>(value);
   3354 	}
   3355 
   3356 	RValue<UShort4> UShort4::operator=(RValue<Short4> rhs) const
   3357 	{
   3358 		storeValue(rhs.value);
   3359 
   3360 		return RValue<UShort4>(rhs);
   3361 	}
   3362 
   3363 	RValue<UShort4> UShort4::operator=(const Short4 &rhs) const
   3364 	{
   3365 		Value *value = rhs.loadValue();
   3366 		storeValue(value);
   3367 
   3368 		return RValue<UShort4>(value);
   3369 	}
   3370 
   3371 	RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs) const
   3372 	{
   3373 		Value *value = rhs.loadValue();
   3374 		storeValue(value);
   3375 
   3376 		return RValue<UShort4>(value);
   3377 	}
   3378 
   3379 	RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
   3380 	{
   3381 		if(CPUID::supportsMMX2())
   3382 		{
   3383 			return As<UShort4>(x86::paddw(As<Short4>(lhs), As<Short4>(rhs)));
   3384 		}
   3385 		else
   3386 		{
   3387 			return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
   3388 		}
   3389 	}
   3390 
   3391 	RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
   3392 	{
   3393 		if(CPUID::supportsMMX2())
   3394 		{
   3395 			return As<UShort4>(x86::psubw(As<Short4>(lhs), As<Short4>(rhs)));
   3396 		}
   3397 		else
   3398 		{
   3399 			return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
   3400 		}
   3401 	}
   3402 
   3403 
   3404 	RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
   3405 	{
   3406 		if(CPUID::supportsMMX2())
   3407 		{
   3408 			return As<UShort4>(x86::pmullw(As<Short4>(lhs), As<Short4>(rhs)));
   3409 		}
   3410 		else
   3411 		{
   3412 			return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
   3413 		}
   3414 	}
   3415 
   3416 	RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
   3417 	{
   3418 	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
   3419 
   3420 		return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
   3421 	}
   3422 
   3423 	RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
   3424 	{
   3425 	//	return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
   3426 
   3427 		return x86::psrlw(lhs, rhs);
   3428 	}
   3429 
   3430 	RValue<UShort4> operator<<(RValue<UShort4> lhs, RValue<Long1> rhs)
   3431 	{
   3432 	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
   3433 
   3434 		return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
   3435 	}
   3436 
   3437 	RValue<UShort4> operator>>(RValue<UShort4> lhs, RValue<Long1> rhs)
   3438 	{
   3439 	//	return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
   3440 
   3441 		return x86::psrlw(lhs, rhs);
   3442 	}
   3443 
   3444 	RValue<UShort4> operator<<=(const UShort4 &lhs, unsigned char rhs)
   3445 	{
   3446 		return lhs = lhs << rhs;
   3447 	}
   3448 
   3449 	RValue<UShort4> operator>>=(const UShort4 &lhs, unsigned char rhs)
   3450 	{
   3451 		return lhs = lhs >> rhs;
   3452 	}
   3453 
   3454 	RValue<UShort4> operator<<=(const UShort4 &lhs, RValue<Long1> rhs)
   3455 	{
   3456 		return lhs = lhs << rhs;
   3457 	}
   3458 
   3459 	RValue<UShort4> operator>>=(const UShort4 &lhs, RValue<Long1> rhs)
   3460 	{
   3461 		return lhs = lhs >> rhs;
   3462 	}
   3463 
   3464 	RValue<UShort4> operator~(RValue<UShort4> val)
   3465 	{
   3466 		if(CPUID::supportsMMX2())
   3467 		{
   3468 			return As<UShort4>(As<Short4>(val) ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu));
   3469 		}
   3470 		else
   3471 		{
   3472 			return RValue<UShort4>(Nucleus::createNot(val.value));
   3473 		}
   3474 	}
   3475 
   3476 	RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
   3477 	{
   3478 		return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
   3479 	}
   3480 
   3481 	RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
   3482 	{
   3483 		return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
   3484 	}
   3485 
   3486 	RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
   3487 	{
   3488 		return x86::paddusw(x, y);
   3489 	}
   3490 
   3491 	RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
   3492 	{
   3493 		return x86::psubusw(x, y);
   3494 	}
   3495 
   3496 	RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
   3497 	{
   3498 		return x86::pmulhuw(x, y);
   3499 	}
   3500 
   3501 	RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
   3502 	{
   3503 		return x86::pavgw(x, y);
   3504 	}
   3505 
   3506 	RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
   3507 	{
   3508 		return x86::packuswb(x, y);
   3509 	}
   3510 
   3511 	Type *UShort4::getType()
   3512 	{
   3513 		if(CPUID::supportsMMX2())
   3514 		{
   3515 			return MMX::getType();
   3516 		}
   3517 		else
   3518 		{
   3519 			return VectorType::get(UShort::getType(), 4);
   3520 		}
   3521 	}
   3522 
   3523 	Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
   3524 	{
   3525 	//	xyzw.parent = this;
   3526 
   3527 		Constant *constantVector[8];
   3528 		constantVector[0] = Nucleus::createConstantShort(c0);
   3529 		constantVector[1] = Nucleus::createConstantShort(c1);
   3530 		constantVector[2] = Nucleus::createConstantShort(c2);
   3531 		constantVector[3] = Nucleus::createConstantShort(c3);
   3532 		constantVector[4] = Nucleus::createConstantShort(c4);
   3533 		constantVector[5] = Nucleus::createConstantShort(c5);
   3534 		constantVector[6] = Nucleus::createConstantShort(c6);
   3535 		constantVector[7] = Nucleus::createConstantShort(c7);
   3536 
   3537 		storeValue(Nucleus::createConstantVector(constantVector, 8));
   3538 	}
   3539 
   3540 	Short8::Short8(RValue<Short8> rhs)
   3541 	{
   3542 	//	xyzw.parent = this;
   3543 
   3544 		storeValue(rhs.value);
   3545 	}
   3546 
   3547 	Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
   3548 	{
   3549 		Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
   3550 		Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
   3551 
   3552 		Value *long2 = UndefValue::get(Long2::getType());
   3553 		long2 = Nucleus::createInsertElement(long2, loLong, 0);
   3554 		long2 = Nucleus::createInsertElement(long2, hiLong, 1);
   3555 		Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
   3556 
   3557 		storeValue(short8);
   3558 	}
   3559 
   3560 	RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
   3561 	{
   3562 		return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
   3563 	}
   3564 
   3565 	RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
   3566 	{
   3567 		return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
   3568 	}
   3569 
   3570 	RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
   3571 	{
   3572 		return x86::psllw(lhs, rhs);   // FIXME: Fallback required
   3573 	}
   3574 
   3575 	RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
   3576 	{
   3577 		return x86::psraw(lhs, rhs);   // FIXME: Fallback required
   3578 	}
   3579 
   3580 	RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
   3581 	{
   3582 		return x86::pmaddwd(x, y);   // FIXME: Fallback required
   3583 	}
   3584 
   3585 	RValue<Int4> Abs(RValue<Int4> x)
   3586 	{
   3587 		if(CPUID::supportsSSSE3())
   3588 		{
   3589 			return x86::pabsd(x);
   3590 		}
   3591 		else
   3592 		{
   3593 			Int4 mask = (x >> 31);
   3594 			return (mask ^ x) - mask;
   3595 		}
   3596 	}
   3597 
   3598 	RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
   3599 	{
   3600 		return x86::pmulhw(x, y);   // FIXME: Fallback required
   3601 	}
   3602 
   3603 	Type *Short8::getType()
   3604 	{
   3605 		return VectorType::get(Short::getType(), 8);
   3606 	}
   3607 
   3608 	UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
   3609 	{
   3610 	//	xyzw.parent = this;
   3611 
   3612 		Constant *constantVector[8];
   3613 		constantVector[0] = Nucleus::createConstantShort(c0);
   3614 		constantVector[1] = Nucleus::createConstantShort(c1);
   3615 		constantVector[2] = Nucleus::createConstantShort(c2);
   3616 		constantVector[3] = Nucleus::createConstantShort(c3);
   3617 		constantVector[4] = Nucleus::createConstantShort(c4);
   3618 		constantVector[5] = Nucleus::createConstantShort(c5);
   3619 		constantVector[6] = Nucleus::createConstantShort(c6);
   3620 		constantVector[7] = Nucleus::createConstantShort(c7);
   3621 
   3622 		storeValue(Nucleus::createConstantVector(constantVector, 8));
   3623 	}
   3624 
   3625 	UShort8::UShort8(RValue<UShort8> rhs)
   3626 	{
   3627 	//	xyzw.parent = this;
   3628 
   3629 		storeValue(rhs.value);
   3630 	}
   3631 
   3632 	UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
   3633 	{
   3634 		Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
   3635 		Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
   3636 
   3637 		Value *long2 = UndefValue::get(Long2::getType());
   3638 		long2 = Nucleus::createInsertElement(long2, loLong, 0);
   3639 		long2 = Nucleus::createInsertElement(long2, hiLong, 1);
   3640 		Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
   3641 
   3642 		storeValue(short8);
   3643 	}
   3644 
   3645 	RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs) const
   3646 	{
   3647 		storeValue(rhs.value);
   3648 
   3649 		return rhs;
   3650 	}
   3651 
   3652 	RValue<UShort8> UShort8::operator=(const UShort8 &rhs) const
   3653 	{
   3654 		Value *value = rhs.loadValue();
   3655 		storeValue(value);
   3656 
   3657 		return RValue<UShort8>(value);
   3658 	}
   3659 
   3660 	RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs) const
   3661 	{
   3662 		Value *value = rhs.loadValue();
   3663 		storeValue(value);
   3664 
   3665 		return RValue<UShort8>(value);
   3666 	}
   3667 
   3668 	RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
   3669 	{
   3670 		return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
   3671 	}
   3672 
   3673 	RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
   3674 	{
   3675 		return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));   // FIXME: Fallback required
   3676 	}
   3677 
   3678 	RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
   3679 	{
   3680 		return x86::psrlw(lhs, rhs);   // FIXME: Fallback required
   3681 	}
   3682 
   3683 	RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
   3684 	{
   3685 		return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
   3686 	}
   3687 
   3688 	RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
   3689 	{
   3690 		return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
   3691 	}
   3692 
   3693 	RValue<UShort8> operator+=(const UShort8 &lhs, RValue<UShort8> rhs)
   3694 	{
   3695 		return lhs = lhs + rhs;
   3696 	}
   3697 
   3698 	RValue<UShort8> operator~(RValue<UShort8> val)
   3699 	{
   3700 		return RValue<UShort8>(Nucleus::createNot(val.value));
   3701 	}
   3702 
   3703 	RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
   3704 	{
   3705 		Constant *pshufb[16];
   3706 		pshufb[0] = Nucleus::createConstantInt(select0 + 0);
   3707 		pshufb[1] = Nucleus::createConstantInt(select0 + 1);
   3708 		pshufb[2] = Nucleus::createConstantInt(select1 + 0);
   3709 		pshufb[3] = Nucleus::createConstantInt(select1 + 1);
   3710 		pshufb[4] = Nucleus::createConstantInt(select2 + 0);
   3711 		pshufb[5] = Nucleus::createConstantInt(select2 + 1);
   3712 		pshufb[6] = Nucleus::createConstantInt(select3 + 0);
   3713 		pshufb[7] = Nucleus::createConstantInt(select3 + 1);
   3714 		pshufb[8] = Nucleus::createConstantInt(select4 + 0);
   3715 		pshufb[9] = Nucleus::createConstantInt(select4 + 1);
   3716 		pshufb[10] = Nucleus::createConstantInt(select5 + 0);
   3717 		pshufb[11] = Nucleus::createConstantInt(select5 + 1);
   3718 		pshufb[12] = Nucleus::createConstantInt(select6 + 0);
   3719 		pshufb[13] = Nucleus::createConstantInt(select6 + 1);
   3720 		pshufb[14] = Nucleus::createConstantInt(select7 + 0);
   3721 		pshufb[15] = Nucleus::createConstantInt(select7 + 1);
   3722 
   3723 		Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
   3724 		Value *shuffle = Nucleus::createShuffleVector(byte16, UndefValue::get(Byte16::getType()), Nucleus::createConstantVector(pshufb, 16));
   3725 		Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
   3726 
   3727 		return RValue<UShort8>(short8);
   3728 	}
   3729 
   3730 	RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
   3731 	{
   3732 		return x86::pmulhuw(x, y);   // FIXME: Fallback required
   3733 	}
   3734 
   3735 	// FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
   3736 //	RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
   3737 //	{
   3738 //		Constant *pshufb[16];
   3739 //		pshufb[0] = Nucleus::createConstantInt(element + 0);
   3740 //		pshufb[1] = Nucleus::createConstantInt(element + 0);
   3741 //		pshufb[2] = Nucleus::createConstantInt(element + 4);
   3742 //		pshufb[3] = Nucleus::createConstantInt(element + 4);
   3743 //		pshufb[4] = Nucleus::createConstantInt(element + 8);
   3744 //		pshufb[5] = Nucleus::createConstantInt(element + 8);
   3745 //		pshufb[6] = Nucleus::createConstantInt(element + 12);
   3746 //		pshufb[7] = Nucleus::createConstantInt(element + 12);
   3747 //		pshufb[8] = Nucleus::createConstantInt(element + 16);
   3748 //		pshufb[9] = Nucleus::createConstantInt(element + 16);
   3749 //		pshufb[10] = Nucleus::createConstantInt(element + 20);
   3750 //		pshufb[11] = Nucleus::createConstantInt(element + 20);
   3751 //		pshufb[12] = Nucleus::createConstantInt(element + 24);
   3752 //		pshufb[13] = Nucleus::createConstantInt(element + 24);
   3753 //		pshufb[14] = Nucleus::createConstantInt(element + 28);
   3754 //		pshufb[15] = Nucleus::createConstantInt(element + 28);
   3755 //
   3756 //		Value *shuffle = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(pshufb, 16));
   3757 //		Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
   3758 //
   3759 //		return RValue<UShort8>(short8);
   3760 //	}
   3761 
   3762 	Type *UShort8::getType()
   3763 	{
   3764 		return VectorType::get(UShort::getType(), 8);
   3765 	}
   3766 
   3767 	Int::Int(Argument<Int> argument)
   3768 	{
   3769 		storeValue(argument.value);
   3770 	}
   3771 
   3772 	Int::Int(RValue<Byte> cast)
   3773 	{
   3774 		Value *integer = Nucleus::createZExt(cast.value, Int::getType());
   3775 
   3776 		storeValue(integer);
   3777 	}
   3778 
   3779 	Int::Int(RValue<SByte> cast)
   3780 	{
   3781 		Value *integer = Nucleus::createSExt(cast.value, Int::getType());
   3782 
   3783 		storeValue(integer);
   3784 	}
   3785 
   3786 	Int::Int(RValue<Short> cast)
   3787 	{
   3788 		Value *integer = Nucleus::createSExt(cast.value, Int::getType());
   3789 
   3790 		storeValue(integer);
   3791 	}
   3792 
   3793 	Int::Int(RValue<UShort> cast)
   3794 	{
   3795 		Value *integer = Nucleus::createZExt(cast.value, Int::getType());
   3796 
   3797 		storeValue(integer);
   3798 	}
   3799 
   3800 	Int::Int(RValue<Int2> cast)
   3801 	{
   3802 		*this = Extract(cast, 0);
   3803 	}
   3804 
   3805 	Int::Int(RValue<Long> cast)
   3806 	{
   3807 		Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
   3808 
   3809 		storeValue(integer);
   3810 	}
   3811 
   3812 	Int::Int(RValue<Float> cast)
   3813 	{
   3814 		Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
   3815 
   3816 		storeValue(integer);
   3817 	}
   3818 
   3819 	Int::Int()
   3820 	{
   3821 	}
   3822 
   3823 	Int::Int(int x)
   3824 	{
   3825 		storeValue(Nucleus::createConstantInt(x));
   3826 	}
   3827 
   3828 	Int::Int(RValue<Int> rhs)
   3829 	{
   3830 		storeValue(rhs.value);
   3831 	}
   3832 
   3833 	Int::Int(RValue<UInt> rhs)
   3834 	{
   3835 		storeValue(rhs.value);
   3836 	}
   3837 
   3838 	Int::Int(const Int &rhs)
   3839 	{
   3840 		Value *value = rhs.loadValue();
   3841 		storeValue(value);
   3842 	}
   3843 
   3844 	Int::Int(const Reference<Int> &rhs)
   3845 	{
   3846 		Value *value = rhs.loadValue();
   3847 		storeValue(value);
   3848 	}
   3849 
   3850 	Int::Int(const UInt &rhs)
   3851 	{
   3852 		Value *value = rhs.loadValue();
   3853 		storeValue(value);
   3854 	}
   3855 
   3856 	Int::Int(const Reference<UInt> &rhs)
   3857 	{
   3858 		Value *value = rhs.loadValue();
   3859 		storeValue(value);
   3860 	}
   3861 
   3862 	RValue<Int> Int::operator=(int rhs) const
   3863 	{
   3864 		return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
   3865 	}
   3866 
   3867 	RValue<Int> Int::operator=(RValue<Int> rhs) const
   3868 	{
   3869 		storeValue(rhs.value);
   3870 
   3871 		return rhs;
   3872 	}
   3873 
   3874 	RValue<Int> Int::operator=(RValue<UInt> rhs) const
   3875 	{
   3876 		storeValue(rhs.value);
   3877 
   3878 		return RValue<Int>(rhs);
   3879 	}
   3880 
   3881 	RValue<Int> Int::operator=(const Int &rhs) const
   3882 	{
   3883 		Value *value = rhs.loadValue();
   3884 		storeValue(value);
   3885 
   3886 		return RValue<Int>(value);
   3887 	}
   3888 
   3889 	RValue<Int> Int::operator=(const Reference<Int> &rhs) const
   3890 	{
   3891 		Value *value = rhs.loadValue();
   3892 		storeValue(value);
   3893 
   3894 		return RValue<Int>(value);
   3895 	}
   3896 
   3897 	RValue<Int> Int::operator=(const UInt &rhs) const
   3898 	{
   3899 		Value *value = rhs.loadValue();
   3900 		storeValue(value);
   3901 
   3902 		return RValue<Int>(value);
   3903 	}
   3904 
   3905 	RValue<Int> Int::operator=(const Reference<UInt> &rhs) const
   3906 	{
   3907 		Value *value = rhs.loadValue();
   3908 		storeValue(value);
   3909 
   3910 		return RValue<Int>(value);
   3911 	}
   3912 
   3913 	RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
   3914 	{
   3915 		return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
   3916 	}
   3917 
   3918 	RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
   3919 	{
   3920 		return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
   3921 	}
   3922 
   3923 	RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
   3924 	{
   3925 		return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
   3926 	}
   3927 
   3928 	RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
   3929 	{
   3930 		return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
   3931 	}
   3932 
   3933 	RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
   3934 	{
   3935 		return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
   3936 	}
   3937 
   3938 	RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
   3939 	{
   3940 		return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
   3941 	}
   3942 
   3943 	RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
   3944 	{
   3945 		return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
   3946 	}
   3947 
   3948 	RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
   3949 	{
   3950 		return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
   3951 	}
   3952 
   3953 	RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
   3954 	{
   3955 		return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
   3956 	}
   3957 
   3958 	RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
   3959 	{
   3960 		return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
   3961 	}
   3962 
   3963 	RValue<Int> operator+=(const Int &lhs, RValue<Int> rhs)
   3964 	{
   3965 		return lhs = lhs + rhs;
   3966 	}
   3967 
   3968 	RValue<Int> operator-=(const Int &lhs, RValue<Int> rhs)
   3969 	{
   3970 		return lhs = lhs - rhs;
   3971 	}
   3972 
   3973 	RValue<Int> operator*=(const Int &lhs, RValue<Int> rhs)
   3974 	{
   3975 		return lhs = lhs * rhs;
   3976 	}
   3977 
   3978 	RValue<Int> operator/=(const Int &lhs, RValue<Int> rhs)
   3979 	{
   3980 		return lhs = lhs / rhs;
   3981 	}
   3982 
   3983 	RValue<Int> operator%=(const Int &lhs, RValue<Int> rhs)
   3984 	{
   3985 		return lhs = lhs % rhs;
   3986 	}
   3987 
   3988 	RValue<Int> operator&=(const Int &lhs, RValue<Int> rhs)
   3989 	{
   3990 		return lhs = lhs & rhs;
   3991 	}
   3992 
   3993 	RValue<Int> operator|=(const Int &lhs, RValue<Int> rhs)
   3994 	{
   3995 		return lhs = lhs | rhs;
   3996 	}
   3997 
   3998 	RValue<Int> operator^=(const Int &lhs, RValue<Int> rhs)
   3999 	{
   4000 		return lhs = lhs ^ rhs;
   4001 	}
   4002 
   4003 	RValue<Int> operator<<=(const Int &lhs, RValue<Int> rhs)
   4004 	{
   4005 		return lhs = lhs << rhs;
   4006 	}
   4007 
   4008 	RValue<Int> operator>>=(const Int &lhs, RValue<Int> rhs)
   4009 	{
   4010 		return lhs = lhs >> rhs;
   4011 	}
   4012 
   4013 	RValue<Int> operator+(RValue<Int> val)
   4014 	{
   4015 		return val;
   4016 	}
   4017 
   4018 	RValue<Int> operator-(RValue<Int> val)
   4019 	{
   4020 		return RValue<Int>(Nucleus::createNeg(val.value));
   4021 	}
   4022 
   4023 	RValue<Int> operator~(RValue<Int> val)
   4024 	{
   4025 		return RValue<Int>(Nucleus::createNot(val.value));
   4026 	}
   4027 
   4028 	RValue<Int> operator++(const Int &val, int)   // Post-increment
   4029 	{
   4030 		RValue<Int> res = val;
   4031 
   4032 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
   4033 		val.storeValue(inc);
   4034 
   4035 		return res;
   4036 	}
   4037 
   4038 	const Int &operator++(const Int &val)   // Pre-increment
   4039 	{
   4040 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
   4041 		val.storeValue(inc);
   4042 
   4043 		return val;
   4044 	}
   4045 
   4046 	RValue<Int> operator--(const Int &val, int)   // Post-decrement
   4047 	{
   4048 		RValue<Int> res = val;
   4049 
   4050 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
   4051 		val.storeValue(inc);
   4052 
   4053 		return res;
   4054 	}
   4055 
   4056 	const Int &operator--(const Int &val)   // Pre-decrement
   4057 	{
   4058 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
   4059 		val.storeValue(inc);
   4060 
   4061 		return val;
   4062 	}
   4063 
   4064 	RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
   4065 	{
   4066 		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
   4067 	}
   4068 
   4069 	RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
   4070 	{
   4071 		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
   4072 	}
   4073 
   4074 	RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
   4075 	{
   4076 		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
   4077 	}
   4078 
   4079 	RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
   4080 	{
   4081 		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
   4082 	}
   4083 
   4084 	RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
   4085 	{
   4086 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   4087 	}
   4088 
   4089 	RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
   4090 	{
   4091 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   4092 	}
   4093 
   4094 	RValue<Int> Max(RValue<Int> x, RValue<Int> y)
   4095 	{
   4096 		return IfThenElse(x > y, x, y);
   4097 	}
   4098 
   4099 	RValue<Int> Min(RValue<Int> x, RValue<Int> y)
   4100 	{
   4101 		return IfThenElse(x < y, x, y);
   4102 	}
   4103 
   4104 	RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
   4105 	{
   4106 		return Min(Max(x, min), max);
   4107 	}
   4108 
   4109 	RValue<Int> RoundInt(RValue<Float> cast)
   4110 	{
   4111 		return x86::cvtss2si(cast);
   4112 
   4113 	//	return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
   4114 	}
   4115 
   4116 	Type *Int::getType()
   4117 	{
   4118 		return Type::getInt32Ty(*Nucleus::getContext());
   4119 	}
   4120 
   4121 	Long::Long(RValue<Int> cast)
   4122 	{
   4123 
   4124 
   4125 		Value *integer = Nucleus::createSExt(cast.value, Long::getType());
   4126 
   4127 		storeValue(integer);
   4128 	}
   4129 
   4130 	Long::Long(RValue<UInt> cast)
   4131 	{
   4132 		Value *integer = Nucleus::createZExt(cast.value, Long::getType());
   4133 
   4134 		storeValue(integer);
   4135 	}
   4136 
   4137 	Long::Long()
   4138 	{
   4139 	}
   4140 
   4141 	Long::Long(RValue<Long> rhs)
   4142 	{
   4143 		storeValue(rhs.value);
   4144 	}
   4145 
   4146 	RValue<Long> Long::operator=(int64_t rhs) const
   4147 	{
   4148 		return RValue<Long>(storeValue(Nucleus::createConstantInt(rhs)));
   4149 	}
   4150 
   4151 	RValue<Long> Long::operator=(RValue<Long> rhs) const
   4152 	{
   4153 		storeValue(rhs.value);
   4154 
   4155 		return rhs;
   4156 	}
   4157 
   4158 	RValue<Long> Long::operator=(const Long &rhs) const
   4159 	{
   4160 		Value *value = rhs.loadValue();
   4161 		storeValue(value);
   4162 
   4163 		return RValue<Long>(value);
   4164 	}
   4165 
   4166 	RValue<Long> Long::operator=(const Reference<Long> &rhs) const
   4167 	{
   4168 		Value *value = rhs.loadValue();
   4169 		storeValue(value);
   4170 
   4171 		return RValue<Long>(value);
   4172 	}
   4173 
   4174 	RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
   4175 	{
   4176 		return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
   4177 	}
   4178 
   4179 	RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
   4180 	{
   4181 		return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
   4182 	}
   4183 
   4184 	RValue<Long> operator+=(const Long &lhs, RValue<Long> rhs)
   4185 	{
   4186 		return lhs = lhs + rhs;
   4187 	}
   4188 
   4189 	RValue<Long> operator-=(const Long &lhs, RValue<Long> rhs)
   4190 	{
   4191 		return lhs = lhs - rhs;
   4192 	}
   4193 
   4194 	RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
   4195 	{
   4196 		return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
   4197 	}
   4198 
   4199 	Type *Long::getType()
   4200 	{
   4201 		return Type::getInt64Ty(*Nucleus::getContext());
   4202 	}
   4203 
   4204 	Long1::Long1(const RValue<UInt> cast)
   4205 	{
   4206 		Value *undefCast = Nucleus::createInsertElement(UndefValue::get(VectorType::get(Int::getType(), 2)), cast.value, 0);
   4207 		Value *zeroCast = Nucleus::createInsertElement(undefCast, Nucleus::createConstantInt(0), 1);
   4208 
   4209 		storeValue(Nucleus::createBitCast(zeroCast, Long1::getType()));
   4210 	}
   4211 
   4212 	Long1::Long1(RValue<Long1> rhs)
   4213 	{
   4214 		storeValue(rhs.value);
   4215 	}
   4216 
   4217 	Type *Long1::getType()
   4218 	{
   4219 		if(CPUID::supportsMMX2())
   4220 		{
   4221 			return MMX::getType();
   4222 		}
   4223 		else
   4224 		{
   4225 			return VectorType::get(Long::getType(), 1);
   4226 		}
   4227 	}
   4228 
   4229 	RValue<Long2> UnpackHigh(RValue<Long2> x, RValue<Long2> y)
   4230 	{
   4231 		Constant *shuffle[2];
   4232 		shuffle[0] = Nucleus::createConstantInt(1);
   4233 		shuffle[1] = Nucleus::createConstantInt(3);
   4234 
   4235 		Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 2));
   4236 
   4237 		return RValue<Long2>(packed);
   4238 	}
   4239 
   4240 	Type *Long2::getType()
   4241 	{
   4242 		return VectorType::get(Long::getType(), 2);
   4243 	}
   4244 
   4245 	UInt::UInt(Argument<UInt> argument)
   4246 	{
   4247 		storeValue(argument.value);
   4248 	}
   4249 
   4250 	UInt::UInt(RValue<UShort> cast)
   4251 	{
   4252 		Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
   4253 
   4254 		storeValue(integer);
   4255 	}
   4256 
   4257 	UInt::UInt(RValue<Long> cast)
   4258 	{
   4259 		Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
   4260 
   4261 		storeValue(integer);
   4262 	}
   4263 
   4264 	UInt::UInt(RValue<Float> cast)
   4265 	{
   4266 		Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
   4267 
   4268 		storeValue(integer);
   4269 	}
   4270 
   4271 	UInt::UInt()
   4272 	{
   4273 	}
   4274 
   4275 	UInt::UInt(int x)
   4276 	{
   4277 		storeValue(Nucleus::createConstantInt(x));
   4278 	}
   4279 
   4280 	UInt::UInt(unsigned int x)
   4281 	{
   4282 		storeValue(Nucleus::createConstantInt(x));
   4283 	}
   4284 
   4285 	UInt::UInt(RValue<UInt> rhs)
   4286 	{
   4287 		storeValue(rhs.value);
   4288 	}
   4289 
   4290 	UInt::UInt(RValue<Int> rhs)
   4291 	{
   4292 		storeValue(rhs.value);
   4293 	}
   4294 
   4295 	UInt::UInt(const UInt &rhs)
   4296 	{
   4297 		Value *value = rhs.loadValue();
   4298 		storeValue(value);
   4299 	}
   4300 
   4301 	UInt::UInt(const Reference<UInt> &rhs)
   4302 	{
   4303 		Value *value = rhs.loadValue();
   4304 		storeValue(value);
   4305 	}
   4306 
   4307 	UInt::UInt(const Int &rhs)
   4308 	{
   4309 		Value *value = rhs.loadValue();
   4310 		storeValue(value);
   4311 	}
   4312 
   4313 	UInt::UInt(const Reference<Int> &rhs)
   4314 	{
   4315 		Value *value = rhs.loadValue();
   4316 		storeValue(value);
   4317 	}
   4318 
   4319 	RValue<UInt> UInt::operator=(unsigned int rhs) const
   4320 	{
   4321 		return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
   4322 	}
   4323 
   4324 	RValue<UInt> UInt::operator=(RValue<UInt> rhs) const
   4325 	{
   4326 		storeValue(rhs.value);
   4327 
   4328 		return rhs;
   4329 	}
   4330 
   4331 	RValue<UInt> UInt::operator=(RValue<Int> rhs) const
   4332 	{
   4333 		storeValue(rhs.value);
   4334 
   4335 		return RValue<UInt>(rhs);
   4336 	}
   4337 
   4338 	RValue<UInt> UInt::operator=(const UInt &rhs) const
   4339 	{
   4340 		Value *value = rhs.loadValue();
   4341 		storeValue(value);
   4342 
   4343 		return RValue<UInt>(value);
   4344 	}
   4345 
   4346 	RValue<UInt> UInt::operator=(const Reference<UInt> &rhs) const
   4347 	{
   4348 		Value *value = rhs.loadValue();
   4349 		storeValue(value);
   4350 
   4351 		return RValue<UInt>(value);
   4352 	}
   4353 
   4354 	RValue<UInt> UInt::operator=(const Int &rhs) const
   4355 	{
   4356 		Value *value = rhs.loadValue();
   4357 		storeValue(value);
   4358 
   4359 		return RValue<UInt>(value);
   4360 	}
   4361 
   4362 	RValue<UInt> UInt::operator=(const Reference<Int> &rhs) const
   4363 	{
   4364 		Value *value = rhs.loadValue();
   4365 		storeValue(value);
   4366 
   4367 		return RValue<UInt>(value);
   4368 	}
   4369 
   4370 	RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
   4371 	{
   4372 		return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
   4373 	}
   4374 
   4375 	RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
   4376 	{
   4377 		return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
   4378 	}
   4379 
   4380 	RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
   4381 	{
   4382 		return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
   4383 	}
   4384 
   4385 	RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
   4386 	{
   4387 		return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
   4388 	}
   4389 
   4390 	RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
   4391 	{
   4392 		return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
   4393 	}
   4394 
   4395 	RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
   4396 	{
   4397 		return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
   4398 	}
   4399 
   4400 	RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
   4401 	{
   4402 		return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
   4403 	}
   4404 
   4405 	RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
   4406 	{
   4407 		return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
   4408 	}
   4409 
   4410 	RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
   4411 	{
   4412 		return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
   4413 	}
   4414 
   4415 	RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
   4416 	{
   4417 		return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
   4418 	}
   4419 
   4420 	RValue<UInt> operator+=(const UInt &lhs, RValue<UInt> rhs)
   4421 	{
   4422 		return lhs = lhs + rhs;
   4423 	}
   4424 
   4425 	RValue<UInt> operator-=(const UInt &lhs, RValue<UInt> rhs)
   4426 	{
   4427 		return lhs = lhs - rhs;
   4428 	}
   4429 
   4430 	RValue<UInt> operator*=(const UInt &lhs, RValue<UInt> rhs)
   4431 	{
   4432 		return lhs = lhs * rhs;
   4433 	}
   4434 
   4435 	RValue<UInt> operator/=(const UInt &lhs, RValue<UInt> rhs)
   4436 	{
   4437 		return lhs = lhs / rhs;
   4438 	}
   4439 
   4440 	RValue<UInt> operator%=(const UInt &lhs, RValue<UInt> rhs)
   4441 	{
   4442 		return lhs = lhs % rhs;
   4443 	}
   4444 
   4445 	RValue<UInt> operator&=(const UInt &lhs, RValue<UInt> rhs)
   4446 	{
   4447 		return lhs = lhs & rhs;
   4448 	}
   4449 
   4450 	RValue<UInt> operator|=(const UInt &lhs, RValue<UInt> rhs)
   4451 	{
   4452 		return lhs = lhs | rhs;
   4453 	}
   4454 
   4455 	RValue<UInt> operator^=(const UInt &lhs, RValue<UInt> rhs)
   4456 	{
   4457 		return lhs = lhs ^ rhs;
   4458 	}
   4459 
   4460 	RValue<UInt> operator<<=(const UInt &lhs, RValue<UInt> rhs)
   4461 	{
   4462 		return lhs = lhs << rhs;
   4463 	}
   4464 
   4465 	RValue<UInt> operator>>=(const UInt &lhs, RValue<UInt> rhs)
   4466 	{
   4467 		return lhs = lhs >> rhs;
   4468 	}
   4469 
   4470 	RValue<UInt> operator+(RValue<UInt> val)
   4471 	{
   4472 		return val;
   4473 	}
   4474 
   4475 	RValue<UInt> operator-(RValue<UInt> val)
   4476 	{
   4477 		return RValue<UInt>(Nucleus::createNeg(val.value));
   4478 	}
   4479 
   4480 	RValue<UInt> operator~(RValue<UInt> val)
   4481 	{
   4482 		return RValue<UInt>(Nucleus::createNot(val.value));
   4483 	}
   4484 
   4485 	RValue<UInt> operator++(const UInt &val, int)   // Post-increment
   4486 	{
   4487 		RValue<UInt> res = val;
   4488 
   4489 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
   4490 		val.storeValue(inc);
   4491 
   4492 		return res;
   4493 	}
   4494 
   4495 	const UInt &operator++(const UInt &val)   // Pre-increment
   4496 	{
   4497 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
   4498 		val.storeValue(inc);
   4499 
   4500 		return val;
   4501 	}
   4502 
   4503 	RValue<UInt> operator--(const UInt &val, int)   // Post-decrement
   4504 	{
   4505 		RValue<UInt> res = val;
   4506 
   4507 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
   4508 		val.storeValue(inc);
   4509 
   4510 		return res;
   4511 	}
   4512 
   4513 	const UInt &operator--(const UInt &val)   // Pre-decrement
   4514 	{
   4515 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
   4516 		val.storeValue(inc);
   4517 
   4518 		return val;
   4519 	}
   4520 
   4521 	RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
   4522 	{
   4523 		return IfThenElse(x > y, x, y);
   4524 	}
   4525 
   4526 	RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
   4527 	{
   4528 		return IfThenElse(x < y, x, y);
   4529 	}
   4530 
   4531 	RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
   4532 	{
   4533 		return Min(Max(x, min), max);
   4534 	}
   4535 
   4536 	RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
   4537 	{
   4538 		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
   4539 	}
   4540 
   4541 	RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
   4542 	{
   4543 		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
   4544 	}
   4545 
   4546 	RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
   4547 	{
   4548 		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
   4549 	}
   4550 
   4551 	RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
   4552 	{
   4553 		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
   4554 	}
   4555 
   4556 	RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
   4557 	{
   4558 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
   4559 	}
   4560 
   4561 	RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
   4562 	{
   4563 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
   4564 	}
   4565 
   4566 //	RValue<UInt> RoundUInt(RValue<Float> cast)
   4567 //	{
   4568 //		return x86::cvtss2si(val);   // FIXME: Unsigned
   4569 //
   4570 //	//	return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
   4571 //	}
   4572 
   4573 	Type *UInt::getType()
   4574 	{
   4575 		return Type::getInt32Ty(*Nucleus::getContext());
   4576 	}
   4577 
   4578 //	Int2::Int2(RValue<Int> cast)
   4579 //	{
   4580 //		Value *extend = Nucleus::createZExt(cast.value, Long::getType());
   4581 //		Value *vector = Nucleus::createBitCast(extend, Int2::getType());
   4582 //
   4583 //		Constant *shuffle[2];
   4584 //		shuffle[0] = Nucleus::createConstantInt(0);
   4585 //		shuffle[1] = Nucleus::createConstantInt(0);
   4586 //
   4587 //		Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
   4588 //
   4589 //		storeValue(replicate);
   4590 //	}
   4591 
   4592 	Int2::Int2(RValue<Int4> cast)
   4593 	{
   4594 		Value *long2 = Nucleus::createBitCast(cast.value, Long2::getType());
   4595 		Value *element = Nucleus::createExtractElement(long2, 0);
   4596 		Value *int2 = Nucleus::createBitCast(element, Int2::getType());
   4597 
   4598 		storeValue(int2);
   4599 	}
   4600 
   4601 	Int2::Int2()
   4602 	{
   4603 	//	xy.parent = this;
   4604 	}
   4605 
   4606 	Int2::Int2(int x, int y)
   4607 	{
   4608 	//	xy.parent = this;
   4609 
   4610 		Constant *constantVector[2];
   4611 		constantVector[0] = Nucleus::createConstantInt(x);
   4612 		constantVector[1] = Nucleus::createConstantInt(y);
   4613 		Value *vector = Nucleus::createConstantVector(constantVector, 2);
   4614 
   4615 		storeValue(Nucleus::createBitCast(vector, getType()));
   4616 	}
   4617 
   4618 	Int2::Int2(RValue<Int2> rhs)
   4619 	{
   4620 	//	xy.parent = this;
   4621 
   4622 		storeValue(rhs.value);
   4623 	}
   4624 
   4625 	Int2::Int2(const Int2 &rhs)
   4626 	{
   4627 	//	xy.parent = this;
   4628 
   4629 		Value *value = rhs.loadValue();
   4630 		storeValue(value);
   4631 	}
   4632 
   4633 	Int2::Int2(const Reference<Int2> &rhs)
   4634 	{
   4635 	//	xy.parent = this;
   4636 
   4637 		Value *value = rhs.loadValue();
   4638 		storeValue(value);
   4639 	}
   4640 
   4641 	Int2::Int2(RValue<Int> lo, RValue<Int> hi)
   4642 	{
   4643 		if(CPUID::supportsMMX2())
   4644 		{
   4645 			// movd mm0, lo
   4646 			// movd mm1, hi
   4647 			// punpckldq mm0, mm1
   4648 			storeValue(As<Int2>(UnpackLow(As<Int2>(Long1(RValue<UInt>(lo))), As<Int2>(Long1(RValue<UInt>(hi))))).value);
   4649 		}
   4650 		else
   4651 		{
   4652 			Constant *shuffle[2];
   4653 			shuffle[0] = Nucleus::createConstantInt(0);
   4654 			shuffle[1] = Nucleus::createConstantInt(1);
   4655 
   4656 			Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, VectorType::get(Int::getType(), 1)), Nucleus::createBitCast(hi.value, VectorType::get(Int::getType(), 1)), Nucleus::createConstantVector(shuffle, 2));
   4657 
   4658 			storeValue(Nucleus::createBitCast(packed, Int2::getType()));
   4659 		}
   4660 	}
   4661 
   4662 	RValue<Int2> Int2::operator=(RValue<Int2> rhs) const
   4663 	{
   4664 		storeValue(rhs.value);
   4665 
   4666 		return rhs;
   4667 	}
   4668 
   4669 	RValue<Int2> Int2::operator=(const Int2 &rhs) const
   4670 	{
   4671 		Value *value = rhs.loadValue();
   4672 		storeValue(value);
   4673 
   4674 		return RValue<Int2>(value);
   4675 	}
   4676 
   4677 	RValue<Int2> Int2::operator=(const Reference<Int2> &rhs) const
   4678 	{
   4679 		Value *value = rhs.loadValue();
   4680 		storeValue(value);
   4681 
   4682 		return RValue<Int2>(value);
   4683 	}
   4684 
   4685 	RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
   4686 	{
   4687 		if(CPUID::supportsMMX2())
   4688 		{
   4689 			return x86::paddd(lhs, rhs);
   4690 		}
   4691 		else
   4692 		{
   4693 			return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
   4694 		}
   4695 	}
   4696 
   4697 	RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
   4698 	{
   4699 		if(CPUID::supportsMMX2())
   4700 		{
   4701 			return x86::psubd(lhs, rhs);
   4702 		}
   4703 		else
   4704 		{
   4705 			return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
   4706 		}
   4707 	}
   4708 
   4709 //	RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
   4710 //	{
   4711 //		return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
   4712 //	}
   4713 
   4714 //	RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
   4715 //	{
   4716 //		return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
   4717 //	}
   4718 
   4719 //	RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
   4720 //	{
   4721 //		return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
   4722 //	}
   4723 
   4724 	RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
   4725 	{
   4726 		if(CPUID::supportsMMX2())
   4727 		{
   4728 			return As<Int2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
   4729 		}
   4730 		else
   4731 		{
   4732 			return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
   4733 		}
   4734 	}
   4735 
   4736 	RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
   4737 	{
   4738 		if(CPUID::supportsMMX2())
   4739 		{
   4740 			return As<Int2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
   4741 		}
   4742 		else
   4743 		{
   4744 			return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
   4745 		}
   4746 	}
   4747 
   4748 	RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
   4749 	{
   4750 		if(CPUID::supportsMMX2())
   4751 		{
   4752 			return As<Int2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
   4753 		}
   4754 		else
   4755 		{
   4756 			return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
   4757 		}
   4758 	}
   4759 
   4760 	RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
   4761 	{
   4762 	//	return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
   4763 
   4764 		return x86::pslld(lhs, rhs);
   4765 	}
   4766 
   4767 	RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
   4768 	{
   4769 	//	return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
   4770 
   4771 		return x86::psrad(lhs, rhs);
   4772 	}
   4773 
   4774 	RValue<Int2> operator<<(RValue<Int2> lhs, RValue<Long1> rhs)
   4775 	{
   4776 	//	return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
   4777 
   4778 		return x86::pslld(lhs, rhs);
   4779 	}
   4780 
   4781 	RValue<Int2> operator>>(RValue<Int2> lhs, RValue<Long1> rhs)
   4782 	{
   4783 	//	return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
   4784 
   4785 		return x86::psrad(lhs, rhs);
   4786 	}
   4787 
   4788 	RValue<Int2> operator+=(const Int2 &lhs, RValue<Int2> rhs)
   4789 	{
   4790 		return lhs = lhs + rhs;
   4791 	}
   4792 
   4793 	RValue<Int2> operator-=(const Int2 &lhs, RValue<Int2> rhs)
   4794 	{
   4795 		return lhs = lhs - rhs;
   4796 	}
   4797 
   4798 //	RValue<Int2> operator*=(const Int2 &lhs, RValue<Int2> rhs)
   4799 //	{
   4800 //		return lhs = lhs * rhs;
   4801 //	}
   4802 
   4803 //	RValue<Int2> operator/=(const Int2 &lhs, RValue<Int2> rhs)
   4804 //	{
   4805 //		return lhs = lhs / rhs;
   4806 //	}
   4807 
   4808 //	RValue<Int2> operator%=(const Int2 &lhs, RValue<Int2> rhs)
   4809 //	{
   4810 //		return lhs = lhs % rhs;
   4811 //	}
   4812 
   4813 	RValue<Int2> operator&=(const Int2 &lhs, RValue<Int2> rhs)
   4814 	{
   4815 		return lhs = lhs & rhs;
   4816 	}
   4817 
   4818 	RValue<Int2> operator|=(const Int2 &lhs, RValue<Int2> rhs)
   4819 	{
   4820 		return lhs = lhs | rhs;
   4821 	}
   4822 
   4823 	RValue<Int2> operator^=(const Int2 &lhs, RValue<Int2> rhs)
   4824 	{
   4825 		return lhs = lhs ^ rhs;
   4826 	}
   4827 
   4828 	RValue<Int2> operator<<=(const Int2 &lhs, unsigned char rhs)
   4829 	{
   4830 		return lhs = lhs << rhs;
   4831 	}
   4832 
   4833 	RValue<Int2> operator>>=(const Int2 &lhs, unsigned char rhs)
   4834 	{
   4835 		return lhs = lhs >> rhs;
   4836 	}
   4837 
   4838 	RValue<Int2> operator<<=(const Int2 &lhs, RValue<Long1> rhs)
   4839 	{
   4840 		return lhs = lhs << rhs;
   4841 	}
   4842 
   4843 	RValue<Int2> operator>>=(const Int2 &lhs, RValue<Long1> rhs)
   4844 	{
   4845 		return lhs = lhs >> rhs;
   4846 	}
   4847 
   4848 //	RValue<Int2> operator+(RValue<Int2> val)
   4849 //	{
   4850 //		return val;
   4851 //	}
   4852 
   4853 //	RValue<Int2> operator-(RValue<Int2> val)
   4854 //	{
   4855 //		return RValue<Int2>(Nucleus::createNeg(val.value));
   4856 //	}
   4857 
   4858 	RValue<Int2> operator~(RValue<Int2> val)
   4859 	{
   4860 		if(CPUID::supportsMMX2())
   4861 		{
   4862 			return val ^ Int2(0xFFFFFFFF, 0xFFFFFFFF);
   4863 		}
   4864 		else
   4865 		{
   4866 			return RValue<Int2>(Nucleus::createNot(val.value));
   4867 		}
   4868 	}
   4869 
   4870 	RValue<Long1> UnpackLow(RValue<Int2> x, RValue<Int2> y)
   4871 	{
   4872 		if(CPUID::supportsMMX2())
   4873 		{
   4874 			return x86::punpckldq(x, y);
   4875 		}
   4876 		else
   4877 		{
   4878 			Constant *shuffle[2];
   4879 			shuffle[0] = Nucleus::createConstantInt(0);
   4880 			shuffle[1] = Nucleus::createConstantInt(2);
   4881 
   4882 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 2));
   4883 
   4884 			return RValue<Long1>(Nucleus::createBitCast(packed, Long1::getType()));
   4885 		}
   4886 	}
   4887 
   4888 	RValue<Long1> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
   4889 	{
   4890 		if(CPUID::supportsMMX2())
   4891 		{
   4892 			return x86::punpckhdq(x, y);
   4893 		}
   4894 		else
   4895 		{
   4896 			Constant *shuffle[2];
   4897 			shuffle[0] = Nucleus::createConstantInt(1);
   4898 			shuffle[1] = Nucleus::createConstantInt(3);
   4899 
   4900 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 2));
   4901 
   4902 			return RValue<Long1>(Nucleus::createBitCast(packed, Long1::getType()));
   4903 		}
   4904 	}
   4905 
   4906 	RValue<Int> Extract(RValue<Int2> val, int i)
   4907 	{
   4908 		if(false)   // FIXME: LLVM does not generate optimal code
   4909 		{
   4910 			return RValue<Int>(Nucleus::createExtractElement(val.value, i));
   4911 		}
   4912 		else
   4913 		{
   4914 			if(i == 0)
   4915 			{
   4916 				return RValue<Int>(Nucleus::createExtractElement(Nucleus::createBitCast(val.value, VectorType::get(Int::getType(), 2)), 0));
   4917 			}
   4918 			else
   4919 			{
   4920 				Int2 val2 = As<Int2>(UnpackHigh(val, val));
   4921 
   4922 				return Extract(val2, 0);
   4923 			}
   4924 		}
   4925 	}
   4926 
   4927 	RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
   4928 	{
   4929 		return RValue<Int2>(Nucleus::createBitCast(Nucleus::createInsertElement(Nucleus::createBitCast(val.value, VectorType::get(Int::getType(), 2)), element.value, i), Int2::getType()));
   4930 	}
   4931 
   4932 	Type *Int2::getType()
   4933 	{
   4934 		if(CPUID::supportsMMX2())
   4935 		{
   4936 			return MMX::getType();
   4937 		}
   4938 		else
   4939 		{
   4940 			return VectorType::get(Int::getType(), 2);
   4941 		}
   4942 	}
   4943 
   4944 	UInt2::UInt2()
   4945 	{
   4946 	//	xy.parent = this;
   4947 	}
   4948 
   4949 	UInt2::UInt2(unsigned int x, unsigned int y)
   4950 	{
   4951 	//	xy.parent = this;
   4952 
   4953 		Constant *constantVector[2];
   4954 		constantVector[0] = Nucleus::createConstantInt(x);
   4955 		constantVector[1] = Nucleus::createConstantInt(y);
   4956 		Value *vector = Nucleus::createConstantVector(constantVector, 2);
   4957 
   4958 		storeValue(Nucleus::createBitCast(vector, getType()));
   4959 	}
   4960 
   4961 	UInt2::UInt2(RValue<UInt2> rhs)
   4962 	{
   4963 	//	xy.parent = this;
   4964 
   4965 		storeValue(rhs.value);
   4966 	}
   4967 
   4968 	UInt2::UInt2(const UInt2 &rhs)
   4969 	{
   4970 	//	xy.parent = this;
   4971 
   4972 		Value *value = rhs.loadValue();
   4973 		storeValue(value);
   4974 	}
   4975 
   4976 	UInt2::UInt2(const Reference<UInt2> &rhs)
   4977 	{
   4978 	//	xy.parent = this;
   4979 
   4980 		Value *value = rhs.loadValue();
   4981 		storeValue(value);
   4982 	}
   4983 
   4984 	RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs) const
   4985 	{
   4986 		storeValue(rhs.value);
   4987 
   4988 		return rhs;
   4989 	}
   4990 
   4991 	RValue<UInt2> UInt2::operator=(const UInt2 &rhs) const
   4992 	{
   4993 		Value *value = rhs.loadValue();
   4994 		storeValue(value);
   4995 
   4996 		return RValue<UInt2>(value);
   4997 	}
   4998 
   4999 	RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs) const
   5000 	{
   5001 		Value *value = rhs.loadValue();
   5002 		storeValue(value);
   5003 
   5004 		return RValue<UInt2>(value);
   5005 	}
   5006 
   5007 	RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5008 	{
   5009 		if(CPUID::supportsMMX2())
   5010 		{
   5011 			return As<UInt2>(x86::paddd(As<Int2>(lhs), As<Int2>(rhs)));
   5012 		}
   5013 		else
   5014 		{
   5015 			return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
   5016 		}
   5017 	}
   5018 
   5019 	RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5020 	{
   5021 		if(CPUID::supportsMMX2())
   5022 		{
   5023 			return As<UInt2>(x86::psubd(As<Int2>(lhs), As<Int2>(rhs)));
   5024 		}
   5025 		else
   5026 		{
   5027 			return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
   5028 		}
   5029 	}
   5030 
   5031 //	RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5032 //	{
   5033 //		return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
   5034 //	}
   5035 
   5036 //	RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5037 //	{
   5038 //		return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
   5039 //	}
   5040 
   5041 //	RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5042 //	{
   5043 //		return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
   5044 //	}
   5045 
   5046 	RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5047 	{
   5048 		if(CPUID::supportsMMX2())
   5049 		{
   5050 			return As<UInt2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
   5051 		}
   5052 		else
   5053 		{
   5054 			return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
   5055 		}
   5056 	}
   5057 
   5058 	RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5059 	{
   5060 		if(CPUID::supportsMMX2())
   5061 		{
   5062 			return As<UInt2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
   5063 		}
   5064 		else
   5065 		{
   5066 			return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
   5067 		}
   5068 	}
   5069 
   5070 	RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
   5071 	{
   5072 		if(CPUID::supportsMMX2())
   5073 		{
   5074 			return As<UInt2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
   5075 		}
   5076 		else
   5077 		{
   5078 			return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
   5079 		}
   5080 	}
   5081 
   5082 	RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
   5083 	{
   5084 	//	return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
   5085 
   5086 		return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
   5087 	}
   5088 
   5089 	RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
   5090 	{
   5091 	//	return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
   5092 
   5093 		return x86::psrld(lhs, rhs);
   5094 	}
   5095 
   5096 	RValue<UInt2> operator<<(RValue<UInt2> lhs, RValue<Long1> rhs)
   5097 	{
   5098 	//	return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
   5099 
   5100 		return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
   5101 	}
   5102 
   5103 	RValue<UInt2> operator>>(RValue<UInt2> lhs, RValue<Long1> rhs)
   5104 	{
   5105 	//	return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
   5106 
   5107 		return x86::psrld(lhs, rhs);
   5108 	}
   5109 
   5110 	RValue<UInt2> operator+=(const UInt2 &lhs, RValue<UInt2> rhs)
   5111 	{
   5112 		return lhs = lhs + rhs;
   5113 	}
   5114 
   5115 	RValue<UInt2> operator-=(const UInt2 &lhs, RValue<UInt2> rhs)
   5116 	{
   5117 		return lhs = lhs - rhs;
   5118 	}
   5119 
   5120 //	RValue<UInt2> operator*=(const UInt2 &lhs, RValue<UInt2> rhs)
   5121 //	{
   5122 //		return lhs = lhs * rhs;
   5123 //	}
   5124 
   5125 //	RValue<UInt2> operator/=(const UInt2 &lhs, RValue<UInt2> rhs)
   5126 //	{
   5127 //		return lhs = lhs / rhs;
   5128 //	}
   5129 
   5130 //	RValue<UInt2> operator%=(const UInt2 &lhs, RValue<UInt2> rhs)
   5131 //	{
   5132 //		return lhs = lhs % rhs;
   5133 //	}
   5134 
   5135 	RValue<UInt2> operator&=(const UInt2 &lhs, RValue<UInt2> rhs)
   5136 	{
   5137 		return lhs = lhs & rhs;
   5138 	}
   5139 
   5140 	RValue<UInt2> operator|=(const UInt2 &lhs, RValue<UInt2> rhs)
   5141 	{
   5142 		return lhs = lhs | rhs;
   5143 	}
   5144 
   5145 	RValue<UInt2> operator^=(const UInt2 &lhs, RValue<UInt2> rhs)
   5146 	{
   5147 		return lhs = lhs ^ rhs;
   5148 	}
   5149 
   5150 	RValue<UInt2> operator<<=(const UInt2 &lhs, unsigned char rhs)
   5151 	{
   5152 		return lhs = lhs << rhs;
   5153 	}
   5154 
   5155 	RValue<UInt2> operator>>=(const UInt2 &lhs, unsigned char rhs)
   5156 	{
   5157 		return lhs = lhs >> rhs;
   5158 	}
   5159 
   5160 	RValue<UInt2> operator<<=(const UInt2 &lhs, RValue<Long1> rhs)
   5161 	{
   5162 		return lhs = lhs << rhs;
   5163 	}
   5164 
   5165 	RValue<UInt2> operator>>=(const UInt2 &lhs, RValue<Long1> rhs)
   5166 	{
   5167 		return lhs = lhs >> rhs;
   5168 	}
   5169 
   5170 //	RValue<UInt2> operator+(RValue<UInt2> val)
   5171 //	{
   5172 //		return val;
   5173 //	}
   5174 
   5175 //	RValue<UInt2> operator-(RValue<UInt2> val)
   5176 //	{
   5177 //		return RValue<UInt2>(Nucleus::createNeg(val.value));
   5178 //	}
   5179 
   5180 	RValue<UInt2> operator~(RValue<UInt2> val)
   5181 	{
   5182 		if(CPUID::supportsMMX2())
   5183 		{
   5184 			return val ^ UInt2(0xFFFFFFFF, 0xFFFFFFFF);
   5185 		}
   5186 		else
   5187 		{
   5188 			return RValue<UInt2>(Nucleus::createNot(val.value));
   5189 		}
   5190 	}
   5191 
   5192 	Type *UInt2::getType()
   5193 	{
   5194 		if(CPUID::supportsMMX2())
   5195 		{
   5196 			return MMX::getType();
   5197 		}
   5198 		else
   5199 		{
   5200 			return VectorType::get(UInt::getType(), 2);
   5201 		}
   5202 	}
   5203 
   5204 	Int4::Int4(RValue<Float4> cast)
   5205 	{
   5206 	//	xyzw.parent = this;
   5207 
   5208 		Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
   5209 
   5210 		storeValue(xyzw);
   5211 	}
   5212 
   5213 	Int4::Int4(RValue<Short4> cast)
   5214 	{
   5215 		Value *long2 = UndefValue::get(Long2::getType());
   5216 		Value *element = Nucleus::createBitCast(cast.value, Long::getType());
   5217 		long2 = Nucleus::createInsertElement(long2, element, 0);
   5218 		RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
   5219 
   5220 		if(CPUID::supportsSSE4_1())
   5221 		{
   5222 			storeValue(x86::pmovsxwd(vector).value);
   5223 		}
   5224 		else
   5225 		{
   5226 			Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
   5227 
   5228 			Constant *swizzle[8];
   5229 			swizzle[0] = Nucleus::createConstantInt(0);
   5230 			swizzle[1] = Nucleus::createConstantInt(0);
   5231 			swizzle[2] = Nucleus::createConstantInt(1);
   5232 			swizzle[3] = Nucleus::createConstantInt(1);
   5233 			swizzle[4] = Nucleus::createConstantInt(2);
   5234 			swizzle[5] = Nucleus::createConstantInt(2);
   5235 			swizzle[6] = Nucleus::createConstantInt(3);
   5236 			swizzle[7] = Nucleus::createConstantInt(3);
   5237 
   5238 			Value *c = Nucleus::createShuffleVector(b, b, Nucleus::createConstantVector(swizzle, 8));
   5239 			Value *d = Nucleus::createBitCast(c, Int4::getType());
   5240 			storeValue(d);
   5241 
   5242 			// Each Short is packed into each Int in the (Short | Short) format.
   5243 			// Shifting by 16 will retrieve the original Short value.
   5244 			// Shitfing an Int will propagate the sign bit, which will work
   5245 			// for both positive and negative values of a Short.
   5246 			*this >>= 16;
   5247 		}
   5248 	}
   5249 
   5250 	Int4::Int4(RValue<UShort4> cast)
   5251 	{
   5252 		Value *long2 = UndefValue::get(Long2::getType());
   5253 		Value *element = Nucleus::createBitCast(cast.value, Long::getType());
   5254 		long2 = Nucleus::createInsertElement(long2, element, 0);
   5255 		RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
   5256 
   5257 		if(CPUID::supportsSSE4_1())
   5258 		{
   5259 			storeValue(x86::pmovzxwd(RValue<Int4>(vector)).value);
   5260 		}
   5261 		else
   5262 		{
   5263 			Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
   5264 
   5265 			Constant *swizzle[8];
   5266 			swizzle[0] = Nucleus::createConstantInt(0);
   5267 			swizzle[1] = Nucleus::createConstantInt(8);
   5268 			swizzle[2] = Nucleus::createConstantInt(1);
   5269 			swizzle[3] = Nucleus::createConstantInt(9);
   5270 			swizzle[4] = Nucleus::createConstantInt(2);
   5271 			swizzle[5] = Nucleus::createConstantInt(10);
   5272 			swizzle[6] = Nucleus::createConstantInt(3);
   5273 			swizzle[7] = Nucleus::createConstantInt(11);
   5274 
   5275 			Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Short8::getType()), Nucleus::createConstantVector(swizzle, 8));
   5276 			Value *d = Nucleus::createBitCast(c, Int4::getType());
   5277 			storeValue(d);
   5278 		}
   5279 	}
   5280 
   5281 	Int4::Int4()
   5282 	{
   5283 	//	xyzw.parent = this;
   5284 	}
   5285 
   5286 	Int4::Int4(int xyzw)
   5287 	{
   5288 		constant(xyzw, xyzw, xyzw, xyzw);
   5289 	}
   5290 
   5291 	Int4::Int4(int x, int yzw)
   5292 	{
   5293 		constant(x, yzw, yzw, yzw);
   5294 	}
   5295 
   5296 	Int4::Int4(int x, int y, int zw)
   5297 	{
   5298 		constant(x, y, zw, zw);
   5299 	}
   5300 
   5301 	Int4::Int4(int x, int y, int z, int w)
   5302 	{
   5303 		constant(x, y, z, w);
   5304 	}
   5305 
   5306 	void Int4::constant(int x, int y, int z, int w)
   5307 	{
   5308 	//	xyzw.parent = this;
   5309 
   5310 		Constant *constantVector[4];
   5311 		constantVector[0] = Nucleus::createConstantInt(x);
   5312 		constantVector[1] = Nucleus::createConstantInt(y);
   5313 		constantVector[2] = Nucleus::createConstantInt(z);
   5314 		constantVector[3] = Nucleus::createConstantInt(w);
   5315 
   5316 		storeValue(Nucleus::createConstantVector(constantVector, 4));
   5317 	}
   5318 
   5319 	Int4::Int4(RValue<Int4> rhs)
   5320 	{
   5321 	//	xyzw.parent = this;
   5322 
   5323 		storeValue(rhs.value);
   5324 	}
   5325 
   5326 	Int4::Int4(const Int4 &rhs)
   5327 	{
   5328 	//	xyzw.parent = this;
   5329 
   5330 		Value *value = rhs.loadValue();
   5331 		storeValue(value);
   5332 	}
   5333 
   5334 	Int4::Int4(const Reference<Int4> &rhs)
   5335 	{
   5336 	//	xyzw.parent = this;
   5337 
   5338 		Value *value = rhs.loadValue();
   5339 		storeValue(value);
   5340 	}
   5341 
   5342 	Int4::Int4(RValue<UInt4> rhs)
   5343 	{
   5344 	//	xyzw.parent = this;
   5345 
   5346 		storeValue(rhs.value);
   5347 	}
   5348 
   5349 	Int4::Int4(const UInt4 &rhs)
   5350 	{
   5351 	//	xyzw.parent = this;
   5352 
   5353 		Value *value = rhs.loadValue();
   5354 		storeValue(value);
   5355 	}
   5356 
   5357 	Int4::Int4(const Reference<UInt4> &rhs)
   5358 	{
   5359 	//	xyzw.parent = this;
   5360 
   5361 		Value *value = rhs.loadValue();
   5362 		storeValue(value);
   5363 	}
   5364 
   5365 	Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
   5366 	{
   5367 		Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
   5368 		Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
   5369 
   5370 		Value *long2 = UndefValue::get(Long2::getType());
   5371 		long2 = Nucleus::createInsertElement(long2, loLong, 0);
   5372 		long2 = Nucleus::createInsertElement(long2, hiLong, 1);
   5373 		Value *int4 = Nucleus::createBitCast(long2, Int4::getType());
   5374 
   5375 		storeValue(int4);
   5376 	}
   5377 
   5378 	RValue<Int4> Int4::operator=(RValue<Int4> rhs) const
   5379 	{
   5380 		storeValue(rhs.value);
   5381 
   5382 		return rhs;
   5383 	}
   5384 
   5385 	RValue<Int4> Int4::operator=(const Int4 &rhs) const
   5386 	{
   5387 		Value *value = rhs.loadValue();
   5388 		storeValue(value);
   5389 
   5390 		return RValue<Int4>(value);
   5391 	}
   5392 
   5393 	RValue<Int4> Int4::operator=(const Reference<Int4> &rhs) const
   5394 	{
   5395 		Value *value = rhs.loadValue();
   5396 		storeValue(value);
   5397 
   5398 		return RValue<Int4>(value);
   5399 	}
   5400 
   5401 	RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
   5402 	{
   5403 		return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
   5404 	}
   5405 
   5406 	RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
   5407 	{
   5408 		return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
   5409 	}
   5410 
   5411 	RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
   5412 	{
   5413 		return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
   5414 	}
   5415 
   5416 	RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
   5417 	{
   5418 		return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
   5419 	}
   5420 
   5421 	RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
   5422 	{
   5423 		return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
   5424 	}
   5425 
   5426 	RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
   5427 	{
   5428 		return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
   5429 	}
   5430 
   5431 	RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
   5432 	{
   5433 		return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
   5434 	}
   5435 
   5436 	RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
   5437 	{
   5438 		return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
   5439 	}
   5440 
   5441 	RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
   5442 	{
   5443 		return x86::pslld(lhs, rhs);
   5444 	}
   5445 
   5446 	RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
   5447 	{
   5448 		return x86::psrad(lhs, rhs);
   5449 	}
   5450 
   5451 	RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
   5452 	{
   5453 		return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
   5454 	}
   5455 
   5456 	RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
   5457 	{
   5458 		return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
   5459 	}
   5460 
   5461 	RValue<Int4> operator+=(const Int4 &lhs, RValue<Int4> rhs)
   5462 	{
   5463 		return lhs = lhs + rhs;
   5464 	}
   5465 
   5466 	RValue<Int4> operator-=(const Int4 &lhs, RValue<Int4> rhs)
   5467 	{
   5468 		return lhs = lhs - rhs;
   5469 	}
   5470 
   5471 	RValue<Int4> operator*=(const Int4 &lhs, RValue<Int4> rhs)
   5472 	{
   5473 		return lhs = lhs * rhs;
   5474 	}
   5475 
   5476 //	RValue<Int4> operator/=(const Int4 &lhs, RValue<Int4> rhs)
   5477 //	{
   5478 //		return lhs = lhs / rhs;
   5479 //	}
   5480 
   5481 //	RValue<Int4> operator%=(const Int4 &lhs, RValue<Int4> rhs)
   5482 //	{
   5483 //		return lhs = lhs % rhs;
   5484 //	}
   5485 
   5486 	RValue<Int4> operator&=(const Int4 &lhs, RValue<Int4> rhs)
   5487 	{
   5488 		return lhs = lhs & rhs;
   5489 	}
   5490 
   5491 	RValue<Int4> operator|=(const Int4 &lhs, RValue<Int4> rhs)
   5492 	{
   5493 		return lhs = lhs | rhs;
   5494 	}
   5495 
   5496 	RValue<Int4> operator^=(const Int4 &lhs, RValue<Int4> rhs)
   5497 	{
   5498 		return lhs = lhs ^ rhs;
   5499 	}
   5500 
   5501 	RValue<Int4> operator<<=(const Int4 &lhs, unsigned char rhs)
   5502 	{
   5503 		return lhs = lhs << rhs;
   5504 	}
   5505 
   5506 	RValue<Int4> operator>>=(const Int4 &lhs, unsigned char rhs)
   5507 	{
   5508 		return lhs = lhs >> rhs;
   5509 	}
   5510 
   5511 	RValue<Int4> operator+(RValue<Int4> val)
   5512 	{
   5513 		return val;
   5514 	}
   5515 
   5516 	RValue<Int4> operator-(RValue<Int4> val)
   5517 	{
   5518 		return RValue<Int4>(Nucleus::createNeg(val.value));
   5519 	}
   5520 
   5521 	RValue<Int4> operator~(RValue<Int4> val)
   5522 	{
   5523 		return RValue<Int4>(Nucleus::createNot(val.value));
   5524 	}
   5525 
   5526 	RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
   5527 	{
   5528 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5529 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5530 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
   5531 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
   5532 	}
   5533 
   5534 	RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
   5535 	{
   5536 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
   5537 	}
   5538 
   5539 	RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
   5540 	{
   5541 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5542 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5543 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
   5544 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
   5545 	}
   5546 
   5547 	RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
   5548 	{
   5549 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
   5550 	}
   5551 
   5552 	RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
   5553 	{
   5554 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5555 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5556 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
   5557 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
   5558 	}
   5559 
   5560 	RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
   5561 	{
   5562 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
   5563 	}
   5564 
   5565 	RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
   5566 	{
   5567 		if(CPUID::supportsSSE4_1())
   5568 		{
   5569 			return x86::pmaxsd(x, y);
   5570 		}
   5571 		else
   5572 		{
   5573 			RValue<Int4> greater = CmpNLE(x, y);
   5574 			return x & greater | y & ~greater;
   5575 		}
   5576 	}
   5577 
   5578 	RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
   5579 	{
   5580 		if(CPUID::supportsSSE4_1())
   5581 		{
   5582 			return x86::pminsd(x, y);
   5583 		}
   5584 		else
   5585 		{
   5586 			RValue<Int4> less = CmpLT(x, y);
   5587 			return x & less | y & ~less;
   5588 		}
   5589 	}
   5590 
   5591 	RValue<Int4> RoundInt(RValue<Float4> cast)
   5592 	{
   5593 		return x86::cvtps2dq(cast);
   5594 	}
   5595 
   5596 	RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
   5597 	{
   5598 		return x86::packssdw(x, y);
   5599 	}
   5600 
   5601 	RValue<Int> Extract(RValue<Int4> x, int i)
   5602 	{
   5603 		return RValue<Int>(Nucleus::createExtractElement(x.value, i));
   5604 	}
   5605 
   5606 	RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
   5607 	{
   5608 		return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
   5609 	}
   5610 
   5611 	RValue<Int> SignMask(RValue<Int4> x)
   5612 	{
   5613 		return x86::movmskps(As<Float4>(x));
   5614 	}
   5615 
   5616 	RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
   5617 	{
   5618 		return RValue<Int4>(Nucleus::createSwizzle(x.value, select));
   5619 	}
   5620 
   5621 	Type *Int4::getType()
   5622 	{
   5623 		return VectorType::get(Int::getType(), 4);
   5624 	}
   5625 
   5626 	UInt4::UInt4(RValue<Float4> cast)
   5627 	{
   5628 	//	xyzw.parent = this;
   5629 
   5630 		Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
   5631 
   5632 		storeValue(xyzw);
   5633 	}
   5634 
   5635 	UInt4::UInt4()
   5636 	{
   5637 	//	xyzw.parent = this;
   5638 	}
   5639 
   5640 	UInt4::UInt4(int xyzw)
   5641 	{
   5642 		constant(xyzw, xyzw, xyzw, xyzw);
   5643 	}
   5644 
   5645 	UInt4::UInt4(int x, int yzw)
   5646 	{
   5647 		constant(x, yzw, yzw, yzw);
   5648 	}
   5649 
   5650 	UInt4::UInt4(int x, int y, int zw)
   5651 	{
   5652 		constant(x, y, zw, zw);
   5653 	}
   5654 
   5655 	UInt4::UInt4(int x, int y, int z, int w)
   5656 	{
   5657 		constant(x, y, z, w);
   5658 	}
   5659 
   5660 	void UInt4::constant(int x, int y, int z, int w)
   5661 	{
   5662 	//	xyzw.parent = this;
   5663 
   5664 		Constant *constantVector[4];
   5665 		constantVector[0] = Nucleus::createConstantInt(x);
   5666 		constantVector[1] = Nucleus::createConstantInt(y);
   5667 		constantVector[2] = Nucleus::createConstantInt(z);
   5668 		constantVector[3] = Nucleus::createConstantInt(w);
   5669 
   5670 		storeValue(Nucleus::createConstantVector(constantVector, 4));
   5671 	}
   5672 
   5673 	UInt4::UInt4(RValue<UInt4> rhs)
   5674 	{
   5675 	//	xyzw.parent = this;
   5676 
   5677 		storeValue(rhs.value);
   5678 	}
   5679 
   5680 	UInt4::UInt4(const UInt4 &rhs)
   5681 	{
   5682 	//	xyzw.parent = this;
   5683 
   5684 		Value *value = rhs.loadValue();
   5685 		storeValue(value);
   5686 	}
   5687 
   5688 	UInt4::UInt4(const Reference<UInt4> &rhs)
   5689 	{
   5690 	//	xyzw.parent = this;
   5691 
   5692 		Value *value = rhs.loadValue();
   5693 		storeValue(value);
   5694 	}
   5695 
   5696 	UInt4::UInt4(RValue<Int4> rhs)
   5697 	{
   5698 	//	xyzw.parent = this;
   5699 
   5700 		storeValue(rhs.value);
   5701 	}
   5702 
   5703 	UInt4::UInt4(const Int4 &rhs)
   5704 	{
   5705 	//	xyzw.parent = this;
   5706 
   5707 		Value *value = rhs.loadValue();
   5708 		storeValue(value);
   5709 	}
   5710 
   5711 	UInt4::UInt4(const Reference<Int4> &rhs)
   5712 	{
   5713 	//	xyzw.parent = this;
   5714 
   5715 		Value *value = rhs.loadValue();
   5716 		storeValue(value);
   5717 	}
   5718 
   5719 	UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
   5720 	{
   5721 		Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
   5722 		Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
   5723 
   5724 		Value *long2 = UndefValue::get(Long2::getType());
   5725 		long2 = Nucleus::createInsertElement(long2, loLong, 0);
   5726 		long2 = Nucleus::createInsertElement(long2, hiLong, 1);
   5727 		Value *uint4 = Nucleus::createBitCast(long2, Int4::getType());
   5728 
   5729 		storeValue(uint4);
   5730 	}
   5731 
   5732 	RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs) const
   5733 	{
   5734 		storeValue(rhs.value);
   5735 
   5736 		return rhs;
   5737 	}
   5738 
   5739 	RValue<UInt4> UInt4::operator=(const UInt4 &rhs) const
   5740 	{
   5741 		Value *value = rhs.loadValue();
   5742 		storeValue(value);
   5743 
   5744 		return RValue<UInt4>(value);
   5745 	}
   5746 
   5747 	RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs) const
   5748 	{
   5749 		Value *value = rhs.loadValue();
   5750 		storeValue(value);
   5751 
   5752 		return RValue<UInt4>(value);
   5753 	}
   5754 
   5755 	RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5756 	{
   5757 		return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
   5758 	}
   5759 
   5760 	RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5761 	{
   5762 		return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
   5763 	}
   5764 
   5765 	RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5766 	{
   5767 		return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
   5768 	}
   5769 
   5770 	RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5771 	{
   5772 		return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
   5773 	}
   5774 
   5775 	RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5776 	{
   5777 		return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
   5778 	}
   5779 
   5780 	RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5781 	{
   5782 		return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
   5783 	}
   5784 
   5785 	RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5786 	{
   5787 		return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
   5788 	}
   5789 
   5790 	RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5791 	{
   5792 		return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
   5793 	}
   5794 
   5795 	RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
   5796 	{
   5797 		return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
   5798 	}
   5799 
   5800 	RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
   5801 	{
   5802 		return x86::psrld(lhs, rhs);
   5803 	}
   5804 
   5805 	RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5806 	{
   5807 		return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
   5808 	}
   5809 
   5810 	RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
   5811 	{
   5812 		return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
   5813 	}
   5814 
   5815 	RValue<UInt4> operator+=(const UInt4 &lhs, RValue<UInt4> rhs)
   5816 	{
   5817 		return lhs = lhs + rhs;
   5818 	}
   5819 
   5820 	RValue<UInt4> operator-=(const UInt4 &lhs, RValue<UInt4> rhs)
   5821 	{
   5822 		return lhs = lhs - rhs;
   5823 	}
   5824 
   5825 	RValue<UInt4> operator*=(const UInt4 &lhs, RValue<UInt4> rhs)
   5826 	{
   5827 		return lhs = lhs * rhs;
   5828 	}
   5829 
   5830 //	RValue<UInt4> operator/=(const UInt4 &lhs, RValue<UInt4> rhs)
   5831 //	{
   5832 //		return lhs = lhs / rhs;
   5833 //	}
   5834 
   5835 //	RValue<UInt4> operator%=(const UInt4 &lhs, RValue<UInt4> rhs)
   5836 //	{
   5837 //		return lhs = lhs % rhs;
   5838 //	}
   5839 
   5840 	RValue<UInt4> operator&=(const UInt4 &lhs, RValue<UInt4> rhs)
   5841 	{
   5842 		return lhs = lhs & rhs;
   5843 	}
   5844 
   5845 	RValue<UInt4> operator|=(const UInt4 &lhs, RValue<UInt4> rhs)
   5846 	{
   5847 		return lhs = lhs | rhs;
   5848 	}
   5849 
   5850 	RValue<UInt4> operator^=(const UInt4 &lhs, RValue<UInt4> rhs)
   5851 	{
   5852 		return lhs = lhs ^ rhs;
   5853 	}
   5854 
   5855 	RValue<UInt4> operator<<=(const UInt4 &lhs, unsigned char rhs)
   5856 	{
   5857 		return lhs = lhs << rhs;
   5858 	}
   5859 
   5860 	RValue<UInt4> operator>>=(const UInt4 &lhs, unsigned char rhs)
   5861 	{
   5862 		return lhs = lhs >> rhs;
   5863 	}
   5864 
   5865 	RValue<UInt4> operator+(RValue<UInt4> val)
   5866 	{
   5867 		return val;
   5868 	}
   5869 
   5870 	RValue<UInt4> operator-(RValue<UInt4> val)
   5871 	{
   5872 		return RValue<UInt4>(Nucleus::createNeg(val.value));
   5873 	}
   5874 
   5875 	RValue<UInt4> operator~(RValue<UInt4> val)
   5876 	{
   5877 		return RValue<UInt4>(Nucleus::createNot(val.value));
   5878 	}
   5879 
   5880 	RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
   5881 	{
   5882 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5883 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5884 		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
   5885 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
   5886 	}
   5887 
   5888 	RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
   5889 	{
   5890 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
   5891 	}
   5892 
   5893 	RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
   5894 	{
   5895 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5896 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5897 		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
   5898 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
   5899 	}
   5900 
   5901 	RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
   5902 	{
   5903 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
   5904 	}
   5905 
   5906 	RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
   5907 	{
   5908 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
   5909 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
   5910 		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
   5911 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
   5912 	}
   5913 
   5914 	RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
   5915 	{
   5916 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
   5917 	}
   5918 
   5919 	RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
   5920 	{
   5921 		if(CPUID::supportsSSE4_1())
   5922 		{
   5923 			return x86::pmaxud(x, y);
   5924 		}
   5925 		else
   5926 		{
   5927 			RValue<UInt4> greater = CmpNLE(x, y);
   5928 			return x & greater | y & ~greater;
   5929 		}
   5930 	}
   5931 
   5932 	RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
   5933 	{
   5934 		if(CPUID::supportsSSE4_1())
   5935 		{
   5936 			return x86::pminud(x, y);
   5937 		}
   5938 		else
   5939 		{
   5940 			RValue<UInt4> less = CmpLT(x, y);
   5941 			return x & less | y & ~less;
   5942 		}
   5943 	}
   5944 
   5945 	RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
   5946 	{
   5947 		return x86::packusdw(x, y);   // FIXME: Fallback required
   5948 	}
   5949 
   5950 	Type *UInt4::getType()
   5951 	{
   5952 		return VectorType::get(UInt::getType(), 4);
   5953 	}
   5954 
   5955 	Float::Float(RValue<Int> cast)
   5956 	{
   5957 		Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
   5958 
   5959 		storeValue(integer);
   5960 	}
   5961 
   5962 	Float::Float()
   5963 	{
   5964 
   5965 	}
   5966 
   5967 	Float::Float(float x)
   5968 	{
   5969 		storeValue(Nucleus::createConstantFloat(x));
   5970 	}
   5971 
   5972 	Float::Float(RValue<Float> rhs)
   5973 	{
   5974 		storeValue(rhs.value);
   5975 	}
   5976 
   5977 	Float::Float(const Float &rhs)
   5978 	{
   5979 		Value *value = rhs.loadValue();
   5980 		storeValue(value);
   5981 	}
   5982 
   5983 	Float::Float(const Reference<Float> &rhs)
   5984 	{
   5985 		Value *value = rhs.loadValue();
   5986 		storeValue(value);
   5987 	}
   5988 
   5989 	RValue<Float> Float::operator=(RValue<Float> rhs) const
   5990 	{
   5991 		storeValue(rhs.value);
   5992 
   5993 		return rhs;
   5994 	}
   5995 
   5996 	RValue<Float> Float::operator=(const Float &rhs) const
   5997 	{
   5998 		Value *value = rhs.loadValue();
   5999 		storeValue(value);
   6000 
   6001 		return RValue<Float>(value);
   6002 	}
   6003 
   6004 	RValue<Float> Float::operator=(const Reference<Float> &rhs) const
   6005 	{
   6006 		Value *value = rhs.loadValue();
   6007 		storeValue(value);
   6008 
   6009 		return RValue<Float>(value);
   6010 	}
   6011 
   6012 	RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
   6013 	{
   6014 		return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
   6015 	}
   6016 
   6017 	RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
   6018 	{
   6019 		return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
   6020 	}
   6021 
   6022 	RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
   6023 	{
   6024 		return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
   6025 	}
   6026 
   6027 	RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
   6028 	{
   6029 		return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
   6030 	}
   6031 
   6032 	RValue<Float> operator+=(const Float &lhs, RValue<Float> rhs)
   6033 	{
   6034 		return lhs = lhs + rhs;
   6035 	}
   6036 
   6037 	RValue<Float> operator-=(const Float &lhs, RValue<Float> rhs)
   6038 	{
   6039 		return lhs = lhs - rhs;
   6040 	}
   6041 
   6042 	RValue<Float> operator*=(const Float &lhs, RValue<Float> rhs)
   6043 	{
   6044 		return lhs = lhs * rhs;
   6045 	}
   6046 
   6047 	RValue<Float> operator/=(const Float &lhs, RValue<Float> rhs)
   6048 	{
   6049 		return lhs = lhs / rhs;
   6050 	}
   6051 
   6052 	RValue<Float> operator+(RValue<Float> val)
   6053 	{
   6054 		return val;
   6055 	}
   6056 
   6057 	RValue<Float> operator-(RValue<Float> val)
   6058 	{
   6059 		return RValue<Float>(Nucleus::createFNeg(val.value));
   6060 	}
   6061 
   6062 	RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
   6063 	{
   6064 		return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
   6065 	}
   6066 
   6067 	RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
   6068 	{
   6069 		return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
   6070 	}
   6071 
   6072 	RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
   6073 	{
   6074 		return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
   6075 	}
   6076 
   6077 	RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
   6078 	{
   6079 		return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
   6080 	}
   6081 
   6082 	RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
   6083 	{
   6084 		return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
   6085 	}
   6086 
   6087 	RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
   6088 	{
   6089 		return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
   6090 	}
   6091 
   6092 	RValue<Float> Abs(RValue<Float> x)
   6093 	{
   6094 		return IfThenElse(x > 0.0f, x, -x);
   6095 	}
   6096 
   6097 	RValue<Float> Max(RValue<Float> x, RValue<Float> y)
   6098 	{
   6099 		return IfThenElse(x > y, x, y);
   6100 	}
   6101 
   6102 	RValue<Float> Min(RValue<Float> x, RValue<Float> y)
   6103 	{
   6104 		return IfThenElse(x < y, x, y);
   6105 	}
   6106 
   6107 	RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
   6108 	{
   6109 		if(exactAtPow2)
   6110 		{
   6111 			// rcpss uses a piecewise-linear approximation which minimizes the relative error
   6112 			// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
   6113 			return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
   6114 		}
   6115 		else
   6116 		{
   6117 			return x86::rcpss(x);
   6118 		}
   6119 	}
   6120 
   6121 	RValue<Float> RcpSqrt_pp(RValue<Float> x)
   6122 	{
   6123 		return x86::rsqrtss(x);
   6124 	}
   6125 
   6126 	RValue<Float> Sqrt(RValue<Float> x)
   6127 	{
   6128 		return x86::sqrtss(x);
   6129 	}
   6130 
   6131 	RValue<Float> Round(RValue<Float> x)
   6132 	{
   6133 		if(CPUID::supportsSSE4_1())
   6134 		{
   6135 			return x86::roundss(x, 0);
   6136 		}
   6137 		else
   6138 		{
   6139 			return Float4(Round(Float4(x))).x;
   6140 		}
   6141 	}
   6142 
   6143 	RValue<Float> Trunc(RValue<Float> x)
   6144 	{
   6145 		if(CPUID::supportsSSE4_1())
   6146 		{
   6147 			return x86::roundss(x, 3);
   6148 		}
   6149 		else
   6150 		{
   6151 			return Float(Int(x));   // Rounded toward zero
   6152 		}
   6153 	}
   6154 
   6155 	RValue<Float> Frac(RValue<Float> x)
   6156 	{
   6157 		if(CPUID::supportsSSE4_1())
   6158 		{
   6159 			return x - x86::floorss(x);
   6160 		}
   6161 		else
   6162 		{
   6163 			return Float4(Frac(Float4(x))).x;
   6164 		}
   6165 	}
   6166 
   6167 	RValue<Float> Floor(RValue<Float> x)
   6168 	{
   6169 		if(CPUID::supportsSSE4_1())
   6170 		{
   6171 			return x86::floorss(x);
   6172 		}
   6173 		else
   6174 		{
   6175 			return Float4(Floor(Float4(x))).x;
   6176 		}
   6177 	}
   6178 
   6179 	RValue<Float> Ceil(RValue<Float> x)
   6180 	{
   6181 		if(CPUID::supportsSSE4_1())
   6182 		{
   6183 			return x86::ceilss(x);
   6184 		}
   6185 		else
   6186 		{
   6187 			return Float4(Ceil(Float4(x))).x;
   6188 		}
   6189 	}
   6190 
   6191 	Type *Float::getType()
   6192 	{
   6193 		return Type::getFloatTy(*Nucleus::getContext());
   6194 	}
   6195 
   6196 	Float2::Float2(RValue<Float4> cast)
   6197 	{
   6198 	//	xyzw.parent = this;
   6199 
   6200 		Value *int64x2 = Nucleus::createBitCast(cast.value, Long2::getType());
   6201 		Value *int64 = Nucleus::createExtractElement(int64x2, 0);
   6202 		Value *float2 = Nucleus::createBitCast(int64, Float2::getType());
   6203 
   6204 		storeValue(float2);
   6205 	}
   6206 
   6207 	Type *Float2::getType()
   6208 	{
   6209 		return VectorType::get(Float::getType(), 2);
   6210 	}
   6211 
   6212 	Float4::Float4(RValue<Byte4> cast)
   6213 	{
   6214 		xyzw.parent = this;
   6215 
   6216 		#if 0
   6217 			Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType());   // FIXME: Crashes
   6218 		#elif 0
   6219 			Value *vector = loadValue();
   6220 
   6221 			Value *i8x = Nucleus::createExtractElement(cast.value, 0);
   6222 			Value *f32x = Nucleus::createUIToFP(i8x, Float::getType());
   6223 			Value *x = Nucleus::createInsertElement(vector, f32x, 0);
   6224 
   6225 			Value *i8y = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(1));
   6226 			Value *f32y = Nucleus::createUIToFP(i8y, Float::getType());
   6227 			Value *xy = Nucleus::createInsertElement(x, f32y, Nucleus::createConstantInt(1));
   6228 
   6229 			Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
   6230 			Value *f32z = Nucleus::createUIToFP(i8z, Float::getType());
   6231 			Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
   6232 
   6233 			Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
   6234 			Value *f32w = Nucleus::createUIToFP(i8w, Float::getType());
   6235 			Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
   6236 		#else
   6237 			Value *x = Nucleus::createBitCast(cast.value, Int::getType());
   6238 			Value *a = Nucleus::createInsertElement(UndefValue::get(Int4::getType()), x, 0);
   6239 
   6240 			Value *e;
   6241 
   6242 			if(CPUID::supportsSSE4_1())
   6243 			{
   6244 				e = x86::pmovzxbd(RValue<Int4>(a)).value;
   6245 			}
   6246 			else
   6247 			{
   6248 				Constant *swizzle[16];
   6249 				swizzle[0] = Nucleus::createConstantInt(0);
   6250 				swizzle[1] = Nucleus::createConstantInt(16);
   6251 				swizzle[2] = Nucleus::createConstantInt(1);
   6252 				swizzle[3] = Nucleus::createConstantInt(17);
   6253 				swizzle[4] = Nucleus::createConstantInt(2);
   6254 				swizzle[5] = Nucleus::createConstantInt(18);
   6255 				swizzle[6] = Nucleus::createConstantInt(3);
   6256 				swizzle[7] = Nucleus::createConstantInt(19);
   6257 				swizzle[8] = Nucleus::createConstantInt(4);
   6258 				swizzle[9] = Nucleus::createConstantInt(20);
   6259 				swizzle[10] = Nucleus::createConstantInt(5);
   6260 				swizzle[11] = Nucleus::createConstantInt(21);
   6261 				swizzle[12] = Nucleus::createConstantInt(6);
   6262 				swizzle[13] = Nucleus::createConstantInt(22);
   6263 				swizzle[14] = Nucleus::createConstantInt(7);
   6264 				swizzle[15] = Nucleus::createConstantInt(23);
   6265 
   6266 				Value *b = Nucleus::createBitCast(a, Byte16::getType());
   6267 				Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::getType()), Nucleus::createConstantVector(swizzle, 16));
   6268 
   6269 				Constant *swizzle2[8];
   6270 				swizzle2[0] = Nucleus::createConstantInt(0);
   6271 				swizzle2[1] = Nucleus::createConstantInt(8);
   6272 				swizzle2[2] = Nucleus::createConstantInt(1);
   6273 				swizzle2[3] = Nucleus::createConstantInt(9);
   6274 				swizzle2[4] = Nucleus::createConstantInt(2);
   6275 				swizzle2[5] = Nucleus::createConstantInt(10);
   6276 				swizzle2[6] = Nucleus::createConstantInt(3);
   6277 				swizzle2[7] = Nucleus::createConstantInt(11);
   6278 
   6279 				Value *d = Nucleus::createBitCast(c, Short8::getType());
   6280 				e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::getType()), Nucleus::createConstantVector(swizzle2, 8));
   6281 			}
   6282 
   6283 			Value *f = Nucleus::createBitCast(e, Int4::getType());
   6284 			Value *g = Nucleus::createSIToFP(f, Float4::getType());
   6285 			Value *xyzw = g;
   6286 		#endif
   6287 
   6288 		storeValue(xyzw);
   6289 	}
   6290 
   6291 	Float4::Float4(RValue<SByte4> cast)
   6292 	{
   6293 		xyzw.parent = this;
   6294 
   6295 		#if 0
   6296 			Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());   // FIXME: Crashes
   6297 		#elif 0
   6298 			Value *vector = loadValue();
   6299 
   6300 			Value *i8x = Nucleus::createExtractElement(cast.value, 0);
   6301 			Value *f32x = Nucleus::createSIToFP(i8x, Float::getType());
   6302 			Value *x = Nucleus::createInsertElement(vector, f32x, 0);
   6303 
   6304 			Value *i8y = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(1));
   6305 			Value *f32y = Nucleus::createSIToFP(i8y, Float::getType());
   6306 			Value *xy = Nucleus::createInsertElement(x, f32y, Nucleus::createConstantInt(1));
   6307 
   6308 			Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
   6309 			Value *f32z = Nucleus::createSIToFP(i8z, Float::getType());
   6310 			Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
   6311 
   6312 			Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
   6313 			Value *f32w = Nucleus::createSIToFP(i8w, Float::getType());
   6314 			Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
   6315 		#else
   6316 			Value *x = Nucleus::createBitCast(cast.value, Int::getType());
   6317 			Value *a = Nucleus::createInsertElement(UndefValue::get(Int4::getType()), x, 0);
   6318 
   6319 			Value *g;
   6320 
   6321 			if(CPUID::supportsSSE4_1())
   6322 			{
   6323 				g = x86::pmovsxbd(RValue<Int4>(a)).value;
   6324 			}
   6325 			else
   6326 			{
   6327 				Constant *swizzle[16];
   6328 				swizzle[0] = Nucleus::createConstantInt(0);
   6329 				swizzle[1] = Nucleus::createConstantInt(0);
   6330 				swizzle[2] = Nucleus::createConstantInt(1);
   6331 				swizzle[3] = Nucleus::createConstantInt(1);
   6332 				swizzle[4] = Nucleus::createConstantInt(2);
   6333 				swizzle[5] = Nucleus::createConstantInt(2);
   6334 				swizzle[6] = Nucleus::createConstantInt(3);
   6335 				swizzle[7] = Nucleus::createConstantInt(3);
   6336 				swizzle[8] = Nucleus::createConstantInt(4);
   6337 				swizzle[9] = Nucleus::createConstantInt(4);
   6338 				swizzle[10] = Nucleus::createConstantInt(5);
   6339 				swizzle[11] = Nucleus::createConstantInt(5);
   6340 				swizzle[12] = Nucleus::createConstantInt(6);
   6341 				swizzle[13] = Nucleus::createConstantInt(6);
   6342 				swizzle[14] = Nucleus::createConstantInt(7);
   6343 				swizzle[15] = Nucleus::createConstantInt(7);
   6344 
   6345 				Value *b = Nucleus::createBitCast(a, Byte16::getType());
   6346 				Value *c = Nucleus::createShuffleVector(b, b, Nucleus::createConstantVector(swizzle, 16));
   6347 
   6348 				Constant *swizzle2[8];
   6349 				swizzle2[0] = Nucleus::createConstantInt(0);
   6350 				swizzle2[1] = Nucleus::createConstantInt(0);
   6351 				swizzle2[2] = Nucleus::createConstantInt(1);
   6352 				swizzle2[3] = Nucleus::createConstantInt(1);
   6353 				swizzle2[4] = Nucleus::createConstantInt(2);
   6354 				swizzle2[5] = Nucleus::createConstantInt(2);
   6355 				swizzle2[6] = Nucleus::createConstantInt(3);
   6356 				swizzle2[7] = Nucleus::createConstantInt(3);
   6357 
   6358 				Value *d = Nucleus::createBitCast(c, Short8::getType());
   6359 				Value *e = Nucleus::createShuffleVector(d, d, Nucleus::createConstantVector(swizzle2, 8));
   6360 
   6361 				Value *f = Nucleus::createBitCast(e, Int4::getType());
   6362 			//	g = Nucleus::createAShr(f, Nucleus::createConstantInt(24));
   6363 				g = x86::psrad(RValue<Int4>(f), 24).value;
   6364 			}
   6365 
   6366 			Value *xyzw = Nucleus::createSIToFP(g, Float4::getType());
   6367 		#endif
   6368 
   6369 		storeValue(xyzw);
   6370 	}
   6371 
   6372 	Float4::Float4(RValue<Short4> cast)
   6373 	{
   6374 		xyzw.parent = this;
   6375 
   6376 		Int4 c(cast);
   6377 		storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
   6378 	}
   6379 
   6380 	Float4::Float4(RValue<UShort4> cast)
   6381 	{
   6382 		xyzw.parent = this;
   6383 
   6384 		Int4 c(cast);
   6385 		storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
   6386 	}
   6387 
   6388 	Float4::Float4(RValue<Int4> cast)
   6389 	{
   6390 		xyzw.parent = this;
   6391 
   6392 		Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
   6393 
   6394 		storeValue(xyzw);
   6395 	}
   6396 
   6397 	Float4::Float4(RValue<UInt4> cast)
   6398 	{
   6399 		xyzw.parent = this;
   6400 
   6401 		Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType());
   6402 
   6403 		storeValue(xyzw);
   6404 	}
   6405 
   6406 	Float4::Float4()
   6407 	{
   6408 		xyzw.parent = this;
   6409 	}
   6410 
   6411 	Float4::Float4(float xyzw)
   6412 	{
   6413 		constant(xyzw, xyzw, xyzw, xyzw);
   6414 	}
   6415 
   6416 	Float4::Float4(float x, float yzw)
   6417 	{
   6418 		constant(x, yzw, yzw, yzw);
   6419 	}
   6420 
   6421 	Float4::Float4(float x, float y, float zw)
   6422 	{
   6423 		constant(x, y, zw, zw);
   6424 	}
   6425 
   6426 	Float4::Float4(float x, float y, float z, float w)
   6427 	{
   6428 		constant(x, y, z, w);
   6429 	}
   6430 
   6431 	void Float4::constant(float x, float y, float z, float w)
   6432 	{
   6433 		xyzw.parent = this;
   6434 
   6435 		Constant *constantVector[4];
   6436 		constantVector[0] = Nucleus::createConstantFloat(x);
   6437 		constantVector[1] = Nucleus::createConstantFloat(y);
   6438 		constantVector[2] = Nucleus::createConstantFloat(z);
   6439 		constantVector[3] = Nucleus::createConstantFloat(w);
   6440 
   6441 		storeValue(Nucleus::createConstantVector(constantVector, 4));
   6442 	}
   6443 
   6444 	Float4::Float4(RValue<Float4> rhs)
   6445 	{
   6446 		xyzw.parent = this;
   6447 
   6448 		storeValue(rhs.value);
   6449 	}
   6450 
   6451 	Float4::Float4(const Float4 &rhs)
   6452 	{
   6453 		xyzw.parent = this;
   6454 
   6455 		Value *value = rhs.loadValue();
   6456 		storeValue(value);
   6457 	}
   6458 
   6459 	Float4::Float4(const Reference<Float4> &rhs)
   6460 	{
   6461 		xyzw.parent = this;
   6462 
   6463 		Value *value = rhs.loadValue();
   6464 		storeValue(value);
   6465 	}
   6466 
   6467 	Float4::Float4(RValue<Float> rhs)
   6468 	{
   6469 		xyzw.parent = this;
   6470 
   6471 		Value *vector = loadValue();
   6472 		Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
   6473 
   6474 		Constant *swizzle[4];
   6475 		swizzle[0] = Nucleus::createConstantInt(0);
   6476 		swizzle[1] = Nucleus::createConstantInt(0);
   6477 		swizzle[2] = Nucleus::createConstantInt(0);
   6478 		swizzle[3] = Nucleus::createConstantInt(0);
   6479 
   6480 		Value *replicate = Nucleus::createShuffleVector(insert, UndefValue::get(Float4::getType()), Nucleus::createConstantVector(swizzle, 4));
   6481 
   6482 		storeValue(replicate);
   6483 	}
   6484 
   6485 	Float4::Float4(const Float &rhs)
   6486 	{
   6487 		xyzw.parent = this;
   6488 
   6489 		*this = RValue<Float>(rhs.loadValue());
   6490 	}
   6491 
   6492 	Float4::Float4(const Reference<Float> &rhs)
   6493 	{
   6494 		xyzw.parent = this;
   6495 
   6496 		*this = RValue<Float>(rhs.loadValue());
   6497 	}
   6498 
   6499 	RValue<Float4> Float4::operator=(float x) const
   6500 	{
   6501 		return *this = Float4(x, x, x, x);
   6502 	}
   6503 
   6504 	RValue<Float4> Float4::operator=(RValue<Float4> rhs) const
   6505 	{
   6506 		storeValue(rhs.value);
   6507 
   6508 		return rhs;
   6509 	}
   6510 
   6511 	RValue<Float4> Float4::operator=(const Float4 &rhs) const
   6512 	{
   6513 		Value *value = rhs.loadValue();
   6514 		storeValue(value);
   6515 
   6516 		return RValue<Float4>(value);
   6517 	}
   6518 
   6519 	RValue<Float4> Float4::operator=(const Reference<Float4> &rhs) const
   6520 	{
   6521 		Value *value = rhs.loadValue();
   6522 		storeValue(value);
   6523 
   6524 		return RValue<Float4>(value);
   6525 	}
   6526 
   6527 	RValue<Float4> Float4::operator=(RValue<Float> rhs) const
   6528 	{
   6529 		return *this = Float4(rhs);
   6530 	}
   6531 
   6532 	RValue<Float4> Float4::operator=(const Float &rhs) const
   6533 	{
   6534 		return *this = Float4(rhs);
   6535 	}
   6536 
   6537 	RValue<Float4> Float4::operator=(const Reference<Float> &rhs) const
   6538 	{
   6539 		return *this = Float4(rhs);
   6540 	}
   6541 
   6542 	RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
   6543 	{
   6544 		return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
   6545 	}
   6546 
   6547 	RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
   6548 	{
   6549 		return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
   6550 	}
   6551 
   6552 	RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
   6553 	{
   6554 		return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
   6555 	}
   6556 
   6557 	RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
   6558 	{
   6559 		return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
   6560 	}
   6561 
   6562 	RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
   6563 	{
   6564 		return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
   6565 	}
   6566 
   6567 	RValue<Float4> operator+=(const Float4 &lhs, RValue<Float4> rhs)
   6568 	{
   6569 		return lhs = lhs + rhs;
   6570 	}
   6571 
   6572 	RValue<Float4> operator-=(const Float4 &lhs, RValue<Float4> rhs)
   6573 	{
   6574 		return lhs = lhs - rhs;
   6575 	}
   6576 
   6577 	RValue<Float4> operator*=(const Float4 &lhs, RValue<Float4> rhs)
   6578 	{
   6579 		return lhs = lhs * rhs;
   6580 	}
   6581 
   6582 	RValue<Float4> operator/=(const Float4 &lhs, RValue<Float4> rhs)
   6583 	{
   6584 		return lhs = lhs / rhs;
   6585 	}
   6586 
   6587 	RValue<Float4> operator%=(const Float4 &lhs, RValue<Float4> rhs)
   6588 	{
   6589 		return lhs = lhs % rhs;
   6590 	}
   6591 
   6592 	RValue<Float4> operator+(RValue<Float4> val)
   6593 	{
   6594 		return val;
   6595 	}
   6596 
   6597 	RValue<Float4> operator-(RValue<Float4> val)
   6598 	{
   6599 		return RValue<Float4>(Nucleus::createFNeg(val.value));
   6600 	}
   6601 
   6602 	RValue<Float4> Abs(RValue<Float4> x)
   6603 	{
   6604 		Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
   6605 
   6606 		Constant *constantVector[4];
   6607 		constantVector[0] = Nucleus::createConstantInt(0x7FFFFFFF);
   6608 		constantVector[1] = Nucleus::createConstantInt(0x7FFFFFFF);
   6609 		constantVector[2] = Nucleus::createConstantInt(0x7FFFFFFF);
   6610 		constantVector[3] = Nucleus::createConstantInt(0x7FFFFFFF);
   6611 
   6612 		Value *result = Nucleus::createAnd(vector, Nucleus::createConstantVector(constantVector, 4));
   6613 
   6614 		return RValue<Float4>(Nucleus::createBitCast(result, Float4::getType()));
   6615 	}
   6616 
   6617 	RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
   6618 	{
   6619 		return x86::maxps(x, y);
   6620 	}
   6621 
   6622 	RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
   6623 	{
   6624 		return x86::minps(x, y);
   6625 	}
   6626 
   6627 	RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
   6628 	{
   6629 		if(exactAtPow2)
   6630 		{
   6631 			// rcpps uses a piecewise-linear approximation which minimizes the relative error
   6632 			// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
   6633 			return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
   6634 		}
   6635 		else
   6636 		{
   6637 			return x86::rcpps(x);
   6638 		}
   6639 	}
   6640 
   6641 	RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
   6642 	{
   6643 		return x86::rsqrtps(x);
   6644 	}
   6645 
   6646 	RValue<Float4> Sqrt(RValue<Float4> x)
   6647 	{
   6648 		return x86::sqrtps(x);
   6649 	}
   6650 
   6651 	RValue<Float4> Insert(const Float4 &val, RValue<Float> element, int i)
   6652 	{
   6653 		llvm::Value *value = val.loadValue();
   6654 		llvm::Value *insert = Nucleus::createInsertElement(value, element.value, i);
   6655 
   6656 		val = RValue<Float4>(insert);
   6657 
   6658 		return val;
   6659 	}
   6660 
   6661 	RValue<Float> Extract(RValue<Float4> x, int i)
   6662 	{
   6663 		return RValue<Float>(Nucleus::createExtractElement(x.value, i));
   6664 	}
   6665 
   6666 	RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
   6667 	{
   6668 		return RValue<Float4>(Nucleus::createSwizzle(x.value, select));
   6669 	}
   6670 
   6671 	RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
   6672 	{
   6673 		Constant *shuffle[4];
   6674 		shuffle[0] = Nucleus::createConstantInt(((imm >> 0) & 0x03) + 0);
   6675 		shuffle[1] = Nucleus::createConstantInt(((imm >> 2) & 0x03) + 0);
   6676 		shuffle[2] = Nucleus::createConstantInt(((imm >> 4) & 0x03) + 4);
   6677 		shuffle[3] = Nucleus::createConstantInt(((imm >> 6) & 0x03) + 4);
   6678 
   6679 		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4)));
   6680 	}
   6681 
   6682 	RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
   6683 	{
   6684 		Constant *shuffle[4];
   6685 		shuffle[0] = Nucleus::createConstantInt(0);
   6686 		shuffle[1] = Nucleus::createConstantInt(4);
   6687 		shuffle[2] = Nucleus::createConstantInt(1);
   6688 		shuffle[3] = Nucleus::createConstantInt(5);
   6689 
   6690 		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4)));
   6691 	}
   6692 
   6693 	RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
   6694 	{
   6695 		Constant *shuffle[4];
   6696 		shuffle[0] = Nucleus::createConstantInt(2);
   6697 		shuffle[1] = Nucleus::createConstantInt(6);
   6698 		shuffle[2] = Nucleus::createConstantInt(3);
   6699 		shuffle[3] = Nucleus::createConstantInt(7);
   6700 
   6701 		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4)));
   6702 	}
   6703 
   6704 	RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
   6705 	{
   6706 		Value *vector = lhs.loadValue();
   6707 		Value *shuffle = Nucleus::createMask(vector, rhs.value, select);
   6708 		lhs.storeValue(shuffle);
   6709 
   6710 		return RValue<Float4>(shuffle);
   6711 	}
   6712 
   6713 	RValue<Int> SignMask(RValue<Float4> x)
   6714 	{
   6715 		return x86::movmskps(x);
   6716 	}
   6717 
   6718 	RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
   6719 	{
   6720 	//	return As<Int4>(x86::cmpeqps(x, y));
   6721 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
   6722 	}
   6723 
   6724 	RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
   6725 	{
   6726 	//	return As<Int4>(x86::cmpltps(x, y));
   6727 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
   6728 	}
   6729 
   6730 	RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
   6731 	{
   6732 	//	return As<Int4>(x86::cmpleps(x, y));
   6733 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
   6734 	}
   6735 
   6736 	RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
   6737 	{
   6738 	//	return As<Int4>(x86::cmpneqps(x, y));
   6739 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
   6740 	}
   6741 
   6742 	RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
   6743 	{
   6744 	//	return As<Int4>(x86::cmpnltps(x, y));
   6745 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
   6746 	}
   6747 
   6748 	RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
   6749 	{
   6750 	//	return As<Int4>(x86::cmpnleps(x, y));
   6751 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
   6752 	}
   6753 
   6754 	RValue<Float4> Round(RValue<Float4> x)
   6755 	{
   6756 		if(CPUID::supportsSSE4_1())
   6757 		{
   6758 			return x86::roundps(x, 0);
   6759 		}
   6760 		else
   6761 		{
   6762 			return Float4(RoundInt(x));
   6763 		}
   6764 	}
   6765 
   6766 	RValue<Float4> Trunc(RValue<Float4> x)
   6767 	{
   6768 		if(CPUID::supportsSSE4_1())
   6769 		{
   6770 			return x86::roundps(x, 3);
   6771 		}
   6772 		else
   6773 		{
   6774 			return Float4(Int4(x));   // Rounded toward zero
   6775 		}
   6776 	}
   6777 
   6778 	RValue<Float4> Frac(RValue<Float4> x)
   6779 	{
   6780 		if(CPUID::supportsSSE4_1())
   6781 		{
   6782 			return x - x86::floorps(x);
   6783 		}
   6784 		else
   6785 		{
   6786 			Float4 frc = x - Float4(Int4(x));   // Signed fractional part
   6787 
   6788 			return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
   6789 		}
   6790 	}
   6791 
   6792 	RValue<Float4> Floor(RValue<Float4> x)
   6793 	{
   6794 		if(CPUID::supportsSSE4_1())
   6795 		{
   6796 			return x86::floorps(x);
   6797 		}
   6798 		else
   6799 		{
   6800 			return x - Frac(x);
   6801 		}
   6802 	}
   6803 
   6804 	RValue<Float4> Ceil(RValue<Float4> x)
   6805 	{
   6806 		if(CPUID::supportsSSE4_1())
   6807 		{
   6808 			return x86::ceilps(x);
   6809 		}
   6810 		else
   6811 		{
   6812 			return -Floor(-x);
   6813 		}
   6814 	}
   6815 
   6816 	Type *Float4::getType()
   6817 	{
   6818 		return VectorType::get(Float::getType(), 4);
   6819 	}
   6820 
   6821 	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
   6822 	{
   6823 		return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Nucleus::createConstantInt(offset)));
   6824 	}
   6825 
   6826 	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
   6827 	{
   6828 		return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, offset.value));
   6829 	}
   6830 
   6831 	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
   6832 	{
   6833 		return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, offset.value));
   6834 	}
   6835 
   6836 	RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, int offset)
   6837 	{
   6838 		return lhs = lhs + offset;
   6839 	}
   6840 
   6841 	RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, RValue<Int> offset)
   6842 	{
   6843 		return lhs = lhs + offset;
   6844 	}
   6845 
   6846 	RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, RValue<UInt> offset)
   6847 	{
   6848 		return lhs = lhs + offset;
   6849 	}
   6850 
   6851 	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
   6852 	{
   6853 		return lhs + -offset;
   6854 	}
   6855 
   6856 	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
   6857 	{
   6858 		return lhs + -offset;
   6859 	}
   6860 
   6861 	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
   6862 	{
   6863 		return lhs + -offset;
   6864 	}
   6865 
   6866 	RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, int offset)
   6867 	{
   6868 		return lhs = lhs - offset;
   6869 	}
   6870 
   6871 	RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, RValue<Int> offset)
   6872 	{
   6873 		return lhs = lhs - offset;
   6874 	}
   6875 
   6876 	RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, RValue<UInt> offset)
   6877 	{
   6878 		return lhs = lhs - offset;
   6879 	}
   6880 
   6881 	void Return()
   6882 	{
   6883 		Nucleus::createRetVoid();
   6884 		Nucleus::setInsertBlock(Nucleus::createBasicBlock());
   6885 		Nucleus::createUnreachable();
   6886 	}
   6887 
   6888 	void Return(bool ret)
   6889 	{
   6890 		Nucleus::createRet(Nucleus::createConstantBool(ret));
   6891 		Nucleus::setInsertBlock(Nucleus::createBasicBlock());
   6892 		Nucleus::createUnreachable();
   6893 	}
   6894 
   6895 	void Return(const Int &ret)
   6896 	{
   6897 		Nucleus::createRet(ret.loadValue());
   6898 		Nucleus::setInsertBlock(Nucleus::createBasicBlock());
   6899 		Nucleus::createUnreachable();
   6900 	}
   6901 
   6902 	BasicBlock *beginLoop()
   6903 	{
   6904 		BasicBlock *loopBB = Nucleus::createBasicBlock();
   6905 
   6906 		Nucleus::createBr(loopBB);
   6907 		Nucleus::setInsertBlock(loopBB);
   6908 
   6909 		return loopBB;
   6910 	}
   6911 
   6912 	bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
   6913 	{
   6914 		Nucleus::createCondBr(cmp.value, bodyBB, endBB);
   6915 		Nucleus::setInsertBlock(bodyBB);
   6916 
   6917 		return true;
   6918 	}
   6919 
   6920 	bool elseBlock(BasicBlock *falseBB)
   6921 	{
   6922 		falseBB->back().eraseFromParent();
   6923 		Nucleus::setInsertBlock(falseBB);
   6924 
   6925 		return true;
   6926 	}
   6927 
   6928 	RValue<Long> Ticks()
   6929 	{
   6930 		Module *module = Nucleus::getModule();
   6931 		llvm::Function *rdtsc = Intrinsic::getDeclaration(module, Intrinsic::readcyclecounter);
   6932 
   6933 		return RValue<Long>(Nucleus::createCall(rdtsc));
   6934 	}
   6935 }
   6936 
   6937 namespace sw
   6938 {
   6939 	namespace x86
   6940 	{
   6941 		RValue<Int> cvtss2si(RValue<Float> val)
   6942 		{
   6943 			Module *module = Nucleus::getModule();
   6944 			llvm::Function *cvtss2si = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cvtss2si);
   6945 
   6946 			Float4 vector;
   6947 			vector.x = val;
   6948 
   6949 			return RValue<Int>(Nucleus::createCall(cvtss2si, RValue<Float4>(vector).value));
   6950 		}
   6951 
   6952 		RValue<Int2> cvtps2pi(RValue<Float4> val)
   6953 		{
   6954 			Module *module = Nucleus::getModule();
   6955 			llvm::Function *cvtps2pi = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cvtps2pi);
   6956 
   6957 			return RValue<Int2>(Nucleus::createCall(cvtps2pi, val.value));
   6958 		}
   6959 
   6960 		RValue<Int2> cvttps2pi(RValue<Float4> val)
   6961 		{
   6962 			Module *module = Nucleus::getModule();
   6963 			llvm::Function *cvttps2pi = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cvttps2pi);
   6964 
   6965 			return RValue<Int2>(Nucleus::createCall(cvttps2pi, val.value));
   6966 		}
   6967 
   6968 		RValue<Int4> cvtps2dq(RValue<Float4> val)
   6969 		{
   6970 			if(CPUID::supportsSSE2())
   6971 			{
   6972 				Module *module = Nucleus::getModule();
   6973 				llvm::Function *cvtps2dq = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_cvtps2dq);
   6974 
   6975 				return RValue<Int4>(Nucleus::createCall(cvtps2dq, val.value));
   6976 			}
   6977 			else
   6978 			{
   6979 				Int2 lo = x86::cvtps2pi(val);
   6980 				Int2 hi = x86::cvtps2pi(Swizzle(val, 0xEE));
   6981 
   6982 				return Int4(lo, hi);
   6983 			}
   6984 		}
   6985 
   6986 		RValue<Float> rcpss(RValue<Float> val)
   6987 		{
   6988 			Module *module = Nucleus::getModule();
   6989 			llvm::Function *rcpss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rcp_ss);
   6990 
   6991 			Value *vector = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), val.value, 0);
   6992 
   6993 			return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(rcpss, vector), 0));
   6994 		}
   6995 
   6996 		RValue<Float> sqrtss(RValue<Float> val)
   6997 		{
   6998 			Module *module = Nucleus::getModule();
   6999 			llvm::Function *sqrtss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_sqrt_ss);
   7000 
   7001 			Value *vector = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), val.value, 0);
   7002 
   7003 			return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(sqrtss, vector), 0));
   7004 		}
   7005 
   7006 		RValue<Float> rsqrtss(RValue<Float> val)
   7007 		{
   7008 			Module *module = Nucleus::getModule();
   7009 			llvm::Function *rsqrtss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rsqrt_ss);
   7010 
   7011 			Value *vector = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), val.value, 0);
   7012 
   7013 			return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(rsqrtss, vector), 0));
   7014 		}
   7015 
   7016 		RValue<Float4> rcpps(RValue<Float4> val)
   7017 		{
   7018 			Module *module = Nucleus::getModule();
   7019 			llvm::Function *rcpps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rcp_ps);
   7020 
   7021 			return RValue<Float4>(Nucleus::createCall(rcpps, val.value));
   7022 		}
   7023 
   7024 		RValue<Float4> sqrtps(RValue<Float4> val)
   7025 		{
   7026 			Module *module = Nucleus::getModule();
   7027 			llvm::Function *sqrtps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_sqrt_ps);
   7028 
   7029 			return RValue<Float4>(Nucleus::createCall(sqrtps, val.value));
   7030 		}
   7031 
   7032 		RValue<Float4> rsqrtps(RValue<Float4> val)
   7033 		{
   7034 			Module *module = Nucleus::getModule();
   7035 			llvm::Function *rsqrtps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rsqrt_ps);
   7036 
   7037 			return RValue<Float4>(Nucleus::createCall(rsqrtps, val.value));
   7038 		}
   7039 
   7040 		RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
   7041 		{
   7042 			Module *module = Nucleus::getModule();
   7043 			llvm::Function *maxps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_max_ps);
   7044 
   7045 			return RValue<Float4>(Nucleus::createCall(maxps, x.value, y.value));
   7046 		}
   7047 
   7048 		RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
   7049 		{
   7050 			Module *module = Nucleus::getModule();
   7051 			llvm::Function *minps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_min_ps);
   7052 
   7053 			return RValue<Float4>(Nucleus::createCall(minps, x.value, y.value));
   7054 		}
   7055 
   7056 		RValue<Float> roundss(RValue<Float> val, unsigned char imm)
   7057 		{
   7058 			Module *module = Nucleus::getModule();
   7059 			llvm::Function *roundss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_round_ss);
   7060 
   7061 			Value *undef = UndefValue::get(Float4::getType());
   7062 			Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
   7063 
   7064 			return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(roundss, undef, vector, Nucleus::createConstantInt(imm)), 0));
   7065 		}
   7066 
   7067 		RValue<Float> floorss(RValue<Float> val)
   7068 		{
   7069 			return roundss(val, 1);
   7070 		}
   7071 
   7072 		RValue<Float> ceilss(RValue<Float> val)
   7073 		{
   7074 			return roundss(val, 2);
   7075 		}
   7076 
   7077 		RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
   7078 		{
   7079 			Module *module = Nucleus::getModule();
   7080 			llvm::Function *roundps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_round_ps);
   7081 
   7082 			return RValue<Float4>(Nucleus::createCall(roundps, val.value, Nucleus::createConstantInt(imm)));
   7083 		}
   7084 
   7085 		RValue<Float4> floorps(RValue<Float4> val)
   7086 		{
   7087 			return roundps(val, 1);
   7088 		}
   7089 
   7090 		RValue<Float4> ceilps(RValue<Float4> val)
   7091 		{
   7092 			return roundps(val, 2);
   7093 		}
   7094 
   7095 		RValue<Float4> cmpps(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
   7096 		{
   7097 			Module *module = Nucleus::getModule();
   7098 			llvm::Function *cmpps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cmp_ps);
   7099 
   7100 			return RValue<Float4>(Nucleus::createCall(cmpps, x.value, y.value, Nucleus::createConstantByte(imm)));
   7101 		}
   7102 
   7103 		RValue<Float4> cmpeqps(RValue<Float4> x, RValue<Float4> y)
   7104 		{
   7105 			return cmpps(x, y, 0);
   7106 		}
   7107 
   7108 		RValue<Float4> cmpltps(RValue<Float4> x, RValue<Float4> y)
   7109 		{
   7110 			return cmpps(x, y, 1);
   7111 		}
   7112 
   7113 		RValue<Float4> cmpleps(RValue<Float4> x, RValue<Float4> y)
   7114 		{
   7115 			return cmpps(x, y, 2);
   7116 		}
   7117 
   7118 		RValue<Float4> cmpunordps(RValue<Float4> x, RValue<Float4> y)
   7119 		{
   7120 			return cmpps(x, y, 3);
   7121 		}
   7122 
   7123 		RValue<Float4> cmpneqps(RValue<Float4> x, RValue<Float4> y)
   7124 		{
   7125 			return cmpps(x, y, 4);
   7126 		}
   7127 
   7128 		RValue<Float4> cmpnltps(RValue<Float4> x, RValue<Float4> y)
   7129 		{
   7130 			return cmpps(x, y, 5);
   7131 		}
   7132 
   7133 		RValue<Float4> cmpnleps(RValue<Float4> x, RValue<Float4> y)
   7134 		{
   7135 			return cmpps(x, y, 6);
   7136 		}
   7137 
   7138 		RValue<Float4> cmpordps(RValue<Float4> x, RValue<Float4> y)
   7139 		{
   7140 			return cmpps(x, y, 7);
   7141 		}
   7142 
   7143 		RValue<Float> cmpss(RValue<Float> x, RValue<Float> y, unsigned char imm)
   7144 		{
   7145 			Module *module = Nucleus::getModule();
   7146 			llvm::Function *cmpss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cmp_ss);
   7147 
   7148 			Value *vector1 = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), x.value, 0);
   7149 			Value *vector2 = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), y.value, 0);
   7150 
   7151 			return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(cmpss, vector1, vector2, Nucleus::createConstantByte(imm)), 0));
   7152 		}
   7153 
   7154 		RValue<Float> cmpeqss(RValue<Float> x, RValue<Float> y)
   7155 		{
   7156 			return cmpss(x, y, 0);
   7157 		}
   7158 
   7159 		RValue<Float> cmpltss(RValue<Float> x, RValue<Float> y)
   7160 		{
   7161 			return cmpss(x, y, 1);
   7162 		}
   7163 
   7164 		RValue<Float> cmpless(RValue<Float> x, RValue<Float> y)
   7165 		{
   7166 			return cmpss(x, y, 2);
   7167 		}
   7168 
   7169 		RValue<Float> cmpunordss(RValue<Float> x, RValue<Float> y)
   7170 		{
   7171 			return cmpss(x, y, 3);
   7172 		}
   7173 
   7174 		RValue<Float> cmpneqss(RValue<Float> x, RValue<Float> y)
   7175 		{
   7176 			return cmpss(x, y, 4);
   7177 		}
   7178 
   7179 		RValue<Float> cmpnltss(RValue<Float> x, RValue<Float> y)
   7180 		{
   7181 			return cmpss(x, y, 5);
   7182 		}
   7183 
   7184 		RValue<Float> cmpnless(RValue<Float> x, RValue<Float> y)
   7185 		{
   7186 			return cmpss(x, y, 6);
   7187 		}
   7188 
   7189 		RValue<Float> cmpordss(RValue<Float> x, RValue<Float> y)
   7190 		{
   7191 			return cmpss(x, y, 7);
   7192 		}
   7193 
   7194 		RValue<Int4> pabsd(RValue<Int4> x)
   7195 		{
   7196 			Module *module = Nucleus::getModule();
   7197 			llvm::Function *pabsd = Intrinsic::getDeclaration(module, Intrinsic::x86_ssse3_pabs_d_128);
   7198 
   7199 			return RValue<Int4>(Nucleus::createCall(pabsd, x.value));
   7200 		}
   7201 
   7202 		RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
   7203 		{
   7204 			Module *module = Nucleus::getModule();
   7205 			llvm::Function *paddsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padds_w);
   7206 
   7207 			return As<Short4>(RValue<MMX>(Nucleus::createCall(paddsw, As<MMX>(x).value, As<MMX>(y).value)));
   7208 		}
   7209 
   7210 		RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
   7211 		{
   7212 			Module *module = Nucleus::getModule();
   7213 			llvm::Function *psubsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubs_w);
   7214 
   7215 			return As<Short4>(RValue<MMX>(Nucleus::createCall(psubsw, As<MMX>(x).value, As<MMX>(y).value)));
   7216 		}
   7217 
   7218 		RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
   7219 		{
   7220 			Module *module = Nucleus::getModule();
   7221 			llvm::Function *paddusw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_paddus_w);
   7222 
   7223 			return As<UShort4>(RValue<MMX>(Nucleus::createCall(paddusw, As<MMX>(x).value, As<MMX>(y).value)));
   7224 		}
   7225 
   7226 		RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
   7227 		{
   7228 			Module *module = Nucleus::getModule();
   7229 			llvm::Function *psubusw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubus_w);
   7230 
   7231 			return As<UShort4>(RValue<MMX>(Nucleus::createCall(psubusw, As<MMX>(x).value, As<MMX>(y).value)));
   7232 		}
   7233 
   7234 		RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
   7235 		{
   7236 			Module *module = Nucleus::getModule();
   7237 			llvm::Function *paddsb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padds_b);
   7238 
   7239 			return As<SByte8>(RValue<MMX>(Nucleus::createCall(paddsb, As<MMX>(x).value, As<MMX>(y).value)));
   7240 		}
   7241 
   7242 		RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
   7243 		{
   7244 			Module *module = Nucleus::getModule();
   7245 			llvm::Function *psubsb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubs_b);
   7246 
   7247 			return As<SByte8>(RValue<MMX>(Nucleus::createCall(psubsb, As<MMX>(x).value, As<MMX>(y).value)));
   7248 		}
   7249 
   7250 		RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
   7251 		{
   7252 			Module *module = Nucleus::getModule();
   7253 			llvm::Function *paddusb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_paddus_b);
   7254 
   7255 			return As<Byte8>(RValue<MMX>(Nucleus::createCall(paddusb, As<MMX>(x).value, As<MMX>(y).value)));
   7256 		}
   7257 
   7258 		RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
   7259 		{
   7260 			Module *module = Nucleus::getModule();
   7261 			llvm::Function *psubusb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubus_b);
   7262 
   7263 			return As<Byte8>(RValue<MMX>(Nucleus::createCall(psubusb, As<MMX>(x).value, As<MMX>(y).value)));
   7264 		}
   7265 
   7266 		RValue<Short4> paddw(RValue<Short4> x, RValue<Short4> y)
   7267 		{
   7268 			Module *module = Nucleus::getModule();
   7269 			llvm::Function *paddw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padd_w);
   7270 
   7271 			return As<Short4>(RValue<MMX>(Nucleus::createCall(paddw, As<MMX>(x).value, As<MMX>(y).value)));
   7272 		}
   7273 
   7274 		RValue<Short4> psubw(RValue<Short4> x, RValue<Short4> y)
   7275 		{
   7276 			Module *module = Nucleus::getModule();
   7277 			llvm::Function *psubw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psub_w);
   7278 
   7279 			return As<Short4>(RValue<MMX>(Nucleus::createCall(psubw, As<MMX>(x).value, As<MMX>(y).value)));
   7280 		}
   7281 
   7282 		RValue<Short4> pmullw(RValue<Short4> x, RValue<Short4> y)
   7283 		{
   7284 			Module *module = Nucleus::getModule();
   7285 			llvm::Function *pmullw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmull_w);
   7286 
   7287 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pmullw, As<MMX>(x).value, As<MMX>(y).value)));
   7288 		}
   7289 
   7290 		RValue<Short4> pand(RValue<Short4> x, RValue<Short4> y)
   7291 		{
   7292 			Module *module = Nucleus::getModule();
   7293 			llvm::Function *pand = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pand);
   7294 
   7295 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pand, As<MMX>(x).value, As<MMX>(y).value)));
   7296 		}
   7297 
   7298 		RValue<Short4> por(RValue<Short4> x, RValue<Short4> y)
   7299 		{
   7300 			Module *module = Nucleus::getModule();
   7301 			llvm::Function *por = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_por);
   7302 
   7303 			return As<Short4>(RValue<MMX>(Nucleus::createCall(por, As<MMX>(x).value, As<MMX>(y).value)));
   7304 		}
   7305 
   7306 		RValue<Short4> pxor(RValue<Short4> x, RValue<Short4> y)
   7307 		{
   7308 			Module *module = Nucleus::getModule();
   7309 			llvm::Function *pxor = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pxor);
   7310 
   7311 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pxor, As<MMX>(x).value, As<MMX>(y).value)));
   7312 		}
   7313 
   7314 		RValue<Short4> pshufw(RValue<Short4> x, unsigned char y)
   7315 		{
   7316 			Module *module = Nucleus::getModule();
   7317 			llvm::Function *pshufw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_pshuf_w);
   7318 
   7319 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pshufw, As<MMX>(x).value, Nucleus::createConstantByte(y))));
   7320 		}
   7321 
   7322 		RValue<Int2> punpcklwd(RValue<Short4> x, RValue<Short4> y)
   7323 		{
   7324 			Module *module = Nucleus::getModule();
   7325 			llvm::Function *punpcklwd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpcklwd);
   7326 
   7327 			return As<Int2>(RValue<MMX>(Nucleus::createCall(punpcklwd, As<MMX>(x).value, As<MMX>(y).value)));
   7328 		}
   7329 
   7330 		RValue<Int2> punpckhwd(RValue<Short4> x, RValue<Short4> y)
   7331 		{
   7332 			Module *module = Nucleus::getModule();
   7333 			llvm::Function *punpckhwd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckhwd);
   7334 
   7335 			return As<Int2>(RValue<MMX>(Nucleus::createCall(punpckhwd, As<MMX>(x).value, As<MMX>(y).value)));
   7336 		}
   7337 
   7338 		RValue<Short4> pinsrw(RValue<Short4> x, RValue<Int> y, unsigned int i)
   7339 		{
   7340 			Module *module = Nucleus::getModule();
   7341 			llvm::Function *pinsrw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pinsr_w);
   7342 
   7343 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pinsrw, As<MMX>(x).value, y.value, Nucleus::createConstantInt(i))));
   7344 		}
   7345 
   7346 		RValue<Int> pextrw(RValue<Short4> x, unsigned int i)
   7347 		{
   7348 			Module *module = Nucleus::getModule();
   7349 			llvm::Function *pextrw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pextr_w);
   7350 
   7351 			return RValue<Int>(Nucleus::createCall(pextrw, As<MMX>(x).value, Nucleus::createConstantInt(i)));
   7352 		}
   7353 
   7354 		RValue<Long1> punpckldq(RValue<Int2> x, RValue<Int2> y)
   7355 		{
   7356 			Module *module = Nucleus::getModule();
   7357 			llvm::Function *punpckldq = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckldq);
   7358 
   7359 			return As<Long1>(RValue<MMX>(Nucleus::createCall(punpckldq, As<MMX>(x).value, As<MMX>(y).value)));
   7360 		}
   7361 
   7362 		RValue<Long1> punpckhdq(RValue<Int2> x, RValue<Int2> y)
   7363 		{
   7364 			Module *module = Nucleus::getModule();
   7365 			llvm::Function *punpckhdq = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckhdq);
   7366 
   7367 			return As<Long1>(RValue<MMX>(Nucleus::createCall(punpckhdq, As<MMX>(x).value, As<MMX>(y).value)));
   7368 		}
   7369 
   7370 		RValue<Short4> punpcklbw(RValue<Byte8> x, RValue<Byte8> y)
   7371 		{
   7372 			Module *module = Nucleus::getModule();
   7373 			llvm::Function *punpcklbw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpcklbw);
   7374 
   7375 			return As<Short4>(RValue<MMX>(Nucleus::createCall(punpcklbw, As<MMX>(x).value, As<MMX>(y).value)));
   7376 		}
   7377 
   7378 		RValue<Short4> punpckhbw(RValue<Byte8> x, RValue<Byte8> y)
   7379 		{
   7380 			Module *module = Nucleus::getModule();
   7381 			llvm::Function *punpckhbw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckhbw);
   7382 
   7383 			return As<Short4>(RValue<MMX>(Nucleus::createCall(punpckhbw, As<MMX>(x).value, As<MMX>(y).value)));
   7384 		}
   7385 
   7386 		RValue<Byte8> paddb(RValue<Byte8> x, RValue<Byte8> y)
   7387 		{
   7388 			Module *module = Nucleus::getModule();
   7389 			llvm::Function *paddb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padd_b);
   7390 
   7391 			return As<Byte8>(RValue<MMX>(Nucleus::createCall(paddb, As<MMX>(x).value, As<MMX>(y).value)));
   7392 		}
   7393 
   7394 		RValue<Byte8> psubb(RValue<Byte8> x, RValue<Byte8> y)
   7395 		{
   7396 			Module *module = Nucleus::getModule();
   7397 			llvm::Function *psubb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psub_b);
   7398 
   7399 			return As<Byte8>(RValue<MMX>(Nucleus::createCall(psubb, As<MMX>(x).value, As<MMX>(y).value)));
   7400 		}
   7401 
   7402 		RValue<Int2> paddd(RValue<Int2> x, RValue<Int2> y)
   7403 		{
   7404 			Module *module = Nucleus::getModule();
   7405 			llvm::Function *paddd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padd_d);
   7406 
   7407 			return As<Int2>(RValue<MMX>(Nucleus::createCall(paddd, As<MMX>(x).value, As<MMX>(y).value)));
   7408 		}
   7409 
   7410 		RValue<Int2> psubd(RValue<Int2> x, RValue<Int2> y)
   7411 		{
   7412 			Module *module = Nucleus::getModule();
   7413 			llvm::Function *psubd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psub_d);
   7414 
   7415 			return As<Int2>(RValue<MMX>(Nucleus::createCall(psubd, As<MMX>(x).value, As<MMX>(y).value)));
   7416 		}
   7417 
   7418 		RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
   7419 		{
   7420 			Module *module = Nucleus::getModule();
   7421 			llvm::Function *pavgw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pavg_w);
   7422 
   7423 			return As<UShort4>(RValue<MMX>(Nucleus::createCall(pavgw, As<MMX>(x).value, As<MMX>(y).value)));
   7424 		}
   7425 
   7426 		RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
   7427 		{
   7428 			Module *module = Nucleus::getModule();
   7429 			llvm::Function *pmaxsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmaxs_w);
   7430 
   7431 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pmaxsw, As<MMX>(x).value, As<MMX>(y).value)));
   7432 		}
   7433 
   7434 		RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
   7435 		{
   7436 			Module *module = Nucleus::getModule();
   7437 			llvm::Function *pminsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmins_w);
   7438 
   7439 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pminsw,  As<MMX>(x).value, As<MMX>(y).value)));
   7440 		}
   7441 
   7442 		RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
   7443 		{
   7444 			Module *module = Nucleus::getModule();
   7445 			llvm::Function *pcmpgtw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpgt_w);
   7446 
   7447 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pcmpgtw, As<MMX>(x).value, As<MMX>(y).value)));
   7448 		}
   7449 
   7450 		RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
   7451 		{
   7452 			Module *module = Nucleus::getModule();
   7453 			llvm::Function *pcmpeqw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpeq_w);
   7454 
   7455 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pcmpeqw, As<MMX>(x).value, As<MMX>(y).value)));
   7456 		}
   7457 
   7458 		RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
   7459 		{
   7460 			Module *module = Nucleus::getModule();
   7461 			llvm::Function *pcmpgtb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpgt_b);
   7462 
   7463 			return As<Byte8>(RValue<MMX>(Nucleus::createCall(pcmpgtb, As<MMX>(x).value, As<MMX>(y).value)));
   7464 		}
   7465 
   7466 		RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
   7467 		{
   7468 			Module *module = Nucleus::getModule();
   7469 			llvm::Function *pcmpeqb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpeq_b);
   7470 
   7471 			return As<Byte8>(RValue<MMX>(Nucleus::createCall(pcmpeqb, As<MMX>(x).value, As<MMX>(y).value)));
   7472 		}
   7473 
   7474 		RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
   7475 		{
   7476 			Module *module = Nucleus::getModule();
   7477 			llvm::Function *packssdw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_packssdw);
   7478 
   7479 			return As<Short4>(RValue<MMX>(Nucleus::createCall(packssdw, As<MMX>(x).value, As<MMX>(y).value)));
   7480 		}
   7481 
   7482 		RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
   7483 		{
   7484 			if(CPUID::supportsSSE2())
   7485 			{
   7486 				Module *module = Nucleus::getModule();
   7487 				llvm::Function *packssdw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_packssdw_128);
   7488 
   7489 				return RValue<Short8>(Nucleus::createCall(packssdw, x.value, y.value));
   7490 			}
   7491 			else
   7492 			{
   7493 				Int2 loX = Int2(x);
   7494 				Int2 hiX = Int2(Swizzle(x, 0xEE));
   7495 
   7496 				Int2 loY = Int2(y);
   7497 				Int2 hiY = Int2(Swizzle(y, 0xEE));
   7498 
   7499 				Short4 lo = x86::packssdw(loX, hiX);
   7500 				Short4 hi = x86::packssdw(loY, hiY);
   7501 
   7502 				return Short8(lo, hi);
   7503 			}
   7504 		}
   7505 
   7506 		RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
   7507 		{
   7508 			Module *module = Nucleus::getModule();
   7509 			llvm::Function *packsswb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_packsswb);
   7510 
   7511 			return As<SByte8>(RValue<MMX>(Nucleus::createCall(packsswb, As<MMX>(x).value, As<MMX>(y).value)));
   7512 		}
   7513 
   7514 		RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y)
   7515 		{
   7516 			Module *module = Nucleus::getModule();
   7517 			llvm::Function *packuswb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_packuswb);
   7518 
   7519 			return As<Byte8>(RValue<MMX>(Nucleus::createCall(packuswb, As<MMX>(x).value, As<MMX>(y).value)));
   7520 		}
   7521 
   7522 		RValue<UShort8> packusdw(RValue<UInt4> x, RValue<UInt4> y)
   7523 		{
   7524 			if(CPUID::supportsSSE4_1())
   7525 			{
   7526 				Module *module = Nucleus::getModule();
   7527 				llvm::Function *packusdw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_packusdw);
   7528 
   7529 				return RValue<UShort8>(Nucleus::createCall(packusdw, x.value, y.value));
   7530 			}
   7531 			else
   7532 			{
   7533 				// FIXME: Not an exact replacement!
   7534 				return As<UShort8>(packssdw(As<Int4>(x - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000)), As<Int4>(y - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000))) + Short8(0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u));
   7535 			}
   7536 		}
   7537 
   7538 		RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
   7539 		{
   7540 			Module *module = Nucleus::getModule();
   7541 			llvm::Function *psrlw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrli_w);
   7542 
   7543 			return As<UShort4>(RValue<MMX>(Nucleus::createCall(psrlw, As<MMX>(x).value, Nucleus::createConstantInt(y))));
   7544 		}
   7545 
   7546 		RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
   7547 		{
   7548 			Module *module = Nucleus::getModule();
   7549 			llvm::Function *psrlw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrli_w);
   7550 
   7551 			return RValue<UShort8>(Nucleus::createCall(psrlw, x.value, Nucleus::createConstantInt(y)));
   7552 		}
   7553 
   7554 		RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
   7555 		{
   7556 			Module *module = Nucleus::getModule();
   7557 			llvm::Function *psraw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrai_w);
   7558 
   7559 			return As<Short4>(RValue<MMX>(Nucleus::createCall(psraw, As<MMX>(x).value, Nucleus::createConstantInt(y))));
   7560 		}
   7561 
   7562 		RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
   7563 		{
   7564 			Module *module = Nucleus::getModule();
   7565 			llvm::Function *psraw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrai_w);
   7566 
   7567 			return RValue<Short8>(Nucleus::createCall(psraw, x.value, Nucleus::createConstantInt(y)));
   7568 		}
   7569 
   7570 		RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
   7571 		{
   7572 			Module *module = Nucleus::getModule();
   7573 			llvm::Function *psllw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pslli_w);
   7574 
   7575 			return As<Short4>(RValue<MMX>(Nucleus::createCall(psllw, As<MMX>(x).value, Nucleus::createConstantInt(y))));
   7576 		}
   7577 
   7578 		RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
   7579 		{
   7580 			Module *module = Nucleus::getModule();
   7581 			llvm::Function *psllw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pslli_w);
   7582 
   7583 			return RValue<Short8>(Nucleus::createCall(psllw, x.value, Nucleus::createConstantInt(y)));
   7584 		}
   7585 
   7586 		RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
   7587 		{
   7588 			Module *module = Nucleus::getModule();
   7589 			llvm::Function *pslld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pslli_d);
   7590 
   7591 			return As<Int2>(RValue<MMX>(Nucleus::createCall(pslld, As<MMX>(x).value, Nucleus::createConstantInt(y))));
   7592 		}
   7593 
   7594 		RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
   7595 		{
   7596 			if(CPUID::supportsSSE2())
   7597 			{
   7598 				Module *module = Nucleus::getModule();
   7599 				llvm::Function *pslld = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pslli_d);
   7600 
   7601 				return RValue<Int4>(Nucleus::createCall(pslld, x.value, Nucleus::createConstantInt(y)));
   7602 			}
   7603 			else
   7604 			{
   7605 				Int2 lo = Int2(x);
   7606 				Int2 hi = Int2(Swizzle(x, 0xEE));
   7607 
   7608 				lo = x86::pslld(lo, y);
   7609 				hi = x86::pslld(hi, y);
   7610 
   7611 				return Int4(lo, hi);
   7612 			}
   7613 		}
   7614 
   7615 		RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
   7616 		{
   7617 			Module *module = Nucleus::getModule();
   7618 			llvm::Function *psrad = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrai_d);
   7619 
   7620 			return As<Int2>(RValue<MMX>(Nucleus::createCall(psrad, As<MMX>(x).value, Nucleus::createConstantInt(y))));
   7621 		}
   7622 
   7623 		RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
   7624 		{
   7625 			if(CPUID::supportsSSE2())
   7626 			{
   7627 				Module *module = Nucleus::getModule();
   7628 				llvm::Function *psrad = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrai_d);
   7629 
   7630 				return RValue<Int4>(Nucleus::createCall(psrad, x.value, Nucleus::createConstantInt(y)));
   7631 			}
   7632 			else
   7633 			{
   7634 				Int2 lo = Int2(x);
   7635 				Int2 hi = Int2(Swizzle(x, 0xEE));
   7636 
   7637 				lo = x86::psrad(lo, y);
   7638 				hi = x86::psrad(hi, y);
   7639 
   7640 				return Int4(lo, hi);
   7641 			}
   7642 		}
   7643 
   7644 		RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
   7645 		{
   7646 			Module *module = Nucleus::getModule();
   7647 			llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrli_d);
   7648 
   7649 			return As<UInt2>(RValue<MMX>(Nucleus::createCall(psrld, As<MMX>(x).value, Nucleus::createConstantInt(y))));
   7650 		}
   7651 
   7652 		RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
   7653 		{
   7654 			if(CPUID::supportsSSE2())
   7655 			{
   7656 				Module *module = Nucleus::getModule();
   7657 				llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrli_d);
   7658 
   7659 				return RValue<UInt4>(Nucleus::createCall(psrld, x.value, Nucleus::createConstantInt(y)));
   7660 			}
   7661 			else
   7662 			{
   7663 				UInt2 lo = As<UInt2>(Int2(As<Int4>(x)));
   7664 				UInt2 hi = As<UInt2>(Int2(Swizzle(As<Int4>(x), 0xEE)));
   7665 
   7666 				lo = x86::psrld(lo, y);
   7667 				hi = x86::psrld(hi, y);
   7668 
   7669 				return UInt4(lo, hi);
   7670 			}
   7671 		}
   7672 
   7673 		RValue<UShort4> psrlw(RValue<UShort4> x, RValue<Long1> y)
   7674 		{
   7675 			Module *module = Nucleus::getModule();
   7676 			llvm::Function *psrlw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrl_w);
   7677 
   7678 			return As<UShort4>(RValue<MMX>(Nucleus::createCall(psrlw, As<MMX>(x).value, As<MMX>(y).value)));
   7679 		}
   7680 
   7681 		RValue<Short4> psraw(RValue<Short4> x, RValue<Long1> y)
   7682 		{
   7683 			Module *module = Nucleus::getModule();
   7684 			llvm::Function *psraw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psra_w);
   7685 
   7686 			return As<Short4>(RValue<MMX>(Nucleus::createCall(psraw, As<MMX>(x).value, As<MMX>(y).value)));
   7687 		}
   7688 
   7689 		RValue<Short4> psllw(RValue<Short4> x, RValue<Long1> y)
   7690 		{
   7691 			Module *module = Nucleus::getModule();
   7692 			llvm::Function *psllw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psll_w);
   7693 
   7694 			return As<Short4>(RValue<MMX>(Nucleus::createCall(psllw, As<MMX>(x).value, As<MMX>(y).value)));
   7695 		}
   7696 
   7697 		RValue<Int2> pslld(RValue<Int2> x, RValue<Long1> y)
   7698 		{
   7699 			Module *module = Nucleus::getModule();
   7700 			llvm::Function *pslld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psll_d);
   7701 
   7702 			return As<Int2>(RValue<MMX>(Nucleus::createCall(pslld, As<MMX>(x).value, As<MMX>(y).value)));
   7703 		}
   7704 
   7705 		RValue<UInt2> psrld(RValue<UInt2> x, RValue<Long1> y)
   7706 		{
   7707 			Module *module = Nucleus::getModule();
   7708 			llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrl_d);
   7709 
   7710 			return As<UInt2>(RValue<MMX>(Nucleus::createCall(psrld, As<MMX>(x).value, As<MMX>(y).value)));
   7711 		}
   7712 
   7713 		RValue<Int2> psrad(RValue<Int2> x, RValue<Long1> y)
   7714 		{
   7715 			Module *module = Nucleus::getModule();
   7716 			llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psra_d);
   7717 
   7718 			return As<Int2>(RValue<MMX>(Nucleus::createCall(psrld, As<MMX>(x).value, As<MMX>(y).value)));
   7719 		}
   7720 
   7721 		RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
   7722 		{
   7723 			Module *module = Nucleus::getModule();
   7724 			llvm::Function *pmaxsd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmaxsd);
   7725 
   7726 			return RValue<Int4>(Nucleus::createCall(pmaxsd, x.value, y.value));
   7727 		}
   7728 
   7729 		RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
   7730 		{
   7731 			Module *module = Nucleus::getModule();
   7732 			llvm::Function *pminsd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pminsd);
   7733 
   7734 			return RValue<Int4>(Nucleus::createCall(pminsd, x.value, y.value));
   7735 		}
   7736 
   7737 		RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
   7738 		{
   7739 			Module *module = Nucleus::getModule();
   7740 			llvm::Function *pmaxud = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmaxud);
   7741 
   7742 			return RValue<UInt4>(Nucleus::createCall(pmaxud, x.value, y.value));
   7743 		}
   7744 
   7745 		RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
   7746 		{
   7747 			Module *module = Nucleus::getModule();
   7748 			llvm::Function *pminud = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pminud);
   7749 
   7750 			return RValue<UInt4>(Nucleus::createCall(pminud, x.value, y.value));
   7751 		}
   7752 
   7753 		RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
   7754 		{
   7755 			Module *module = Nucleus::getModule();
   7756 			llvm::Function *pmulhw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmulh_w);
   7757 
   7758 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pmulhw, As<MMX>(x).value, As<MMX>(y).value)));
   7759 		}
   7760 
   7761 		RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
   7762 		{
   7763 			Module *module = Nucleus::getModule();
   7764 			llvm::Function *pmulhuw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmulhu_w);
   7765 
   7766 			return As<UShort4>(RValue<MMX>(Nucleus::createCall(pmulhuw, As<MMX>(x).value, As<MMX>(y).value)));
   7767 		}
   7768 
   7769 		RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
   7770 		{
   7771 			Module *module = Nucleus::getModule();
   7772 			llvm::Function *pmaddwd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmadd_wd);
   7773 
   7774 			return As<Int2>(RValue<MMX>(Nucleus::createCall(pmaddwd, As<MMX>(x).value, As<MMX>(y).value)));
   7775 		}
   7776 
   7777 		RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
   7778 		{
   7779 			Module *module = Nucleus::getModule();
   7780 			llvm::Function *pmulhw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pmulh_w);
   7781 
   7782 			return RValue<Short8>(Nucleus::createCall(pmulhw, x.value, y.value));
   7783 		}
   7784 
   7785 		RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
   7786 		{
   7787 			Module *module = Nucleus::getModule();
   7788 			llvm::Function *pmulhuw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pmulhu_w);
   7789 
   7790 			return RValue<UShort8>(Nucleus::createCall(pmulhuw, x.value, y.value));
   7791 		}
   7792 
   7793 		RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
   7794 		{
   7795 			Module *module = Nucleus::getModule();
   7796 			llvm::Function *pmaddwd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pmadd_wd);
   7797 
   7798 			return RValue<Int4>(Nucleus::createCall(pmaddwd, x.value, y.value));
   7799 		}
   7800 
   7801 		RValue<Int> movmskps(RValue<Float4> x)
   7802 		{
   7803 			Module *module = Nucleus::getModule();
   7804 			llvm::Function *movmskps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_movmsk_ps);
   7805 
   7806 			return RValue<Int>(Nucleus::createCall(movmskps, x.value));
   7807 		}
   7808 
   7809 		RValue<Int> pmovmskb(RValue<Byte8> x)
   7810 		{
   7811 			Module *module = Nucleus::getModule();
   7812 			llvm::Function *pmovmskb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmovmskb);
   7813 
   7814 			return RValue<Int>(Nucleus::createCall(pmovmskb, As<MMX>(x).value));
   7815 		}
   7816 
   7817 		//RValue<Int2> movd(RValue<Pointer<Int>> x)
   7818 		//{
   7819 		//	Value *element = Nucleus::createLoad(x.value);
   7820 
   7821 		////	Value *int2 = UndefValue::get(Int2::getType());
   7822 		////	int2 = Nucleus::createInsertElement(int2, element, ConstantInt::get(Int::getType(), 0));
   7823 
   7824 		//	Value *int2 = Nucleus::createBitCast(Nucleus::createZExt(element, Long::getType()), Int2::getType());
   7825 
   7826 		//	return RValue<Int2>(int2);
   7827 		//}
   7828 
   7829 		//RValue<Int2> movdq2q(RValue<Int4> x)
   7830 		//{
   7831 		//	Value *long2 = Nucleus::createBitCast(x.value, Long2::getType());
   7832 		//	Value *element = Nucleus::createExtractElement(long2, ConstantInt::get(Int::getType(), 0));
   7833 
   7834 		//	return RValue<Int2>(Nucleus::createBitCast(element, Int2::getType()));
   7835 		//}
   7836 
   7837 		RValue<Int4> pmovzxbd(RValue<Int4> x)
   7838 		{
   7839 			Module *module = Nucleus::getModule();
   7840 			llvm::Function *pmovzxbd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovzxbd);
   7841 
   7842 			return RValue<Int4>(Nucleus::createCall(pmovzxbd, Nucleus::createBitCast(x.value, Byte16::getType())));
   7843 		}
   7844 
   7845 		RValue<Int4> pmovsxbd(RValue<Int4> x)
   7846 		{
   7847 			Module *module = Nucleus::getModule();
   7848 			llvm::Function *pmovsxbd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovsxbd);
   7849 
   7850 			return RValue<Int4>(Nucleus::createCall(pmovsxbd, Nucleus::createBitCast(x.value, SByte16::getType())));
   7851 		}
   7852 
   7853 		RValue<Int4> pmovzxwd(RValue<Int4> x)
   7854 		{
   7855 			Module *module = Nucleus::getModule();
   7856 			llvm::Function *pmovzxwd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovzxwd);
   7857 
   7858 			return RValue<Int4>(Nucleus::createCall(pmovzxwd, Nucleus::createBitCast(x.value, UShort8::getType())));
   7859 		}
   7860 
   7861 		RValue<Int4> pmovsxwd(RValue<Int4> x)
   7862 		{
   7863 			Module *module = Nucleus::getModule();
   7864 			llvm::Function *pmovsxwd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovsxwd);
   7865 
   7866 			return RValue<Int4>(Nucleus::createCall(pmovsxwd, Nucleus::createBitCast(x.value, Short8::getType())));
   7867 		}
   7868 
   7869 		void emms()
   7870 		{
   7871 			Module *module = Nucleus::getModule();
   7872 			llvm::Function *emms = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_emms);
   7873 
   7874 			Nucleus::createCall(emms);
   7875 		}
   7876 	}
   7877 }
   7878