Home | History | Annotate | Download | only in Device
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "Renderer.hpp"
     16 
     17 #include "Clipper.hpp"
     18 #include "Surface.hpp"
     19 #include "Primitive.hpp"
     20 #include "Polygon.hpp"
     21 #include "WSI/FrameBuffer.hpp"
     22 #include "Device/SwiftConfig.hpp"
     23 #include "Reactor/Reactor.hpp"
     24 #include "Pipeline/Constants.hpp"
     25 #include "System/MutexLock.hpp"
     26 #include "System/CPUID.hpp"
     27 #include "System/Memory.hpp"
     28 #include "System/Resource.hpp"
     29 #include "System/Half.hpp"
     30 #include "System/Math.hpp"
     31 #include "System/Timer.hpp"
     32 #include "Vulkan/VkDebug.hpp"
     33 
     34 #undef max
     35 
// Passed to the SwiftConfig constructor when a Renderer is created.
bool disableServer = true;

#ifndef NDEBUG
// Debug-only bounds on the primitive count accepted by Renderer::draw():
// a draw whose count lies outside [minPrimitives, maxPrimitives] returns without drawing.
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1 << 21;
#endif
     42 
     43 namespace sw
     44 {
	// Rendering settings declared here and defined elsewhere.
	// The first six are written once by setGlobalRenderingSettings() from the API conventions.
	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
	extern bool booleanFaceRegister;
	extern bool fullPixelPositionRegister;
	extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
	extern bool secondaryColor;             // Specular lighting is applied after texturing
	extern bool colorsDefaultToZero;

	extern bool forceWindowed;
	extern bool complementaryDepthBuffer;   // When set, draw() negates the depth range and uses 1 - near
	extern bool postBlendSRGB;
	extern bool exactColorRounding;
	extern TransparencyAntialiasing transparencyAntialiasing;
	extern bool forceClearRegisters;

	extern bool precacheVertex;
	extern bool precacheSetup;
	extern bool precachePixel;

	// Base number of primitives per batch; draw() divides it by the multisample count.
	static const int batchSize = 128;
	AtomicInt threadCount(1);
	AtomicInt Renderer::unitCount(1);
	AtomicInt Renderer::clusterCount(1);

	// Precision of transcendental operations.
	// threadFunction() enables flush-to-zero/denormals-are-zero when logPrecision < IEEE.
	TranscendentalPrecision logPrecision = ACCURATE;
	TranscendentalPrecision expPrecision = ACCURATE;
	TranscendentalPrecision rcpPrecision = ACCURATE;
	TranscendentalPrecision rsqPrecision = ACCURATE;
	bool perspectiveCorrection = true;
     73 
     74 	static void setGlobalRenderingSettings(Conventions conventions, bool exactColorRounding)
     75 	{
     76 		static bool initialized = false;
     77 
     78 		if(!initialized)
     79 		{
     80 			sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
     81 			sw::booleanFaceRegister = conventions.booleanFaceRegister;
     82 			sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
     83 			sw::leadingVertexFirst = conventions.leadingVertexFirst;
     84 			sw::secondaryColor = conventions.secondaryColor;
     85 			sw::colorsDefaultToZero = conventions.colorsDefaultToZero;
     86 			sw::exactColorRounding = exactColorRounding;
     87 			initialized = true;
     88 		}
     89 	}
     90 
	// Argument bundle read by Renderer::threadFunction() through its void* parameter.
	struct Parameters
	{
		Renderer *renderer;   // Renderer whose threadLoop() the thread runs
		int threadIndex;      // Index passed to threadLoop()
	};
     96 
     97 	DrawCall::DrawCall()
     98 	{
     99 		queries = 0;
    100 
    101 		vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
    102 		vsDirtyConstI = 16;
    103 		vsDirtyConstB = 16;
    104 
    105 		psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
    106 		psDirtyConstI = 16;
    107 		psDirtyConstB = 16;
    108 
    109 		references = -1;
    110 
    111 		data = (DrawData*)allocate(sizeof(DrawData));
    112 		data->constants = &constants;
    113 	}
    114 
    115 	DrawCall::~DrawCall()
    116 	{
    117 		delete queries;
    118 
    119 		deallocate(data);
    120 	}
    121 
    122 	Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
    123 	{
    124 		setGlobalRenderingSettings(conventions, exactColorRounding);
    125 
    126 		setRenderTarget(0, nullptr);
    127 		clipper = new Clipper;
    128 		blitter = new Blitter;
    129 
    130 		updateClipPlanes = true;
    131 
    132 		#if PERF_HUD
    133 			resetTimers();
    134 		#endif
    135 
    136 		for(int i = 0; i < 16; i++)
    137 		{
    138 			vertexTask[i] = nullptr;
    139 
    140 			worker[i] = nullptr;
    141 			resume[i] = nullptr;
    142 			suspend[i] = nullptr;
    143 		}
    144 
    145 		threadsAwake = 0;
    146 		resumeApp = new Event();
    147 
    148 		currentDraw = 0;
    149 		nextDraw = 0;
    150 
    151 		qHead = 0;
    152 		qSize = 0;
    153 
    154 		for(int i = 0; i < 16; i++)
    155 		{
    156 			triangleBatch[i] = nullptr;
    157 			primitiveBatch[i] = nullptr;
    158 		}
    159 
    160 		for(int draw = 0; draw < DRAW_COUNT; draw++)
    161 		{
    162 			drawCall[draw] = new DrawCall();
    163 			drawList[draw] = drawCall[draw];
    164 		}
    165 
    166 		for(int unit = 0; unit < 16; unit++)
    167 		{
    168 			primitiveProgress[unit].init();
    169 		}
    170 
    171 		for(int cluster = 0; cluster < 16; cluster++)
    172 		{
    173 			pixelProgress[cluster].init();
    174 		}
    175 
    176 		clipFlags = 0;
    177 
    178 		swiftConfig = new SwiftConfig(disableServer);
    179 		updateConfiguration(true);
    180 
    181 		sync = new Resource(0);
    182 	}
    183 
	// Tears the renderer down: releases the synchronization resource and the helper objects,
	// stops the worker threads, then frees the draw call pool and the configuration object.
	Renderer::~Renderer()
	{
		sync->destruct();

		delete clipper;
		clipper = nullptr;

		delete blitter;
		blitter = nullptr;

		// Threads are terminated before the event and draw calls are deleted
		terminateThreads();
		delete resumeApp;

		for(int draw = 0; draw < DRAW_COUNT; draw++)
		{
			delete drawCall[draw];
		}

		delete swiftConfig;
	}
    204 
    205 	// This object has to be mem aligned
    206 	void* Renderer::operator new(size_t size)
    207 	{
    208 		ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
    209 		return sw::allocate(sizeof(Renderer), 16);
    210 	}
    211 
	// Counterpart of operator new: returns the storage through sw::deallocate.
	void Renderer::operator delete(void * mem)
	{
		sw::deallocate(mem);
	}
    216 
    217 	void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
    218 	{
    219 		#ifndef NDEBUG
    220 			if(count < minPrimitives || count > maxPrimitives)
    221 			{
    222 				return;
    223 			}
    224 		#endif
    225 
    226 		context->drawType = drawType;
    227 
    228 		updateConfiguration();
    229 		updateClipper();
    230 
    231 		int ms = context->getMultiSampleCount();
    232 		unsigned int oldMultiSampleMask = context->multiSampleMask;
    233 		context->multiSampleMask = context->sampleMask & ((unsigned)0xFFFFFFFF >> (32 - ms));
    234 
    235 		if(!context->multiSampleMask)
    236 		{
    237 			return;
    238 		}
    239 
    240 		sync->lock(sw::PRIVATE);
    241 
    242 		if(update || oldMultiSampleMask != context->multiSampleMask)
    243 		{
    244 			vertexState = VertexProcessor::update(drawType);
    245 			setupState = SetupProcessor::update();
    246 			pixelState = PixelProcessor::update();
    247 
    248 			vertexRoutine = VertexProcessor::routine(vertexState);
    249 			setupRoutine = SetupProcessor::routine(setupState);
    250 			pixelRoutine = PixelProcessor::routine(pixelState);
    251 		}
    252 
    253 		int batch = batchSize / ms;
    254 
    255 		int (Renderer::*setupPrimitives)(int batch, int count);
    256 
    257 		if(context->isDrawTriangle())
    258 		{
    259 			setupPrimitives = &Renderer::setupTriangles;
    260 		}
    261 		else if(context->isDrawLine())
    262 		{
    263 			setupPrimitives = &Renderer::setupLines;
    264 		}
    265 		else   // Point draw
    266 		{
    267 			setupPrimitives = &Renderer::setupPoints;
    268 		}
    269 
    270 		DrawCall *draw = nullptr;
    271 
    272 		do
    273 		{
    274 			for(int i = 0; i < DRAW_COUNT; i++)
    275 			{
    276 				if(drawCall[i]->references == -1)
    277 				{
    278 					draw = drawCall[i];
    279 					drawList[nextDraw & DRAW_COUNT_BITS] = draw;
    280 
    281 					break;
    282 				}
    283 			}
    284 
    285 			if(!draw)
    286 			{
    287 				resumeApp->wait();
    288 			}
    289 		}
    290 		while(!draw);
    291 
    292 		DrawData *data = draw->data;
    293 
    294 		if(queries.size() != 0)
    295 		{
    296 			draw->queries = new std::list<Query*>();
    297 			bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
    298 			for(auto &query : queries)
    299 			{
    300 				if(includePrimitivesWrittenQueries || (query->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
    301 				{
    302 					++query->reference; // Atomic
    303 					draw->queries->push_back(query);
    304 				}
    305 			}
    306 		}
    307 
    308 		draw->drawType = drawType;
    309 		draw->batchSize = batch;
    310 
    311 		vertexRoutine->bind();
    312 		setupRoutine->bind();
    313 		pixelRoutine->bind();
    314 
    315 		draw->vertexRoutine = vertexRoutine;
    316 		draw->setupRoutine = setupRoutine;
    317 		draw->pixelRoutine = pixelRoutine;
    318 		draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
    319 		draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
    320 		draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
    321 		draw->setupPrimitives = setupPrimitives;
    322 		draw->setupState = setupState;
    323 
    324 		for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
    325 		{
    326 			draw->vertexStream[i] = context->input[i].resource;
    327 			data->input[i] = context->input[i].buffer;
    328 			data->stride[i] = context->input[i].stride;
    329 
    330 			if(draw->vertexStream[i])
    331 			{
    332 				draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
    333 			}
    334 		}
    335 
    336 		if(context->indexBuffer)
    337 		{
    338 			data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
    339 		}
    340 
    341 		draw->indexBuffer = context->indexBuffer;
    342 
    343 		for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
    344 		{
    345 			draw->texture[sampler] = 0;
    346 		}
    347 
    348 		for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
    349 		{
    350 			if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
    351 			{
    352 				draw->texture[sampler] = context->texture[sampler];
    353 				draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
    354 
    355 				data->mipmap[sampler] = context->sampler[sampler].getTextureData();
    356 			}
    357 		}
    358 
    359 		if(context->pixelShader)
    360 		{
    361 			if(draw->psDirtyConstF)
    362 			{
    363 				memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
    364 				draw->psDirtyConstF = 0;
    365 			}
    366 
    367 			if(draw->psDirtyConstI)
    368 			{
    369 				memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
    370 				draw->psDirtyConstI = 0;
    371 			}
    372 
    373 			if(draw->psDirtyConstB)
    374 			{
    375 				memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
    376 				draw->psDirtyConstB = 0;
    377 			}
    378 
    379 			PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
    380 		}
    381 		else
    382 		{
    383 			for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
    384 			{
    385 				draw->pUniformBuffers[i] = nullptr;
    386 			}
    387 		}
    388 
    389 		for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
    390 		{
    391 			if(vertexState.sampler[sampler].textureType != TEXTURE_NULL)
    392 			{
    393 				draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
    394 				draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
    395 
    396 				data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
    397 			}
    398 		}
    399 
    400 		if(draw->vsDirtyConstF)
    401 		{
    402 			memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
    403 			draw->vsDirtyConstF = 0;
    404 		}
    405 
    406 		if(draw->vsDirtyConstI)
    407 		{
    408 			memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
    409 			draw->vsDirtyConstI = 0;
    410 		}
    411 
    412 		if(draw->vsDirtyConstB)
    413 		{
    414 			memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
    415 			draw->vsDirtyConstB = 0;
    416 		}
    417 
    418 		if(context->vertexShader->isInstanceIdDeclared())
    419 		{
    420 			data->instanceID = context->instanceID;
    421 		}
    422 
    423 		VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
    424 		VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
    425 
    426 		if(pixelState.stencilActive)
    427 		{
    428 			data->stencil[0] = stencil;
    429 			data->stencil[1] = stencilCCW;
    430 		}
    431 
    432 		if(setupState.isDrawPoint)
    433 		{
    434 			data->pointSizeMin = pointSizeMin;
    435 			data->pointSizeMax = pointSizeMax;
    436 		}
    437 
    438 		data->lineWidth = context->lineWidth;
    439 
    440 		data->factor = factor;
    441 
    442 		if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
    443 		{
    444 			float ref = context->alphaReference * (1.0f / 255.0f);
    445 			float margin = sw::min(ref, 1.0f - ref);
    446 
    447 			if(ms == 4)
    448 			{
    449 				data->a2c0 = replicate(ref - margin * 0.6f);
    450 				data->a2c1 = replicate(ref - margin * 0.2f);
    451 				data->a2c2 = replicate(ref + margin * 0.2f);
    452 				data->a2c3 = replicate(ref + margin * 0.6f);
    453 			}
    454 			else if(ms == 2)
    455 			{
    456 				data->a2c0 = replicate(ref - margin * 0.3f);
    457 				data->a2c1 = replicate(ref + margin * 0.3f);
    458 			}
    459 			else ASSERT(false);
    460 		}
    461 
    462 		if(pixelState.occlusionEnabled)
    463 		{
    464 			for(int cluster = 0; cluster < clusterCount; cluster++)
    465 			{
    466 				data->occlusion[cluster] = 0;
    467 			}
    468 		}
    469 
    470 		#if PERF_PROFILE
    471 			for(int cluster = 0; cluster < clusterCount; cluster++)
    472 			{
    473 				for(int i = 0; i < PERF_TIMERS; i++)
    474 				{
    475 					data->cycles[i][cluster] = 0;
    476 				}
    477 			}
    478 		#endif
    479 
    480 		// Viewport
    481 		{
    482 			float W = 0.5f * viewport.width;
    483 			float H = 0.5f * viewport.height;
    484 			float X0 = viewport.x + W;
    485 			float Y0 = viewport.y + H;
    486 			float N = viewport.minDepth;
    487 			float F = viewport.maxDepth;
    488 			float Z = F - N;
    489 
    490 			if(context->isDrawTriangle())
    491 			{
    492 				N += context->depthBias;
    493 			}
    494 
    495 			if(complementaryDepthBuffer)
    496 			{
    497 				Z = -Z;
    498 				N = 1 - N;
    499 			}
    500 
    501 			data->Wx16 = replicate(W * 16);
    502 			data->Hx16 = replicate(H * 16);
    503 			data->X0x16 = replicate(X0 * 16 - 8);
    504 			data->Y0x16 = replicate(Y0 * 16 - 8);
    505 			data->halfPixelX = replicate(0.5f / W);
    506 			data->halfPixelY = replicate(0.5f / H);
    507 			data->viewportHeight = abs(viewport.height);
    508 			data->slopeDepthBias = context->slopeDepthBias;
    509 			data->depthRange = Z;
    510 			data->depthNear = N;
    511 			draw->clipFlags = clipFlags;
    512 
    513 			if(clipFlags)
    514 			{
    515 				if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
    516 				if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
    517 				if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
    518 				if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
    519 				if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
    520 				if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
    521 			}
    522 		}
    523 
    524 		// Target
    525 		{
    526 			for(int index = 0; index < RENDERTARGETS; index++)
    527 			{
    528 				draw->renderTarget[index] = context->renderTarget[index];
    529 
    530 				if(draw->renderTarget[index])
    531 				{
    532 					unsigned int layer = context->renderTargetLayer[index];
    533 					data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED);
    534 					data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
    535 					data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
    536 				}
    537 			}
    538 
    539 			draw->depthBuffer = context->depthBuffer;
    540 			draw->stencilBuffer = context->stencilBuffer;
    541 
    542 			if(draw->depthBuffer)
    543 			{
    544 				unsigned int layer = context->depthBufferLayer;
    545 				data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED);
    546 				data->depthPitchB = context->depthBuffer->getInternalPitchB();
    547 				data->depthSliceB = context->depthBuffer->getInternalSliceB();
    548 			}
    549 
    550 			if(draw->stencilBuffer)
    551 			{
    552 				unsigned int layer = context->stencilBufferLayer;
    553 				data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, layer, MANAGED);
    554 				data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
    555 				data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
    556 			}
    557 		}
    558 
    559 		// Scissor
    560 		{
    561 			data->scissorX0 = scissor.x0;
    562 			data->scissorX1 = scissor.x1;
    563 			data->scissorY0 = scissor.y0;
    564 			data->scissorY1 = scissor.y1;
    565 		}
    566 
    567 		draw->primitive = 0;
    568 		draw->count = count;
    569 
    570 		draw->references = (count + batch - 1) / batch;
    571 
    572 		schedulerMutex.lock();
    573 		++nextDraw; // Atomic
    574 		schedulerMutex.unlock();
    575 
    576 		#ifndef NDEBUG
    577 		if(threadCount == 1)   // Use main thread for draw execution
    578 		{
    579 			threadsAwake = 1;
    580 			task[0].type = Task::RESUME;
    581 
    582 			taskLoop(0);
    583 		}
    584 		else
    585 		#endif
    586 		{
    587 			if(!threadsAwake)
    588 			{
    589 				suspend[0]->wait();
    590 
    591 				threadsAwake = 1;
    592 				task[0].type = Task::RESUME;
    593 
    594 				resume[0]->signal();
    595 			}
    596 		}
    597 	}
    598 
	// Forwards the clear request unchanged to the blitter.
	void Renderer::clear(void *value, VkFormat format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask)
	{
		blitter->clear(value, format, dest, clearRect, rgbaMask);
	}
    603 
	// Forwards the blit to the blitter, packing the three flags into its options argument
	// in the order filter, isStencil, sRGBconversion.
	void Renderer::blit(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil, bool sRGBconversion)
	{
		blitter->blit(source, sRect, dest, dRect, {filter, isStencil, sRGBconversion});
	}
    608 
	// Forwards the 3D blit unchanged to the blitter.
	void Renderer::blit3D(Surface *source, Surface *dest)
	{
		blitter->blit3D(source, dest);
	}
    613 
    614 	void Renderer::threadFunction(void *parameters)
    615 	{
    616 		Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
    617 		int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
    618 
    619 		if(logPrecision < IEEE)
    620 		{
    621 			CPUID::setFlushToZero(true);
    622 			CPUID::setDenormalsAreZero(true);
    623 		}
    624 
    625 		renderer->threadLoop(threadIndex);
    626 	}
    627 
	// Body of a worker thread: runs tasks until the task loop ends, then announces the
	// suspension and sleeps until resumed. Repeats until exitThreads is set.
	void Renderer::threadLoop(int threadIndex)
	{
		while(!exitThreads)
		{
			taskLoop(threadIndex);

			// Handshake with whoever wakes this thread: it waits on suspend[] before signaling resume[]
			suspend[threadIndex]->signal();
			resume[threadIndex]->wait();
		}
	}
    638 
	// Repeatedly fetches and executes tasks for the given thread until the scheduler
	// assigns it a SUSPEND task.
	void Renderer::taskLoop(int threadIndex)
	{
		while(task[threadIndex].type != Task::SUSPEND)
		{
			scheduleTask(threadIndex);
			executeTask(threadIndex);
		}
	}
    647 
    648 	void Renderer::findAvailableTasks()
    649 	{
    650 		// Find pixel tasks
    651 		for(int cluster = 0; cluster < clusterCount; cluster++)
    652 		{
    653 			if(!pixelProgress[cluster].executing)
    654 			{
    655 				for(int unit = 0; unit < unitCount; unit++)
    656 				{
    657 					if(primitiveProgress[unit].references > 0)   // Contains processed primitives
    658 					{
    659 						if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
    660 						{
    661 							if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
    662 							{
    663 								Task &task = taskQueue[qHead];
    664 								task.type = Task::PIXELS;
    665 								task.primitiveUnit = unit;
    666 								task.pixelCluster = cluster;
    667 
    668 								pixelProgress[cluster].executing = true;
    669 
    670 								// Commit to the task queue
    671 								qHead = (qHead + 1) & TASK_COUNT_BITS;
    672 								qSize++;
    673 
    674 								break;
    675 							}
    676 						}
    677 					}
    678 				}
    679 			}
    680 		}
    681 
    682 		// Find primitive tasks
    683 		if(currentDraw == nextDraw)
    684 		{
    685 			return;   // No more primitives to process
    686 		}
    687 
    688 		for(int unit = 0; unit < unitCount; unit++)
    689 		{
    690 			DrawCall *draw = drawList[currentDraw & DRAW_COUNT_BITS];
    691 
    692 			int primitive = draw->primitive;
    693 			int count = draw->count;
    694 
    695 			if(primitive >= count)
    696 			{
    697 				++currentDraw; // Atomic
    698 
    699 				if(currentDraw == nextDraw)
    700 				{
    701 					return;   // No more primitives to process
    702 				}
    703 
    704 				draw = drawList[currentDraw & DRAW_COUNT_BITS];
    705 			}
    706 
    707 			if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
    708 			{
    709 				primitive = draw->primitive;
    710 				count = draw->count;
    711 				int batch = draw->batchSize;
    712 
    713 				primitiveProgress[unit].drawCall = currentDraw;
    714 				primitiveProgress[unit].firstPrimitive = primitive;
    715 				primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
    716 
    717 				draw->primitive += batch;
    718 
    719 				Task &task = taskQueue[qHead];
    720 				task.type = Task::PRIMITIVES;
    721 				task.primitiveUnit = unit;
    722 
    723 				primitiveProgress[unit].references = -1;
    724 
    725 				// Commit to the task queue
    726 				qHead = (qHead + 1) & TASK_COUNT_BITS;
    727 				qSize++;
    728 			}
    729 		}
    730 	}
    731 
	// Assigns the next task to the calling thread. Runs entirely under schedulerMutex.
	// If the queue is empty the thread's task becomes SUSPEND; otherwise the oldest queued
	// task is taken and suspended threads are woken for the remaining work.
	void Renderer::scheduleTask(int threadIndex)
	{
		schedulerMutex.lock();

		int curThreadsAwake = threadsAwake;

		// Refill the queue when it holds fewer tasks than the sleeping threads plus this one could take
		if((int)qSize < threadCount - curThreadsAwake + 1)
		{
			findAvailableTasks();
		}

		if(qSize != 0)
		{
			// Oldest entry of the circular queue: qHead is the next write position
			task[threadIndex] = taskQueue[(qHead - qSize) & TASK_COUNT_BITS];
			qSize--;

			if(curThreadsAwake != threadCount)
			{
				int wakeup = qSize - curThreadsAwake + 1;

				for(int i = 0; i < threadCount && wakeup > 0; i++)
				{
					if(task[i].type == Task::SUSPEND)
					{
						// Wait for the thread to signal its suspension before resuming it
						suspend[i]->wait();
						task[i].type = Task::RESUME;
						resume[i]->signal();

						++threadsAwake; // Atomic
						wakeup--;
					}
				}
			}
		}
		else
		{
			// Nothing to do: taskLoop() exits when it sees SUSPEND
			task[threadIndex].type = Task::SUSPEND;

			--threadsAwake; // Atomic
		}

		schedulerMutex.unlock();
	}
    775 
    776 	void Renderer::executeTask(int threadIndex)
    777 	{
    778 		#if PERF_HUD
    779 			int64_t startTick = Timer::ticks();
    780 		#endif
    781 
    782 		switch(task[threadIndex].type)
    783 		{
    784 		case Task::PRIMITIVES:
    785 			{
    786 				int unit = task[threadIndex].primitiveUnit;
    787 
    788 				int input = primitiveProgress[unit].firstPrimitive;
    789 				int count = primitiveProgress[unit].primitiveCount;
    790 				DrawCall *draw = drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
    791 				int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
    792 
    793 				processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
    794 
    795 				#if PERF_HUD
    796 					int64_t time = Timer::ticks();
    797 					vertexTime[threadIndex] += time - startTick;
    798 					startTick = time;
    799 				#endif
    800 
    801 				int visible = 0;
    802 
    803 				if(!draw->setupState.rasterizerDiscard)
    804 				{
    805 					visible = (this->*setupPrimitives)(unit, count);
    806 				}
    807 
    808 				primitiveProgress[unit].visible = visible;
    809 				primitiveProgress[unit].references = clusterCount;
    810 
    811 				#if PERF_HUD
    812 					setupTime[threadIndex] += Timer::ticks() - startTick;
    813 				#endif
    814 			}
    815 			break;
    816 		case Task::PIXELS:
    817 			{
    818 				int unit = task[threadIndex].primitiveUnit;
    819 				int visible = primitiveProgress[unit].visible;
    820 
    821 				if(visible > 0)
    822 				{
    823 					int cluster = task[threadIndex].pixelCluster;
    824 					Primitive *primitive = primitiveBatch[unit];
    825 					DrawCall *draw = drawList[pixelProgress[cluster].drawCall & DRAW_COUNT_BITS];
    826 					DrawData *data = draw->data;
    827 					PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
    828 
    829 					pixelRoutine(primitive, visible, cluster, data);
    830 				}
    831 
    832 				finishRendering(task[threadIndex]);
    833 
    834 				#if PERF_HUD
    835 					pixelTime[threadIndex] += Timer::ticks() - startTick;
    836 				#endif
    837 			}
    838 			break;
    839 		case Task::RESUME:
    840 			break;
    841 		case Task::SUSPEND:
    842 			break;
    843 		default:
    844 			ASSERT(false);
    845 		}
    846 	}
    847 
	// Acquires and immediately releases the renderer's sync resource. draw() locks the same
	// resource per draw call and finishRendering() unlocks it when the draw completes.
	void Renderer::synchronize()
	{
		sync->lock(sw::PUBLIC);
		sync->unlock();
	}
    853 
	// Called after a pixel task has run. Advances the cluster's progress, drops one reference
	// on the primitive batch, and, when the `ref == 0` checks below pass for both the batch and
	// the draw call, resolves the queries, releases everything draw() locked or bound, and
	// marks the draw call as free again.
	void Renderer::finishRendering(Task &pixelTask)
	{
		int unit = pixelTask.primitiveUnit;
		int cluster = pixelTask.pixelCluster;

		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
		DrawData &data = *draw.data;
		int primitive = primitiveProgress[unit].firstPrimitive;
		int count = primitiveProgress[unit].primitiveCount;
		int processedPrimitives = primitive + count;

		pixelProgress[cluster].processedPrimitives = processedPrimitives;

		// This cluster has rendered the whole draw call: move it on to the next one
		if(pixelProgress[cluster].processedPrimitives >= draw.count)
		{
			++pixelProgress[cluster].drawCall; // Atomic
			pixelProgress[cluster].processedPrimitives = 0;
		}

		// NOTE(review): the `ref == 0` tests rely on the value the counter type's postfix `--` yields;
		// confirm against that type's definition.
		int ref = primitiveProgress[unit].references--; // Atomic

		if(ref == 0)
		{
			ref = draw.references--; // Atomic

			if(ref == 0)
			{
				#if PERF_PROFILE
					for(int cluster = 0; cluster < clusterCount; cluster++)
					{
						for(int i = 0; i < PERF_TIMERS; i++)
						{
							profiler.cycles[i] += data.cycles[i][cluster];
						}
					}
				#endif

				// Accumulate the results into the queries attached by draw() and release them
				if(draw.queries)
				{
					for(auto &query : *(draw.queries))
					{
						switch(query->type)
						{
						case Query::FRAGMENTS_PASSED:
							for(int cluster = 0; cluster < clusterCount; cluster++)
							{
								query->data += data.occlusion[cluster];
							}
							break;
						case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
							query->data += processedPrimitives;
							break;
						default:
							break;
						}

						--query->reference; // Atomic
					}

					delete draw.queries;
					draw.queries = 0;
				}

				// Release every resource locked by draw()
				for(int i = 0; i < RENDERTARGETS; i++)
				{
					if(draw.renderTarget[i])
					{
						draw.renderTarget[i]->unlockInternal();
					}
				}

				if(draw.depthBuffer)
				{
					draw.depthBuffer->unlockInternal();
				}

				if(draw.stencilBuffer)
				{
					draw.stencilBuffer->unlockStencil();
				}

				for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
				{
					if(draw.texture[i])
					{
						draw.texture[i]->unlock();
					}
				}

				for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
				{
					if(draw.vertexStream[i])
					{
						draw.vertexStream[i]->unlock();
					}
				}

				if(draw.indexBuffer)
				{
					draw.indexBuffer->unlock();
				}

				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
				{
					if(draw.pUniformBuffers[i])
					{
						draw.pUniformBuffers[i]->unlock();
					}
					if(draw.vUniformBuffers[i])
					{
						draw.vUniformBuffers[i]->unlock();
					}
				}

				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
				{
					if(draw.transformFeedbackBuffers[i])
					{
						draw.transformFeedbackBuffers[i]->unlock();
					}
				}

				draw.vertexRoutine->unbind();
				draw.setupRoutine->unbind();
				draw.pixelRoutine->unbind();

				// Matches the sync->lock() taken in draw()
				sync->unlock();

				// -1 makes the slot available to draw() again; wake it if it is waiting for one
				draw.references = -1;
				resumeApp->signal();
			}
		}

		// Allow findAvailableTasks() to schedule this cluster again
		pixelProgress[cluster].executing = false;
	}
    989 
    990 	void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
    991 	{
    992 		Triangle *triangle = triangleBatch[unit];
    993 		int primitiveDrawCall = primitiveProgress[unit].drawCall;
    994 		DrawCall *draw = drawList[primitiveDrawCall & DRAW_COUNT_BITS];
    995 		DrawData *data = draw->data;
    996 		VertexTask *task = vertexTask[thread];
    997 
    998 		const void *indices = data->indices;
    999 		VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
   1000 
   1001 		if(task->vertexCache.drawCall != primitiveDrawCall)
   1002 		{
   1003 			task->vertexCache.clear();
   1004 			task->vertexCache.drawCall = primitiveDrawCall;
   1005 		}
   1006 
   1007 		unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
   1008 
   1009 		switch(draw->drawType)
   1010 		{
   1011 		case DRAW_POINTLIST:
   1012 			{
   1013 				unsigned int index = start;
   1014 
   1015 				for(unsigned int i = 0; i < triangleCount; i++)
   1016 				{
   1017 					batch[i][0] = index;
   1018 					batch[i][1] = index;
   1019 					batch[i][2] = index;
   1020 
   1021 					index += 1;
   1022 				}
   1023 			}
   1024 			break;
   1025 		case DRAW_LINELIST:
   1026 			{
   1027 				unsigned int index = 2 * start;
   1028 
   1029 				for(unsigned int i = 0; i < triangleCount; i++)
   1030 				{
   1031 					batch[i][0] = index + 0;
   1032 					batch[i][1] = index + 1;
   1033 					batch[i][2] = index + 1;
   1034 
   1035 					index += 2;
   1036 				}
   1037 			}
   1038 			break;
   1039 		case DRAW_LINESTRIP:
   1040 			{
   1041 				unsigned int index = start;
   1042 
   1043 				for(unsigned int i = 0; i < triangleCount; i++)
   1044 				{
   1045 					batch[i][0] = index + 0;
   1046 					batch[i][1] = index + 1;
   1047 					batch[i][2] = index + 1;
   1048 
   1049 					index += 1;
   1050 				}
   1051 			}
   1052 			break;
   1053 		case DRAW_TRIANGLELIST:
   1054 			{
   1055 				unsigned int index = 3 * start;
   1056 
   1057 				for(unsigned int i = 0; i < triangleCount; i++)
   1058 				{
   1059 					batch[i][0] = index + 0;
   1060 					batch[i][1] = index + 1;
   1061 					batch[i][2] = index + 2;
   1062 
   1063 					index += 3;
   1064 				}
   1065 			}
   1066 			break;
   1067 		case DRAW_TRIANGLESTRIP:
   1068 			{
   1069 				unsigned int index = start;
   1070 
   1071 				for(unsigned int i = 0; i < triangleCount; i++)
   1072 				{
   1073 					if(leadingVertexFirst)
   1074 					{
   1075 						batch[i][0] = index + 0;
   1076 						batch[i][1] = index + (index & 1) + 1;
   1077 						batch[i][2] = index + (~index & 1) + 1;
   1078 					}
   1079 					else
   1080 					{
   1081 						batch[i][0] = index + (index & 1);
   1082 						batch[i][1] = index + (~index & 1);
   1083 						batch[i][2] = index + 2;
   1084 					}
   1085 
   1086 					index += 1;
   1087 				}
   1088 			}
   1089 			break;
   1090 		case DRAW_TRIANGLEFAN:
   1091 			{
   1092 				unsigned int index = start;
   1093 
   1094 				for(unsigned int i = 0; i < triangleCount; i++)
   1095 				{
   1096 					if(leadingVertexFirst)
   1097 					{
   1098 						batch[i][0] = index + 1;
   1099 						batch[i][1] = index + 2;
   1100 						batch[i][2] = 0;
   1101 					}
   1102 					else
   1103 					{
   1104 						batch[i][0] = 0;
   1105 						batch[i][1] = index + 1;
   1106 						batch[i][2] = index + 2;
   1107 					}
   1108 
   1109 					index += 1;
   1110 				}
   1111 			}
   1112 			break;
   1113 		case DRAW_INDEXEDPOINTLIST16:
   1114 			{
   1115 				const unsigned short *index = (const unsigned short*)indices + start;
   1116 
   1117 				for(unsigned int i = 0; i < triangleCount; i++)
   1118 				{
   1119 					batch[i][0] = *index;
   1120 					batch[i][1] = *index;
   1121 					batch[i][2] = *index;
   1122 
   1123 					index += 1;
   1124 				}
   1125 			}
   1126 			break;
   1127 		case DRAW_INDEXEDPOINTLIST32:
   1128 			{
   1129 				const unsigned int *index = (const unsigned int*)indices + start;
   1130 
   1131 				for(unsigned int i = 0; i < triangleCount; i++)
   1132 				{
   1133 					batch[i][0] = *index;
   1134 					batch[i][1] = *index;
   1135 					batch[i][2] = *index;
   1136 
   1137 					index += 1;
   1138 				}
   1139 			}
   1140 			break;
   1141 		case DRAW_INDEXEDLINELIST16:
   1142 			{
   1143 				const unsigned short *index = (const unsigned short*)indices + 2 * start;
   1144 
   1145 				for(unsigned int i = 0; i < triangleCount; i++)
   1146 				{
   1147 					batch[i][0] = index[0];
   1148 					batch[i][1] = index[1];
   1149 					batch[i][2] = index[1];
   1150 
   1151 					index += 2;
   1152 				}
   1153 			}
   1154 			break;
   1155 		case DRAW_INDEXEDLINELIST32:
   1156 			{
   1157 				const unsigned int *index = (const unsigned int*)indices + 2 * start;
   1158 
   1159 				for(unsigned int i = 0; i < triangleCount; i++)
   1160 				{
   1161 					batch[i][0] = index[0];
   1162 					batch[i][1] = index[1];
   1163 					batch[i][2] = index[1];
   1164 
   1165 					index += 2;
   1166 				}
   1167 			}
   1168 			break;
   1169 		case DRAW_INDEXEDLINESTRIP16:
   1170 			{
   1171 				const unsigned short *index = (const unsigned short*)indices + start;
   1172 
   1173 				for(unsigned int i = 0; i < triangleCount; i++)
   1174 				{
   1175 					batch[i][0] = index[0];
   1176 					batch[i][1] = index[1];
   1177 					batch[i][2] = index[1];
   1178 
   1179 					index += 1;
   1180 				}
   1181 			}
   1182 			break;
   1183 		case DRAW_INDEXEDLINESTRIP32:
   1184 			{
   1185 				const unsigned int *index = (const unsigned int*)indices + start;
   1186 
   1187 				for(unsigned int i = 0; i < triangleCount; i++)
   1188 				{
   1189 					batch[i][0] = index[0];
   1190 					batch[i][1] = index[1];
   1191 					batch[i][2] = index[1];
   1192 
   1193 					index += 1;
   1194 				}
   1195 			}
   1196 			break;
   1197 		case DRAW_INDEXEDTRIANGLELIST16:
   1198 			{
   1199 				const unsigned short *index = (const unsigned short*)indices + 3 * start;
   1200 
   1201 				for(unsigned int i = 0; i < triangleCount; i++)
   1202 				{
   1203 					batch[i][0] = index[0];
   1204 					batch[i][1] = index[1];
   1205 					batch[i][2] = index[2];
   1206 
   1207 					index += 3;
   1208 				}
   1209 			}
   1210 			break;
   1211 		case DRAW_INDEXEDTRIANGLELIST32:
   1212 			{
   1213 				const unsigned int *index = (const unsigned int*)indices + 3 * start;
   1214 
   1215 				for(unsigned int i = 0; i < triangleCount; i++)
   1216 				{
   1217 					batch[i][0] = index[0];
   1218 					batch[i][1] = index[1];
   1219 					batch[i][2] = index[2];
   1220 
   1221 					index += 3;
   1222 				}
   1223 			}
   1224 			break;
   1225 		case DRAW_INDEXEDTRIANGLESTRIP16:
   1226 			{
   1227 				const unsigned short *index = (const unsigned short*)indices + start;
   1228 
   1229 				for(unsigned int i = 0; i < triangleCount; i++)
   1230 				{
   1231 					batch[i][0] = index[0];
   1232 					batch[i][1] = index[((start + i) & 1) + 1];
   1233 					batch[i][2] = index[(~(start + i) & 1) + 1];
   1234 
   1235 					index += 1;
   1236 				}
   1237 			}
   1238 			break;
   1239 		case DRAW_INDEXEDTRIANGLESTRIP32:
   1240 			{
   1241 				const unsigned int *index = (const unsigned int*)indices + start;
   1242 
   1243 				for(unsigned int i = 0; i < triangleCount; i++)
   1244 				{
   1245 					batch[i][0] = index[0];
   1246 					batch[i][1] = index[((start + i) & 1) + 1];
   1247 					batch[i][2] = index[(~(start + i) & 1) + 1];
   1248 
   1249 					index += 1;
   1250 				}
   1251 			}
   1252 			break;
   1253 		case DRAW_INDEXEDTRIANGLEFAN16:
   1254 			{
   1255 				const unsigned short *index = (const unsigned short*)indices;
   1256 
   1257 				for(unsigned int i = 0; i < triangleCount; i++)
   1258 				{
   1259 					batch[i][0] = index[start + i + 1];
   1260 					batch[i][1] = index[start + i + 2];
   1261 					batch[i][2] = index[0];
   1262 				}
   1263 			}
   1264 			break;
   1265 		case DRAW_INDEXEDTRIANGLEFAN32:
   1266 			{
   1267 				const unsigned int *index = (const unsigned int*)indices;
   1268 
   1269 				for(unsigned int i = 0; i < triangleCount; i++)
   1270 				{
   1271 					batch[i][0] = index[start + i + 1];
   1272 					batch[i][1] = index[start + i + 2];
   1273 					batch[i][2] = index[0];
   1274 				}
   1275 			}
   1276 			break;
   1277 		default:
   1278 			ASSERT(false);
   1279 			return;
   1280 		}
   1281 
   1282 		task->primitiveStart = start;
   1283 		task->vertexCount = triangleCount * 3;
   1284 		vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
   1285 	}
   1286 
   1287 	int Renderer::setupTriangles(int unit, int count)
   1288 	{
   1289 		Triangle *triangle = triangleBatch[unit];
   1290 		Primitive *primitive = primitiveBatch[unit];
   1291 
   1292 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
   1293 		SetupProcessor::State &state = draw.setupState;
   1294 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
   1295 
   1296 		int ms = state.multiSample;
   1297 		int pos = state.positionRegister;
   1298 		const DrawData *data = draw.data;
   1299 		int visible = 0;
   1300 
   1301 		for(int i = 0; i < count; i++, triangle++)
   1302 		{
   1303 			Vertex &v0 = triangle->v0;
   1304 			Vertex &v1 = triangle->v1;
   1305 			Vertex &v2 = triangle->v2;
   1306 
   1307 			if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
   1308 			{
   1309 				Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
   1310 
   1311 				int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
   1312 
   1313 				if(clipFlagsOr != Clipper::CLIP_FINITE)
   1314 				{
   1315 					if(!clipper->clip(polygon, clipFlagsOr, draw))
   1316 					{
   1317 						continue;
   1318 					}
   1319 				}
   1320 
   1321 				if(setupRoutine(primitive, triangle, &polygon, data))
   1322 				{
   1323 					primitive += ms;
   1324 					visible++;
   1325 				}
   1326 			}
   1327 		}
   1328 
   1329 		return visible;
   1330 	}
   1331 
   1332 	int Renderer::setupLines(int unit, int count)
   1333 	{
   1334 		Triangle *triangle = triangleBatch[unit];
   1335 		Primitive *primitive = primitiveBatch[unit];
   1336 		int visible = 0;
   1337 
   1338 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
   1339 		SetupProcessor::State &state = draw.setupState;
   1340 
   1341 		int ms = state.multiSample;
   1342 
   1343 		for(int i = 0; i < count; i++)
   1344 		{
   1345 			if(setupLine(*primitive, *triangle, draw))
   1346 			{
   1347 				primitive += ms;
   1348 				visible++;
   1349 			}
   1350 
   1351 			triangle++;
   1352 		}
   1353 
   1354 		return visible;
   1355 	}
   1356 
   1357 	int Renderer::setupPoints(int unit, int count)
   1358 	{
   1359 		Triangle *triangle = triangleBatch[unit];
   1360 		Primitive *primitive = primitiveBatch[unit];
   1361 		int visible = 0;
   1362 
   1363 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
   1364 		SetupProcessor::State &state = draw.setupState;
   1365 
   1366 		int ms = state.multiSample;
   1367 
   1368 		for(int i = 0; i < count; i++)
   1369 		{
   1370 			if(setupPoint(*primitive, *triangle, draw))
   1371 			{
   1372 				primitive += ms;
   1373 				visible++;
   1374 			}
   1375 
   1376 			triangle++;
   1377 		}
   1378 
   1379 		return visible;
   1380 	}
   1381 
   1382 	bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
   1383 	{
   1384 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
   1385 		const SetupProcessor::State &state = draw.setupState;
   1386 		const DrawData &data = *draw.data;
   1387 
   1388 		float lineWidth = data.lineWidth;
   1389 
   1390 		Vertex &v0 = triangle.v0;
   1391 		Vertex &v1 = triangle.v1;
   1392 
   1393 		int pos = state.positionRegister;
   1394 
   1395 		const float4 &P0 = v0.v[pos];
   1396 		const float4 &P1 = v1.v[pos];
   1397 
   1398 		if(P0.w <= 0 && P1.w <= 0)
   1399 		{
   1400 			return false;
   1401 		}
   1402 
   1403 		const float W = data.Wx16[0] * (1.0f / 16.0f);
   1404 		const float H = data.Hx16[0] * (1.0f / 16.0f);
   1405 
   1406 		float dx = W * (P1.x / P1.w - P0.x / P0.w);
   1407 		float dy = H * (P1.y / P1.w - P0.y / P0.w);
   1408 
   1409 		if(dx == 0 && dy == 0)
   1410 		{
   1411 			return false;
   1412 		}
   1413 
   1414 		if(state.multiSample > 1)   // Rectangle
   1415 		{
   1416 			float4 P[4];
   1417 			int C[4];
   1418 
   1419 			P[0] = P0;
   1420 			P[1] = P1;
   1421 			P[2] = P1;
   1422 			P[3] = P0;
   1423 
   1424 			float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
   1425 
   1426 			dx *= scale;
   1427 			dy *= scale;
   1428 
   1429 			float dx0h = dx * P0.w / H;
   1430 			float dy0w = dy * P0.w / W;
   1431 
   1432 			float dx1h = dx * P1.w / H;
   1433 			float dy1w = dy * P1.w / W;
   1434 
   1435 			P[0].x += -dy0w;
   1436 			P[0].y += +dx0h;
   1437 			C[0] = clipper->computeClipFlags(P[0]);
   1438 
   1439 			P[1].x += -dy1w;
   1440 			P[1].y += +dx1h;
   1441 			C[1] = clipper->computeClipFlags(P[1]);
   1442 
   1443 			P[2].x += +dy1w;
   1444 			P[2].y += -dx1h;
   1445 			C[2] = clipper->computeClipFlags(P[2]);
   1446 
   1447 			P[3].x += +dy0w;
   1448 			P[3].y += -dx0h;
   1449 			C[3] = clipper->computeClipFlags(P[3]);
   1450 
   1451 			if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
   1452 			{
   1453 				Polygon polygon(P, 4);
   1454 
   1455 				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
   1456 
   1457 				if(clipFlagsOr != Clipper::CLIP_FINITE)
   1458 				{
   1459 					if(!clipper->clip(polygon, clipFlagsOr, draw))
   1460 					{
   1461 						return false;
   1462 					}
   1463 				}
   1464 
   1465 				return setupRoutine(&primitive, &triangle, &polygon, &data);
   1466 			}
   1467 		}
   1468 		else   // Diamond test convention
   1469 		{
   1470 			float4 P[8];
   1471 			int C[8];
   1472 
   1473 			P[0] = P0;
   1474 			P[1] = P0;
   1475 			P[2] = P0;
   1476 			P[3] = P0;
   1477 			P[4] = P1;
   1478 			P[5] = P1;
   1479 			P[6] = P1;
   1480 			P[7] = P1;
   1481 
   1482 			float dx0 = lineWidth * 0.5f * P0.w / W;
   1483 			float dy0 = lineWidth * 0.5f * P0.w / H;
   1484 
   1485 			float dx1 = lineWidth * 0.5f * P1.w / W;
   1486 			float dy1 = lineWidth * 0.5f * P1.w / H;
   1487 
   1488 			P[0].x += -dx0;
   1489 			C[0] = clipper->computeClipFlags(P[0]);
   1490 
   1491 			P[1].y += +dy0;
   1492 			C[1] = clipper->computeClipFlags(P[1]);
   1493 
   1494 			P[2].x += +dx0;
   1495 			C[2] = clipper->computeClipFlags(P[2]);
   1496 
   1497 			P[3].y += -dy0;
   1498 			C[3] = clipper->computeClipFlags(P[3]);
   1499 
   1500 			P[4].x += -dx1;
   1501 			C[4] = clipper->computeClipFlags(P[4]);
   1502 
   1503 			P[5].y += +dy1;
   1504 			C[5] = clipper->computeClipFlags(P[5]);
   1505 
   1506 			P[6].x += +dx1;
   1507 			C[6] = clipper->computeClipFlags(P[6]);
   1508 
   1509 			P[7].y += -dy1;
   1510 			C[7] = clipper->computeClipFlags(P[7]);
   1511 
   1512 			if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
   1513 			{
   1514 				float4 L[6];
   1515 
   1516 				if(dx > -dy)
   1517 				{
   1518 					if(dx > dy)   // Right
   1519 					{
   1520 						L[0] = P[0];
   1521 						L[1] = P[1];
   1522 						L[2] = P[5];
   1523 						L[3] = P[6];
   1524 						L[4] = P[7];
   1525 						L[5] = P[3];
   1526 					}
   1527 					else   // Down
   1528 					{
   1529 						L[0] = P[0];
   1530 						L[1] = P[4];
   1531 						L[2] = P[5];
   1532 						L[3] = P[6];
   1533 						L[4] = P[2];
   1534 						L[5] = P[3];
   1535 					}
   1536 				}
   1537 				else
   1538 				{
   1539 					if(dx > dy)   // Up
   1540 					{
   1541 						L[0] = P[0];
   1542 						L[1] = P[1];
   1543 						L[2] = P[2];
   1544 						L[3] = P[6];
   1545 						L[4] = P[7];
   1546 						L[5] = P[4];
   1547 					}
   1548 					else   // Left
   1549 					{
   1550 						L[0] = P[1];
   1551 						L[1] = P[2];
   1552 						L[2] = P[3];
   1553 						L[3] = P[7];
   1554 						L[4] = P[4];
   1555 						L[5] = P[5];
   1556 					}
   1557 				}
   1558 
   1559 				Polygon polygon(L, 6);
   1560 
   1561 				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
   1562 
   1563 				if(clipFlagsOr != Clipper::CLIP_FINITE)
   1564 				{
   1565 					if(!clipper->clip(polygon, clipFlagsOr, draw))
   1566 					{
   1567 						return false;
   1568 					}
   1569 				}
   1570 
   1571 				return setupRoutine(&primitive, &triangle, &polygon, &data);
   1572 			}
   1573 		}
   1574 
   1575 		return false;
   1576 	}
   1577 
   1578 	bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
   1579 	{
   1580 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
   1581 		const SetupProcessor::State &state = draw.setupState;
   1582 		const DrawData &data = *draw.data;
   1583 
   1584 		Vertex &v = triangle.v0;
   1585 
   1586 		float pSize;
   1587 
   1588 		int pts = state.pointSizeRegister;
   1589 
   1590 		if(state.pointSizeRegister != Unused)
   1591 		{
   1592 			pSize = v.v[pts].y;
   1593 		}
   1594 		else
   1595 		{
   1596 			pSize = 1.0f;
   1597 		}
   1598 
   1599 		pSize = clamp(pSize, data.pointSizeMin, data.pointSizeMax);
   1600 
   1601 		float4 P[4];
   1602 		int C[4];
   1603 
   1604 		int pos = state.positionRegister;
   1605 
   1606 		P[0] = v.v[pos];
   1607 		P[1] = v.v[pos];
   1608 		P[2] = v.v[pos];
   1609 		P[3] = v.v[pos];
   1610 
   1611 		const float X = pSize * P[0].w * data.halfPixelX[0];
   1612 		const float Y = pSize * P[0].w * data.halfPixelY[0];
   1613 
   1614 		P[0].x -= X;
   1615 		P[0].y += Y;
   1616 		C[0] = clipper->computeClipFlags(P[0]);
   1617 
   1618 		P[1].x += X;
   1619 		P[1].y += Y;
   1620 		C[1] = clipper->computeClipFlags(P[1]);
   1621 
   1622 		P[2].x += X;
   1623 		P[2].y -= Y;
   1624 		C[2] = clipper->computeClipFlags(P[2]);
   1625 
   1626 		P[3].x -= X;
   1627 		P[3].y -= Y;
   1628 		C[3] = clipper->computeClipFlags(P[3]);
   1629 
   1630 		triangle.v1 = triangle.v0;
   1631 		triangle.v2 = triangle.v0;
   1632 
   1633 		triangle.v1.X += iround(16 * 0.5f * pSize);
   1634 		triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
   1635 
   1636 		Polygon polygon(P, 4);
   1637 
   1638 		if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
   1639 		{
   1640 			int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
   1641 
   1642 			if(clipFlagsOr != Clipper::CLIP_FINITE)
   1643 			{
   1644 				if(!clipper->clip(polygon, clipFlagsOr, draw))
   1645 				{
   1646 					return false;
   1647 				}
   1648 			}
   1649 
   1650 			return setupRoutine(&primitive, &triangle, &polygon, &data);
   1651 		}
   1652 
   1653 		return false;
   1654 	}
   1655 
   1656 	void Renderer::initializeThreads()
   1657 	{
   1658 		unitCount = ceilPow2(threadCount);
   1659 		clusterCount = ceilPow2(threadCount);
   1660 
   1661 		for(int i = 0; i < unitCount; i++)
   1662 		{
   1663 			triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
   1664 			primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
   1665 		}
   1666 
   1667 		for(int i = 0; i < threadCount; i++)
   1668 		{
   1669 			vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
   1670 			vertexTask[i]->vertexCache.drawCall = -1;
   1671 
   1672 			task[i].type = Task::SUSPEND;
   1673 
   1674 			resume[i] = new Event();
   1675 			suspend[i] = new Event();
   1676 
   1677 			Parameters parameters;
   1678 			parameters.threadIndex = i;
   1679 			parameters.renderer = this;
   1680 
   1681 			exitThreads = false;
   1682 			worker[i] = new Thread(threadFunction, &parameters);
   1683 
   1684 			suspend[i]->wait();
   1685 			suspend[i]->signal();
   1686 		}
   1687 	}
   1688 
   1689 	void Renderer::terminateThreads()
   1690 	{
   1691 		while(threadsAwake != 0)
   1692 		{
   1693 			Thread::sleep(1);
   1694 		}
   1695 
   1696 		for(int thread = 0; thread < threadCount; thread++)
   1697 		{
   1698 			if(worker[thread])
   1699 			{
   1700 				exitThreads = true;
   1701 				resume[thread]->signal();
   1702 				worker[thread]->join();
   1703 
   1704 				delete worker[thread];
   1705 				worker[thread] = 0;
   1706 				delete resume[thread];
   1707 				resume[thread] = 0;
   1708 				delete suspend[thread];
   1709 				suspend[thread] = 0;
   1710 			}
   1711 
   1712 			deallocate(vertexTask[thread]);
   1713 			vertexTask[thread] = 0;
   1714 		}
   1715 
   1716 		for(int i = 0; i < 16; i++)
   1717 		{
   1718 			deallocate(triangleBatch[i]);
   1719 			triangleBatch[i] = 0;
   1720 
   1721 			deallocate(primitiveBatch[i]);
   1722 			primitiveBatch[i] = 0;
   1723 		}
   1724 	}
   1725 
   1726 	void Renderer::loadConstants(const VertexShader *vertexShader)
   1727 	{
   1728 		size_t count = vertexShader->getLength();
   1729 
   1730 		for(size_t i = 0; i < count; i++)
   1731 		{
   1732 			const Shader::Instruction *instruction = vertexShader->getInstruction(i);
   1733 
   1734 			if(instruction->opcode == Shader::OPCODE_DEF)
   1735 			{
   1736 				int index = instruction->dst.index;
   1737 				float value[4];
   1738 
   1739 				value[0] = instruction->src[0].value[0];
   1740 				value[1] = instruction->src[0].value[1];
   1741 				value[2] = instruction->src[0].value[2];
   1742 				value[3] = instruction->src[0].value[3];
   1743 
   1744 				setVertexShaderConstantF(index, value);
   1745 			}
   1746 			else if(instruction->opcode == Shader::OPCODE_DEFI)
   1747 			{
   1748 				int index = instruction->dst.index;
   1749 				int integer[4];
   1750 
   1751 				integer[0] = instruction->src[0].integer[0];
   1752 				integer[1] = instruction->src[0].integer[1];
   1753 				integer[2] = instruction->src[0].integer[2];
   1754 				integer[3] = instruction->src[0].integer[3];
   1755 
   1756 				setVertexShaderConstantI(index, integer);
   1757 			}
   1758 			else if(instruction->opcode == Shader::OPCODE_DEFB)
   1759 			{
   1760 				int index = instruction->dst.index;
   1761 				int boolean = instruction->src[0].boolean[0];
   1762 
   1763 				setVertexShaderConstantB(index, &boolean);
   1764 			}
   1765 		}
   1766 	}
   1767 
   1768 	void Renderer::loadConstants(const PixelShader *pixelShader)
   1769 	{
   1770 		if(!pixelShader) return;
   1771 
   1772 		size_t count = pixelShader->getLength();
   1773 
   1774 		for(size_t i = 0; i < count; i++)
   1775 		{
   1776 			const Shader::Instruction *instruction = pixelShader->getInstruction(i);
   1777 
   1778 			if(instruction->opcode == Shader::OPCODE_DEF)
   1779 			{
   1780 				int index = instruction->dst.index;
   1781 				float value[4];
   1782 
   1783 				value[0] = instruction->src[0].value[0];
   1784 				value[1] = instruction->src[0].value[1];
   1785 				value[2] = instruction->src[0].value[2];
   1786 				value[3] = instruction->src[0].value[3];
   1787 
   1788 				setPixelShaderConstantF(index, value);
   1789 			}
   1790 			else if(instruction->opcode == Shader::OPCODE_DEFI)
   1791 			{
   1792 				int index = instruction->dst.index;
   1793 				int integer[4];
   1794 
   1795 				integer[0] = instruction->src[0].integer[0];
   1796 				integer[1] = instruction->src[0].integer[1];
   1797 				integer[2] = instruction->src[0].integer[2];
   1798 				integer[3] = instruction->src[0].integer[3];
   1799 
   1800 				setPixelShaderConstantI(index, integer);
   1801 			}
   1802 			else if(instruction->opcode == Shader::OPCODE_DEFB)
   1803 			{
   1804 				int index = instruction->dst.index;
   1805 				int boolean = instruction->src[0].boolean[0];
   1806 
   1807 				setPixelShaderConstantB(index, &boolean);
   1808 			}
   1809 		}
   1810 	}
   1811 
   1812 	void Renderer::setIndexBuffer(Resource *indexBuffer)
   1813 	{
   1814 		context->indexBuffer = indexBuffer;
   1815 	}
   1816 
   1817 	void Renderer::setMultiSampleMask(unsigned int mask)
   1818 	{
   1819 		context->sampleMask = mask;
   1820 	}
   1821 
   1822 	void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
   1823 	{
   1824 		sw::transparencyAntialiasing = transparencyAntialiasing;
   1825 	}
   1826 
   1827 	bool Renderer::isReadWriteTexture(int sampler)
   1828 	{
   1829 		for(int index = 0; index < RENDERTARGETS; index++)
   1830 		{
   1831 			if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
   1832 			{
   1833 				return true;
   1834 			}
   1835 		}
   1836 
   1837 		if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
   1838 		{
   1839 			return true;
   1840 		}
   1841 
   1842 		return false;
   1843 	}
   1844 
   1845 	void Renderer::updateClipper()
   1846 	{
   1847 		if(updateClipPlanes)
   1848 		{
   1849 			if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
   1850 			if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
   1851 			if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
   1852 			if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
   1853 			if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
   1854 			if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
   1855 
   1856 			updateClipPlanes = false;
   1857 		}
   1858 	}
   1859 
   1860 	void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
   1861 	{
   1862 		ASSERT(sampler < TOTAL_IMAGE_UNITS);
   1863 
   1864 		context->texture[sampler] = resource;
   1865 	}
   1866 
   1867 	void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
   1868 	{
   1869 		ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
   1870 
   1871 		context->sampler[sampler].setTextureLevel(face, level, surface, type);
   1872 	}
   1873 
   1874 	void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
   1875 	{
   1876 		if(type == SAMPLER_PIXEL)
   1877 		{
   1878 			PixelProcessor::setTextureFilter(sampler, textureFilter);
   1879 		}
   1880 		else
   1881 		{
   1882 			VertexProcessor::setTextureFilter(sampler, textureFilter);
   1883 		}
   1884 	}
   1885 
   1886 	void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
   1887 	{
   1888 		if(type == SAMPLER_PIXEL)
   1889 		{
   1890 			PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
   1891 		}
   1892 		else
   1893 		{
   1894 			VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
   1895 		}
   1896 	}
   1897 
   1898 	void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
   1899 	{
   1900 		if(type == SAMPLER_PIXEL)
   1901 		{
   1902 			PixelProcessor::setGatherEnable(sampler, enable);
   1903 		}
   1904 		else
   1905 		{
   1906 			VertexProcessor::setGatherEnable(sampler, enable);
   1907 		}
   1908 	}
   1909 
   1910 	void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
   1911 	{
   1912 		if(type == SAMPLER_PIXEL)
   1913 		{
   1914 			PixelProcessor::setAddressingModeU(sampler, addressMode);
   1915 		}
   1916 		else
   1917 		{
   1918 			VertexProcessor::setAddressingModeU(sampler, addressMode);
   1919 		}
   1920 	}
   1921 
   1922 	void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
   1923 	{
   1924 		if(type == SAMPLER_PIXEL)
   1925 		{
   1926 			PixelProcessor::setAddressingModeV(sampler, addressMode);
   1927 		}
   1928 		else
   1929 		{
   1930 			VertexProcessor::setAddressingModeV(sampler, addressMode);
   1931 		}
   1932 	}
   1933 
   1934 	void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
   1935 	{
   1936 		if(type == SAMPLER_PIXEL)
   1937 		{
   1938 			PixelProcessor::setAddressingModeW(sampler, addressMode);
   1939 		}
   1940 		else
   1941 		{
   1942 			VertexProcessor::setAddressingModeW(sampler, addressMode);
   1943 		}
   1944 	}
   1945 
   1946 	void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
   1947 	{
   1948 		if(type == SAMPLER_PIXEL)
   1949 		{
   1950 			PixelProcessor::setReadSRGB(sampler, sRGB);
   1951 		}
   1952 		else
   1953 		{
   1954 			VertexProcessor::setReadSRGB(sampler, sRGB);
   1955 		}
   1956 	}
   1957 
   1958 	void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
   1959 	{
   1960 		if(type == SAMPLER_PIXEL)
   1961 		{
   1962 			PixelProcessor::setMipmapLOD(sampler, bias);
   1963 		}
   1964 		else
   1965 		{
   1966 			VertexProcessor::setMipmapLOD(sampler, bias);
   1967 		}
   1968 	}
   1969 
   1970 	void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
   1971 	{
   1972 		if(type == SAMPLER_PIXEL)
   1973 		{
   1974 			PixelProcessor::setBorderColor(sampler, borderColor);
   1975 		}
   1976 		else
   1977 		{
   1978 			VertexProcessor::setBorderColor(sampler, borderColor);
   1979 		}
   1980 	}
   1981 
   1982 	void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
   1983 	{
   1984 		if(type == SAMPLER_PIXEL)
   1985 		{
   1986 			PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
   1987 		}
   1988 		else
   1989 		{
   1990 			VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
   1991 		}
   1992 	}
   1993 
   1994 	void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering)
   1995 	{
   1996 		if(type == SAMPLER_PIXEL)
   1997 		{
   1998 			PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
   1999 		}
   2000 		else
   2001 		{
   2002 			VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
   2003 		}
   2004 	}
   2005 
   2006 	void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
   2007 	{
   2008 		if(type == SAMPLER_PIXEL)
   2009 		{
   2010 			PixelProcessor::setSwizzleR(sampler, swizzleR);
   2011 		}
   2012 		else
   2013 		{
   2014 			VertexProcessor::setSwizzleR(sampler, swizzleR);
   2015 		}
   2016 	}
   2017 
   2018 	void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
   2019 	{
   2020 		if(type == SAMPLER_PIXEL)
   2021 		{
   2022 			PixelProcessor::setSwizzleG(sampler, swizzleG);
   2023 		}
   2024 		else
   2025 		{
   2026 			VertexProcessor::setSwizzleG(sampler, swizzleG);
   2027 		}
   2028 	}
   2029 
   2030 	void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
   2031 	{
   2032 		if(type == SAMPLER_PIXEL)
   2033 		{
   2034 			PixelProcessor::setSwizzleB(sampler, swizzleB);
   2035 		}
   2036 		else
   2037 		{
   2038 			VertexProcessor::setSwizzleB(sampler, swizzleB);
   2039 		}
   2040 	}
   2041 
   2042 	void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
   2043 	{
   2044 		if(type == SAMPLER_PIXEL)
   2045 		{
   2046 			PixelProcessor::setSwizzleA(sampler, swizzleA);
   2047 		}
   2048 		else
   2049 		{
   2050 			VertexProcessor::setSwizzleA(sampler, swizzleA);
   2051 		}
   2052 	}
   2053 
   2054 	void Renderer::setCompareFunc(SamplerType type, int sampler, CompareFunc compFunc)
   2055 	{
   2056 		if(type == SAMPLER_PIXEL)
   2057 		{
   2058 			PixelProcessor::setCompareFunc(sampler, compFunc);
   2059 		}
   2060 		else
   2061 		{
   2062 			VertexProcessor::setCompareFunc(sampler, compFunc);
   2063 		}
   2064 	}
   2065 
   2066 	void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
   2067 	{
   2068 		if(type == SAMPLER_PIXEL)
   2069 		{
   2070 			PixelProcessor::setBaseLevel(sampler, baseLevel);
   2071 		}
   2072 		else
   2073 		{
   2074 			VertexProcessor::setBaseLevel(sampler, baseLevel);
   2075 		}
   2076 	}
   2077 
   2078 	void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
   2079 	{
   2080 		if(type == SAMPLER_PIXEL)
   2081 		{
   2082 			PixelProcessor::setMaxLevel(sampler, maxLevel);
   2083 		}
   2084 		else
   2085 		{
   2086 			VertexProcessor::setMaxLevel(sampler, maxLevel);
   2087 		}
   2088 	}
   2089 
   2090 	void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
   2091 	{
   2092 		if(type == SAMPLER_PIXEL)
   2093 		{
   2094 			PixelProcessor::setMinLod(sampler, minLod);
   2095 		}
   2096 		else
   2097 		{
   2098 			VertexProcessor::setMinLod(sampler, minLod);
   2099 		}
   2100 	}
   2101 
   2102 	void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
   2103 	{
   2104 		if(type == SAMPLER_PIXEL)
   2105 		{
   2106 			PixelProcessor::setMaxLod(sampler, maxLod);
   2107 		}
   2108 		else
   2109 		{
   2110 			VertexProcessor::setMaxLod(sampler, maxLod);
   2111 		}
   2112 	}
   2113 
   2114 	void Renderer::setLineWidth(float width)
   2115 	{
   2116 		context->lineWidth = width;
   2117 	}
   2118 
   2119 	void Renderer::setDepthBias(float bias)
   2120 	{
   2121 		context->depthBias = bias;
   2122 	}
   2123 
   2124 	void Renderer::setSlopeDepthBias(float slopeBias)
   2125 	{
   2126 		context->slopeDepthBias = slopeBias;
   2127 	}
   2128 
   2129 	void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
   2130 	{
   2131 		context->rasterizerDiscard = rasterizerDiscard;
   2132 	}
   2133 
   2134 	void Renderer::setPixelShader(const PixelShader *shader)
   2135 	{
   2136 		context->pixelShader = shader;
   2137 
   2138 		loadConstants(shader);
   2139 	}
   2140 
   2141 	void Renderer::setVertexShader(const VertexShader *shader)
   2142 	{
   2143 		context->vertexShader = shader;
   2144 
   2145 		loadConstants(shader);
   2146 	}
   2147 
   2148 	void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count)
   2149 	{
   2150 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2151 		{
   2152 			if(drawCall[i]->psDirtyConstF < index + count)
   2153 			{
   2154 				drawCall[i]->psDirtyConstF = index + count;
   2155 			}
   2156 		}
   2157 
   2158 		for(unsigned int i = 0; i < count; i++)
   2159 		{
   2160 			PixelProcessor::setFloatConstant(index + i, value);
   2161 			value += 4;
   2162 		}
   2163 	}
   2164 
   2165 	void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count)
   2166 	{
   2167 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2168 		{
   2169 			if(drawCall[i]->psDirtyConstI < index + count)
   2170 			{
   2171 				drawCall[i]->psDirtyConstI = index + count;
   2172 			}
   2173 		}
   2174 
   2175 		for(unsigned int i = 0; i < count; i++)
   2176 		{
   2177 			PixelProcessor::setIntegerConstant(index + i, value);
   2178 			value += 4;
   2179 		}
   2180 	}
   2181 
   2182 	void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
   2183 	{
   2184 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2185 		{
   2186 			if(drawCall[i]->psDirtyConstB < index + count)
   2187 			{
   2188 				drawCall[i]->psDirtyConstB = index + count;
   2189 			}
   2190 		}
   2191 
   2192 		for(unsigned int i = 0; i < count; i++)
   2193 		{
   2194 			PixelProcessor::setBooleanConstant(index + i, *boolean);
   2195 			boolean++;
   2196 		}
   2197 	}
   2198 
   2199 	void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count)
   2200 	{
   2201 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2202 		{
   2203 			if(drawCall[i]->vsDirtyConstF < index + count)
   2204 			{
   2205 				drawCall[i]->vsDirtyConstF = index + count;
   2206 			}
   2207 		}
   2208 
   2209 		for(unsigned int i = 0; i < count; i++)
   2210 		{
   2211 			VertexProcessor::setFloatConstant(index + i, value);
   2212 			value += 4;
   2213 		}
   2214 	}
   2215 
   2216 	void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count)
   2217 	{
   2218 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2219 		{
   2220 			if(drawCall[i]->vsDirtyConstI < index + count)
   2221 			{
   2222 				drawCall[i]->vsDirtyConstI = index + count;
   2223 			}
   2224 		}
   2225 
   2226 		for(unsigned int i = 0; i < count; i++)
   2227 		{
   2228 			VertexProcessor::setIntegerConstant(index + i, value);
   2229 			value += 4;
   2230 		}
   2231 	}
   2232 
   2233 	void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
   2234 	{
   2235 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2236 		{
   2237 			if(drawCall[i]->vsDirtyConstB < index + count)
   2238 			{
   2239 				drawCall[i]->vsDirtyConstB = index + count;
   2240 			}
   2241 		}
   2242 
   2243 		for(unsigned int i = 0; i < count; i++)
   2244 		{
   2245 			VertexProcessor::setBooleanConstant(index + i, *boolean);
   2246 			boolean++;
   2247 		}
   2248 	}
   2249 
   2250 	void Renderer::addQuery(Query *query)
   2251 	{
   2252 		queries.push_back(query);
   2253 	}
   2254 
   2255 	void Renderer::removeQuery(Query *query)
   2256 	{
   2257 		queries.remove(query);
   2258 	}
   2259 
   2260 	#if PERF_HUD
   2261 		int Renderer::getThreadCount()
   2262 		{
   2263 			return threadCount;
   2264 		}
   2265 
   2266 		int64_t Renderer::getVertexTime(int thread)
   2267 		{
   2268 			return vertexTime[thread];
   2269 		}
   2270 
   2271 		int64_t Renderer::getSetupTime(int thread)
   2272 		{
   2273 			return setupTime[thread];
   2274 		}
   2275 
   2276 		int64_t Renderer::getPixelTime(int thread)
   2277 		{
   2278 			return pixelTime[thread];
   2279 		}
   2280 
   2281 		void Renderer::resetTimers()
   2282 		{
   2283 			for(int thread = 0; thread < threadCount; thread++)
   2284 			{
   2285 				vertexTime[thread] = 0;
   2286 				setupTime[thread] = 0;
   2287 				pixelTime[thread] = 0;
   2288 			}
   2289 		}
   2290 	#endif
   2291 
   2292 	void Renderer::setContext(const sw::Context& context)
   2293 	{
   2294 		*(this->context) = context;
   2295 	}
   2296 
   2297 	void Renderer::setViewport(const VkViewport &viewport)
   2298 	{
   2299 		this->viewport = viewport;
   2300 	}
   2301 
   2302 	void Renderer::setScissor(const Rect &scissor)
   2303 	{
   2304 		this->scissor = scissor;
   2305 	}
   2306 
   2307 	void Renderer::setClipFlags(int flags)
   2308 	{
   2309 		clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
   2310 	}
   2311 
   2312 	void Renderer::setClipPlane(unsigned int index, const float plane[4])
   2313 	{
   2314 		if(index < MAX_CLIP_PLANES)
   2315 		{
   2316 			userPlane[index] = plane;
   2317 		}
   2318 		else ASSERT(false);
   2319 
   2320 		updateClipPlanes = true;
   2321 	}
   2322 
   2323 	void Renderer::updateConfiguration(bool initialUpdate)
   2324 	{
   2325 		bool newConfiguration = swiftConfig->hasNewConfiguration();
   2326 
   2327 		if(newConfiguration || initialUpdate)
   2328 		{
   2329 			terminateThreads();
   2330 
   2331 			SwiftConfig::Configuration configuration = {};
   2332 			swiftConfig->getConfiguration(configuration);
   2333 
   2334 			precacheVertex = !newConfiguration && configuration.precache;
   2335 			precacheSetup = !newConfiguration && configuration.precache;
   2336 			precachePixel = !newConfiguration && configuration.precache;
   2337 
   2338 			VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
   2339 			PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
   2340 			SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
   2341 
   2342 			switch(configuration.textureSampleQuality)
   2343 			{
   2344 			case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
   2345 			case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
   2346 			case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
   2347 			default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
   2348 			}
   2349 
   2350 			switch(configuration.mipmapQuality)
   2351 			{
   2352 			case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
   2353 			case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
   2354 			default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
   2355 			}
   2356 
   2357 			setPerspectiveCorrection(configuration.perspectiveCorrection);
   2358 
   2359 			switch(configuration.transcendentalPrecision)
   2360 			{
   2361 			case 0:
   2362 				logPrecision = APPROXIMATE;
   2363 				expPrecision = APPROXIMATE;
   2364 				rcpPrecision = APPROXIMATE;
   2365 				rsqPrecision = APPROXIMATE;
   2366 				break;
   2367 			case 1:
   2368 				logPrecision = PARTIAL;
   2369 				expPrecision = PARTIAL;
   2370 				rcpPrecision = PARTIAL;
   2371 				rsqPrecision = PARTIAL;
   2372 				break;
   2373 			case 2:
   2374 				logPrecision = ACCURATE;
   2375 				expPrecision = ACCURATE;
   2376 				rcpPrecision = ACCURATE;
   2377 				rsqPrecision = ACCURATE;
   2378 				break;
   2379 			case 3:
   2380 				logPrecision = WHQL;
   2381 				expPrecision = WHQL;
   2382 				rcpPrecision = WHQL;
   2383 				rsqPrecision = WHQL;
   2384 				break;
   2385 			case 4:
   2386 				logPrecision = IEEE;
   2387 				expPrecision = IEEE;
   2388 				rcpPrecision = IEEE;
   2389 				rsqPrecision = IEEE;
   2390 				break;
   2391 			default:
   2392 				logPrecision = ACCURATE;
   2393 				expPrecision = ACCURATE;
   2394 				rcpPrecision = ACCURATE;
   2395 				rsqPrecision = ACCURATE;
   2396 				break;
   2397 			}
   2398 
   2399 			switch(configuration.transparencyAntialiasing)
   2400 			{
   2401 			case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
   2402 			case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
   2403 			default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
   2404 			}
   2405 
   2406 			switch(configuration.threadCount)
   2407 			{
   2408 			case -1: threadCount = CPUID::coreCount();        break;
   2409 			case 0:  threadCount = CPUID::processAffinity();  break;
   2410 			default: threadCount = configuration.threadCount; break;
   2411 			}
   2412 
   2413 			CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
   2414 			CPUID::setEnableSSSE3(configuration.enableSSSE3);
   2415 			CPUID::setEnableSSE3(configuration.enableSSE3);
   2416 			CPUID::setEnableSSE2(configuration.enableSSE2);
   2417 			CPUID::setEnableSSE(configuration.enableSSE);
   2418 
   2419 			for(int pass = 0; pass < 10; pass++)
   2420 			{
   2421 				optimization[pass] = configuration.optimization[pass];
   2422 			}
   2423 
   2424 			forceWindowed = configuration.forceWindowed;
   2425 			complementaryDepthBuffer = configuration.complementaryDepthBuffer;
   2426 			postBlendSRGB = configuration.postBlendSRGB;
   2427 			exactColorRounding = configuration.exactColorRounding;
   2428 			forceClearRegisters = configuration.forceClearRegisters;
   2429 
   2430 		#ifndef NDEBUG
   2431 			minPrimitives = configuration.minPrimitives;
   2432 			maxPrimitives = configuration.maxPrimitives;
   2433 		#endif
   2434 		}
   2435 
   2436 		if(!initialUpdate && !worker[0])
   2437 		{
   2438 			initializeThreads();
   2439 		}
   2440 	}
   2441 }
   2442