Home | History | Annotate | Download | only in Renderer
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "Renderer.hpp"
     16 
     17 #include "Clipper.hpp"
     18 #include "Surface.hpp"
     19 #include "Primitive.hpp"
     20 #include "Polygon.hpp"
     21 #include "Main/FrameBuffer.hpp"
     22 #include "Main/SwiftConfig.hpp"
     23 #include "Reactor/Reactor.hpp"
     24 #include "Shader/Constants.hpp"
     25 #include "Common/MutexLock.hpp"
     26 #include "Common/CPUID.hpp"
     27 #include "Common/Memory.hpp"
     28 #include "Common/Resource.hpp"
     29 #include "Common/Half.hpp"
     30 #include "Common/Math.hpp"
     31 #include "Common/Timer.hpp"
     32 #include "Common/Debug.hpp"
     33 
     34 #undef max
     35 
// Passed to the SwiftConfig constructor when a Renderer is created.
bool disableServer = true;

#ifndef NDEBUG
// Debug-only bounds: Renderer::draw() returns without drawing when its
// primitive count is below minPrimitives or above maxPrimitives.
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1 << 21;
#endif
     42 
     43 namespace sw
     44 {
	// Global settings defined in other translation units (hence 'extern').
	// The first seven, together with exactColorRounding, are written by
	// setGlobalRenderingSettings() the first time a Renderer is constructed.
	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
	extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
	extern bool booleanFaceRegister;
	extern bool fullPixelPositionRegister;
	extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
	extern bool secondaryColor;             // Specular lighting is applied after texturing
	extern bool colorsDefaultToZero;

	extern bool forceWindowed;
	// When set, Renderer::draw() negates the depth range and uses 1 - near.
	extern bool complementaryDepthBuffer;
	extern bool postBlendSRGB;
	extern bool exactColorRounding;
	extern TransparencyAntialiasing transparencyAntialiasing;
	extern bool forceClearRegisters;

	extern bool precacheVertex;
	extern bool precacheSetup;
	extern bool precachePixel;
     63 
	// Upper bound on primitives per batch; Renderer::draw() divides it by the
	// multisample count to obtain the batch size of a draw call.
	static const int batchSize = 128;
	// Thread, primitive unit and pixel cluster counts. They are read by the
	// scheduler (scheduleTask / findAvailableTasks) and by draw().
	AtomicInt threadCount(1);
	AtomicInt Renderer::unitCount(1);
	AtomicInt Renderer::clusterCount(1);

	// Precision settings. Renderer::threadFunction() enables flush-to-zero and
	// denormals-are-zero on its thread when logPrecision is below IEEE.
	TranscendentalPrecision logPrecision = ACCURATE;
	TranscendentalPrecision expPrecision = ACCURATE;
	TranscendentalPrecision rcpPrecision = ACCURATE;
	TranscendentalPrecision rsqPrecision = ACCURATE;
	bool perspectiveCorrection = true;
     74 
     75 	static void setGlobalRenderingSettings(Conventions conventions, bool exactColorRounding)
     76 	{
     77 		static bool initialized = false;
     78 
     79 		if(!initialized)
     80 		{
     81 			sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
     82 			sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
     83 			sw::booleanFaceRegister = conventions.booleanFaceRegister;
     84 			sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
     85 			sw::leadingVertexFirst = conventions.leadingVertexFirst;
     86 			sw::secondaryColor = conventions.secondaryColor;
     87 			sw::colorsDefaultToZero = conventions.colorsDefaultToZero;
     88 			sw::exactColorRounding = exactColorRounding;
     89 			initialized = true;
     90 		}
     91 	}
     92 
	// Argument bundle that Renderer::threadFunction() unpacks from its void*
	// parameter.
	struct Parameters
	{
		Renderer *renderer;   // Renderer whose threadLoop() the thread runs
		int threadIndex;      // Index forwarded to threadLoop()
	};
     98 
     99 	Query::Query(Type type) : building(false), data(0), type(type), reference(1)
    100 	{
    101 	}
    102 
	// Takes one more reference on this query (see release()).
	void Query::addRef()
	{
		++reference; // Atomic
	}
    107 
    108 	void Query::release()
    109 	{
    110 		int ref = reference--; // Atomic
    111 
    112 		ASSERT(ref >= 0);
    113 
    114 		if(ref == 0)
    115 		{
    116 			delete this;
    117 		}
    118 	}
    119 
    120 	DrawCall::DrawCall()
    121 	{
    122 		queries = 0;
    123 
    124 		vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
    125 		vsDirtyConstI = 16;
    126 		vsDirtyConstB = 16;
    127 
    128 		psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
    129 		psDirtyConstI = 16;
    130 		psDirtyConstB = 16;
    131 
    132 		references = -1;
    133 
    134 		data = (DrawData*)allocate(sizeof(DrawData));
    135 		data->constants = &constants;
    136 	}
    137 
	// Frees the query list, if any, and the DrawData block obtained from
	// allocate() in the constructor.
	DrawCall::~DrawCall()
	{
		delete queries;

		deallocate(data);
	}
    144 
    145 	Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
    146 	{
    147 		setGlobalRenderingSettings(conventions, exactColorRounding);
    148 
    149 		setRenderTarget(0, 0);
    150 		clipper = new Clipper(symmetricNormalizedDepth);
    151 		blitter = new Blitter;
    152 
    153 		updateViewMatrix = true;
    154 		updateBaseMatrix = true;
    155 		updateProjectionMatrix = true;
    156 		updateClipPlanes = true;
    157 
    158 		#if PERF_HUD
    159 			resetTimers();
    160 		#endif
    161 
    162 		for(int i = 0; i < 16; i++)
    163 		{
    164 			vertexTask[i] = 0;
    165 
    166 			worker[i] = 0;
    167 			resume[i] = 0;
    168 			suspend[i] = 0;
    169 		}
    170 
    171 		threadsAwake = 0;
    172 		resumeApp = new Event();
    173 
    174 		currentDraw = 0;
    175 		nextDraw = 0;
    176 
    177 		qHead = 0;
    178 		qSize = 0;
    179 
    180 		for(int i = 0; i < 16; i++)
    181 		{
    182 			triangleBatch[i] = 0;
    183 			primitiveBatch[i] = 0;
    184 		}
    185 
    186 		for(int draw = 0; draw < DRAW_COUNT; draw++)
    187 		{
    188 			drawCall[draw] = new DrawCall();
    189 			drawList[draw] = drawCall[draw];
    190 		}
    191 
    192 		for(int unit = 0; unit < 16; unit++)
    193 		{
    194 			primitiveProgress[unit].init();
    195 		}
    196 
    197 		for(int cluster = 0; cluster < 16; cluster++)
    198 		{
    199 			pixelProgress[cluster].init();
    200 		}
    201 
    202 		clipFlags = 0;
    203 
    204 		swiftConfig = new SwiftConfig(disableServer);
    205 		updateConfiguration(true);
    206 
    207 		sync = new Resource(0);
    208 	}
    209 
	// Tears the renderer down: releases the 'sync' resource through
	// destruct(), deletes the clipper and blitter, terminates the worker
	// threads, then frees the resume event, the draw call pool and the
	// configuration object.
	Renderer::~Renderer()
	{
		sync->destruct();

		delete clipper;
		clipper = nullptr;

		delete blitter;
		blitter = nullptr;

		terminateThreads();
		delete resumeApp;

		// Matches the DRAW_COUNT allocations made in the constructor
		for(int draw = 0; draw < DRAW_COUNT; draw++)
		{
			delete drawCall[draw];
		}

		delete swiftConfig;
	}
    230 
	// This object has to be mem aligned
	//
	// Obtains the storage from sw::allocate(), passing 16 as its second
	// argument (presumably the alignment in bytes - confirm against Memory.hpp).
	// The requested size must be exactly sizeof(Renderer).
	void* Renderer::operator new(size_t size)
	{
		ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
		return sw::allocate(sizeof(Renderer), 16);
	}
    237 
	// Returns storage obtained by operator new above to sw::deallocate().
	void Renderer::operator delete(void * mem)
	{
		sw::deallocate(mem);
	}
    242 
    243 	void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
    244 	{
    245 		#ifndef NDEBUG
    246 			if(count < minPrimitives || count > maxPrimitives)
    247 			{
    248 				return;
    249 			}
    250 		#endif
    251 
    252 		context->drawType = drawType;
    253 
    254 		updateConfiguration();
    255 		updateClipper();
    256 
    257 		int ss = context->getSuperSampleCount();
    258 		int ms = context->getMultiSampleCount();
    259 		bool requiresSync = false;
    260 
    261 		for(int q = 0; q < ss; q++)
    262 		{
    263 			unsigned int oldMultiSampleMask = context->multiSampleMask;
    264 			context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
    265 
    266 			if(!context->multiSampleMask)
    267 			{
    268 				continue;
    269 			}
    270 
    271 			sync->lock(sw::PRIVATE);
    272 
    273 			if(update || oldMultiSampleMask != context->multiSampleMask)
    274 			{
    275 				vertexState = VertexProcessor::update(drawType);
    276 				setupState = SetupProcessor::update();
    277 				pixelState = PixelProcessor::update();
    278 
    279 				vertexRoutine = VertexProcessor::routine(vertexState);
    280 				setupRoutine = SetupProcessor::routine(setupState);
    281 				pixelRoutine = PixelProcessor::routine(pixelState);
    282 			}
    283 
    284 			int batch = batchSize / ms;
    285 
    286 			int (Renderer::*setupPrimitives)(int batch, int count);
    287 
    288 			if(context->isDrawTriangle())
    289 			{
    290 				switch(context->fillMode)
    291 				{
    292 				case FILL_SOLID:
    293 					setupPrimitives = &Renderer::setupSolidTriangles;
    294 					break;
    295 				case FILL_WIREFRAME:
    296 					setupPrimitives = &Renderer::setupWireframeTriangle;
    297 					batch = 1;
    298 					break;
    299 				case FILL_VERTEX:
    300 					setupPrimitives = &Renderer::setupVertexTriangle;
    301 					batch = 1;
    302 					break;
    303 				default:
    304 					ASSERT(false);
    305 					return;
    306 				}
    307 			}
    308 			else if(context->isDrawLine())
    309 			{
    310 				setupPrimitives = &Renderer::setupLines;
    311 			}
    312 			else   // Point draw
    313 			{
    314 				setupPrimitives = &Renderer::setupPoints;
    315 			}
    316 
    317 			DrawCall *draw = nullptr;
    318 
    319 			do
    320 			{
    321 				for(int i = 0; i < DRAW_COUNT; i++)
    322 				{
    323 					if(drawCall[i]->references == -1)
    324 					{
    325 						draw = drawCall[i];
    326 						drawList[nextDraw & DRAW_COUNT_BITS] = draw;
    327 
    328 						break;
    329 					}
    330 				}
    331 
    332 				if(!draw)
    333 				{
    334 					resumeApp->wait();
    335 				}
    336 			}
    337 			while(!draw);
    338 
    339 			DrawData *data = draw->data;
    340 
    341 			if(queries.size() != 0)
    342 			{
    343 				draw->queries = new std::list<Query*>();
    344 				bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
    345 				for(auto &query : queries)
    346 				{
    347 					if(includePrimitivesWrittenQueries || (query->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
    348 					{
    349 						query->addRef();
    350 						draw->queries->push_back(query);
    351 					}
    352 				}
    353 			}
    354 
    355 			draw->drawType = drawType;
    356 			draw->batchSize = batch;
    357 
    358 			vertexRoutine->bind();
    359 			setupRoutine->bind();
    360 			pixelRoutine->bind();
    361 
    362 			draw->vertexRoutine = vertexRoutine;
    363 			draw->setupRoutine = setupRoutine;
    364 			draw->pixelRoutine = pixelRoutine;
    365 			draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
    366 			draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
    367 			draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
    368 			draw->setupPrimitives = setupPrimitives;
    369 			draw->setupState = setupState;
    370 
    371 			for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
    372 			{
    373 				draw->vertexStream[i] = context->input[i].resource;
    374 				data->input[i] = context->input[i].buffer;
    375 				data->stride[i] = context->input[i].stride;
    376 
    377 				if(draw->vertexStream[i])
    378 				{
    379 					draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
    380 				}
    381 			}
    382 
    383 			if(context->indexBuffer)
    384 			{
    385 				data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
    386 			}
    387 
    388 			draw->indexBuffer = context->indexBuffer;
    389 
    390 			for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
    391 			{
    392 				draw->texture[sampler] = 0;
    393 			}
    394 
    395 			for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
    396 			{
    397 				if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
    398 				{
    399 					draw->texture[sampler] = context->texture[sampler];
    400 					draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
    401 
    402 					data->mipmap[sampler] = context->sampler[sampler].getTextureData();
    403 
    404 					requiresSync |= context->sampler[sampler].requiresSync();
    405 				}
    406 			}
    407 
    408 			if(context->pixelShader)
    409 			{
    410 				if(draw->psDirtyConstF)
    411 				{
    412 					memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
    413 					memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
    414 					draw->psDirtyConstF = 0;
    415 				}
    416 
    417 				if(draw->psDirtyConstI)
    418 				{
    419 					memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
    420 					draw->psDirtyConstI = 0;
    421 				}
    422 
    423 				if(draw->psDirtyConstB)
    424 				{
    425 					memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
    426 					draw->psDirtyConstB = 0;
    427 				}
    428 
    429 				PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
    430 			}
    431 			else
    432 			{
    433 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
    434 				{
    435 					draw->pUniformBuffers[i] = nullptr;
    436 				}
    437 			}
    438 
    439 			if(context->pixelShaderModel() <= 0x0104)
    440 			{
    441 				for(int stage = 0; stage < 8; stage++)
    442 				{
    443 					if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
    444 					{
    445 						data->textureStage[stage] = context->textureStage[stage].uniforms;
    446 					}
    447 					else break;
    448 				}
    449 			}
    450 
    451 			if(context->vertexShader)
    452 			{
    453 				if(context->vertexShader->getShaderModel() >= 0x0300)
    454 				{
    455 					for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
    456 					{
    457 						if(vertexState.sampler[sampler].textureType != TEXTURE_NULL)
    458 						{
    459 							draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
    460 							draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
    461 
    462 							data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
    463 
    464 							requiresSync |= context->sampler[TEXTURE_IMAGE_UNITS + sampler].requiresSync();
    465 						}
    466 					}
    467 				}
    468 
    469 				if(draw->vsDirtyConstF)
    470 				{
    471 					memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
    472 					draw->vsDirtyConstF = 0;
    473 				}
    474 
    475 				if(draw->vsDirtyConstI)
    476 				{
    477 					memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
    478 					draw->vsDirtyConstI = 0;
    479 				}
    480 
    481 				if(draw->vsDirtyConstB)
    482 				{
    483 					memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
    484 					draw->vsDirtyConstB = 0;
    485 				}
    486 
    487 				if(context->vertexShader->isInstanceIdDeclared())
    488 				{
    489 					data->instanceID = context->instanceID;
    490 				}
    491 
    492 				VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
    493 				VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
    494 			}
    495 			else
    496 			{
    497 				data->ff = ff;
    498 
    499 				draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
    500 				draw->vsDirtyConstI = 16;
    501 				draw->vsDirtyConstB = 16;
    502 
    503 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
    504 				{
    505 					draw->vUniformBuffers[i] = nullptr;
    506 				}
    507 
    508 				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
    509 				{
    510 					draw->transformFeedbackBuffers[i] = nullptr;
    511 				}
    512 			}
    513 
    514 			if(pixelState.stencilActive)
    515 			{
    516 				data->stencil[0] = stencil;
    517 				data->stencil[1] = stencilCCW;
    518 			}
    519 
    520 			if(pixelState.fogActive)
    521 			{
    522 				data->fog = fog;
    523 			}
    524 
    525 			if(setupState.isDrawPoint)
    526 			{
    527 				data->point = point;
    528 			}
    529 
    530 			data->lineWidth = context->lineWidth;
    531 
    532 			data->factor = factor;
    533 
    534 			if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
    535 			{
    536 				float ref = context->alphaReference * (1.0f / 255.0f);
    537 				float margin = sw::min(ref, 1.0f - ref);
    538 
    539 				if(ms == 4)
    540 				{
    541 					data->a2c0 = replicate(ref - margin * 0.6f);
    542 					data->a2c1 = replicate(ref - margin * 0.2f);
    543 					data->a2c2 = replicate(ref + margin * 0.2f);
    544 					data->a2c3 = replicate(ref + margin * 0.6f);
    545 				}
    546 				else if(ms == 2)
    547 				{
    548 					data->a2c0 = replicate(ref - margin * 0.3f);
    549 					data->a2c1 = replicate(ref + margin * 0.3f);
    550 				}
    551 				else ASSERT(false);
    552 			}
    553 
    554 			if(pixelState.occlusionEnabled)
    555 			{
    556 				for(int cluster = 0; cluster < clusterCount; cluster++)
    557 				{
    558 					data->occlusion[cluster] = 0;
    559 				}
    560 			}
    561 
    562 			#if PERF_PROFILE
    563 				for(int cluster = 0; cluster < clusterCount; cluster++)
    564 				{
    565 					for(int i = 0; i < PERF_TIMERS; i++)
    566 					{
    567 						data->cycles[i][cluster] = 0;
    568 					}
    569 				}
    570 			#endif
    571 
    572 			// Viewport
    573 			{
    574 				float W = 0.5f * viewport.width;
    575 				float H = 0.5f * viewport.height;
    576 				float X0 = viewport.x0 + W;
    577 				float Y0 = viewport.y0 + H;
    578 				float N = viewport.minZ;
    579 				float F = viewport.maxZ;
    580 				float Z = F - N;
    581 
    582 				if(context->isDrawTriangle(false))
    583 				{
    584 					N += context->depthBias;
    585 				}
    586 
    587 				if(complementaryDepthBuffer)
    588 				{
    589 					Z = -Z;
    590 					N = 1 - N;
    591 				}
    592 
    593 				static const float X[5][16] =   // Fragment offsets
    594 				{
    595 					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
    596 					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
    597 					{-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
    598 					{+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
    599 					{+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
    600 				};
    601 
    602 				static const float Y[5][16] =   // Fragment offsets
    603 				{
    604 					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
    605 					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
    606 					{-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
    607 					{-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
    608 					{-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
    609 				};
    610 
    611 				int s = sw::log2(ss);
    612 
    613 				data->Wx16 = replicate(W * 16);
    614 				data->Hx16 = replicate(H * 16);
    615 				data->X0x16 = replicate(X0 * 16 - 8);
    616 				data->Y0x16 = replicate(Y0 * 16 - 8);
    617 				data->XXXX = replicate(X[s][q] / W);
    618 				data->YYYY = replicate(Y[s][q] / H);
    619 				data->halfPixelX = replicate(0.5f / W);
    620 				data->halfPixelY = replicate(0.5f / H);
    621 				data->viewportHeight = abs(viewport.height);
    622 				data->slopeDepthBias = context->slopeDepthBias;
    623 				data->depthRange = Z;
    624 				data->depthNear = N;
    625 				draw->clipFlags = clipFlags;
    626 
    627 				if(clipFlags)
    628 				{
    629 					if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
    630 					if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
    631 					if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
    632 					if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
    633 					if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
    634 					if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
    635 				}
    636 			}
    637 
    638 			// Target
    639 			{
    640 				for(int index = 0; index < RENDERTARGETS; index++)
    641 				{
    642 					draw->renderTarget[index] = context->renderTarget[index];
    643 
    644 					if(draw->renderTarget[index])
    645 					{
    646 						unsigned int layer = context->renderTargetLayer[index];
    647 						requiresSync |= context->renderTarget[index]->requiresSync();
    648 						data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED);
    649 						data->colorBuffer[index] += q * ms * context->renderTarget[index]->getSliceB(true);
    650 						data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
    651 						data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
    652 					}
    653 				}
    654 
    655 				draw->depthBuffer = context->depthBuffer;
    656 				draw->stencilBuffer = context->stencilBuffer;
    657 
    658 				if(draw->depthBuffer)
    659 				{
    660 					unsigned int layer = context->depthBufferLayer;
    661 					requiresSync |= context->depthBuffer->requiresSync();
    662 					data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED);
    663 					data->depthBuffer += q * ms * context->depthBuffer->getSliceB(true);
    664 					data->depthPitchB = context->depthBuffer->getInternalPitchB();
    665 					data->depthSliceB = context->depthBuffer->getInternalSliceB();
    666 				}
    667 
    668 				if(draw->stencilBuffer)
    669 				{
    670 					unsigned int layer = context->stencilBufferLayer;
    671 					requiresSync |= context->stencilBuffer->requiresSync();
    672 					data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, layer, MANAGED);
    673 					data->stencilBuffer += q * ms * context->stencilBuffer->getSliceB(true);
    674 					data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
    675 					data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
    676 				}
    677 			}
    678 
    679 			// Scissor
    680 			{
    681 				data->scissorX0 = scissor.x0;
    682 				data->scissorX1 = scissor.x1;
    683 				data->scissorY0 = scissor.y0;
    684 				data->scissorY1 = scissor.y1;
    685 			}
    686 
    687 			draw->primitive = 0;
    688 			draw->count = count;
    689 
    690 			draw->references = (count + batch - 1) / batch;
    691 
    692 			schedulerMutex.lock();
    693 			++nextDraw; // Atomic
    694 			schedulerMutex.unlock();
    695 
    696 			#ifndef NDEBUG
    697 			if(threadCount == 1)   // Use main thread for draw execution
    698 			{
    699 				threadsAwake = 1;
    700 				task[0].type = Task::RESUME;
    701 
    702 				taskLoop(0);
    703 			}
    704 			else
    705 			#endif
    706 			{
    707 				if(!threadsAwake)
    708 				{
    709 					suspend[0]->wait();
    710 
    711 					threadsAwake = 1;
    712 					task[0].type = Task::RESUME;
    713 
    714 					resume[0]->signal();
    715 				}
    716 			}
    717 		}
    718 
    719 		// TODO(sugoi): This is a temporary brute-force workaround to ensure IOSurface synchronization.
    720 		if(requiresSync)
    721 		{
    722 			synchronize();
    723 		}
    724 	}
    725 
	// Forwards the clear request unchanged to the blitter.
	void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask)
	{
		blitter->clear(value, format, dest, clearRect, rgbaMask);
	}
    730 
	// Forwards the blit to the blitter; the three boolean flags are packed, in
	// this order, into the brace-initialized last argument of Blitter::blit().
	void Renderer::blit(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil, bool sRGBconversion)
	{
		blitter->blit(source, sRect, dest, dRect, {filter, isStencil, sRGBconversion});
	}
    735 
	// Forwards the 3D blit unchanged to the blitter.
	void Renderer::blit3D(Surface *source, Surface *dest)
	{
		blitter->blit3D(source, dest);
	}
    740 
    741 	void Renderer::threadFunction(void *parameters)
    742 	{
    743 		Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
    744 		int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
    745 
    746 		if(logPrecision < IEEE)
    747 		{
    748 			CPUID::setFlushToZero(true);
    749 			CPUID::setDenormalsAreZero(true);
    750 		}
    751 
    752 		renderer->threadLoop(threadIndex);
    753 	}
    754 
	// Body of a worker thread. Until exitThreads is set, it runs tasks through
	// taskLoop(), then signals suspend[threadIndex] and blocks on
	// resume[threadIndex] before starting over.
	void Renderer::threadLoop(int threadIndex)
	{
		while(!exitThreads)
		{
			taskLoop(threadIndex);

			// taskLoop() returned, so this thread's task type is SUSPEND
			suspend[threadIndex]->signal();
			resume[threadIndex]->wait();
		}
	}
    765 
	// Alternates scheduleTask() and executeTask() for the given thread until
	// the task assigned to it has type SUSPEND.
	void Renderer::taskLoop(int threadIndex)
	{
		while(task[threadIndex].type != Task::SUSPEND)
		{
			scheduleTask(threadIndex);
			executeTask(threadIndex);
		}
	}
    774 
    775 	void Renderer::findAvailableTasks()
    776 	{
    777 		// Find pixel tasks
    778 		for(int cluster = 0; cluster < clusterCount; cluster++)
    779 		{
    780 			if(!pixelProgress[cluster].executing)
    781 			{
    782 				for(int unit = 0; unit < unitCount; unit++)
    783 				{
    784 					if(primitiveProgress[unit].references > 0)   // Contains processed primitives
    785 					{
    786 						if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
    787 						{
    788 							if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
    789 							{
    790 								Task &task = taskQueue[qHead];
    791 								task.type = Task::PIXELS;
    792 								task.primitiveUnit = unit;
    793 								task.pixelCluster = cluster;
    794 
    795 								pixelProgress[cluster].executing = true;
    796 
    797 								// Commit to the task queue
    798 								qHead = (qHead + 1) & TASK_COUNT_BITS;
    799 								qSize++;
    800 
    801 								break;
    802 							}
    803 						}
    804 					}
    805 				}
    806 			}
    807 		}
    808 
    809 		// Find primitive tasks
    810 		if(currentDraw == nextDraw)
    811 		{
    812 			return;   // No more primitives to process
    813 		}
    814 
    815 		for(int unit = 0; unit < unitCount; unit++)
    816 		{
    817 			DrawCall *draw = drawList[currentDraw & DRAW_COUNT_BITS];
    818 
    819 			int primitive = draw->primitive;
    820 			int count = draw->count;
    821 
    822 			if(primitive >= count)
    823 			{
    824 				++currentDraw; // Atomic
    825 
    826 				if(currentDraw == nextDraw)
    827 				{
    828 					return;   // No more primitives to process
    829 				}
    830 
    831 				draw = drawList[currentDraw & DRAW_COUNT_BITS];
    832 			}
    833 
    834 			if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
    835 			{
    836 				primitive = draw->primitive;
    837 				count = draw->count;
    838 				int batch = draw->batchSize;
    839 
    840 				primitiveProgress[unit].drawCall = currentDraw;
    841 				primitiveProgress[unit].firstPrimitive = primitive;
    842 				primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
    843 
    844 				draw->primitive += batch;
    845 
    846 				Task &task = taskQueue[qHead];
    847 				task.type = Task::PRIMITIVES;
    848 				task.primitiveUnit = unit;
    849 
    850 				primitiveProgress[unit].references = -1;
    851 
    852 				// Commit to the task queue
    853 				qHead = (qHead + 1) & TASK_COUNT_BITS;
    854 				qSize++;
    855 			}
    856 		}
    857 	}
    858 
    859 	void Renderer::scheduleTask(int threadIndex)
    860 	{
    861 		schedulerMutex.lock();
    862 
    863 		int curThreadsAwake = threadsAwake;
    864 
    865 		if((int)qSize < threadCount - curThreadsAwake + 1)
    866 		{
    867 			findAvailableTasks();
    868 		}
    869 
    870 		if(qSize != 0)
    871 		{
    872 			task[threadIndex] = taskQueue[(qHead - qSize) & TASK_COUNT_BITS];
    873 			qSize--;
    874 
    875 			if(curThreadsAwake != threadCount)
    876 			{
    877 				int wakeup = qSize - curThreadsAwake + 1;
    878 
    879 				for(int i = 0; i < threadCount && wakeup > 0; i++)
    880 				{
    881 					if(task[i].type == Task::SUSPEND)
    882 					{
    883 						suspend[i]->wait();
    884 						task[i].type = Task::RESUME;
    885 						resume[i]->signal();
    886 
    887 						++threadsAwake; // Atomic
    888 						wakeup--;
    889 					}
    890 				}
    891 			}
    892 		}
    893 		else
    894 		{
    895 			task[threadIndex].type = Task::SUSPEND;
    896 
    897 			--threadsAwake; // Atomic
    898 		}
    899 
    900 		schedulerMutex.unlock();
    901 	}
    902 
    903 	void Renderer::executeTask(int threadIndex)
    904 	{
    905 		#if PERF_HUD
    906 			int64_t startTick = Timer::ticks();
    907 		#endif
    908 
    909 		switch(task[threadIndex].type)
    910 		{
    911 		case Task::PRIMITIVES:
    912 			{
    913 				int unit = task[threadIndex].primitiveUnit;
    914 
    915 				int input = primitiveProgress[unit].firstPrimitive;
    916 				int count = primitiveProgress[unit].primitiveCount;
    917 				DrawCall *draw = drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
    918 				int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
    919 
    920 				processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
    921 
    922 				#if PERF_HUD
    923 					int64_t time = Timer::ticks();
    924 					vertexTime[threadIndex] += time - startTick;
    925 					startTick = time;
    926 				#endif
    927 
    928 				int visible = 0;
    929 
    930 				if(!draw->setupState.rasterizerDiscard)
    931 				{
    932 					visible = (this->*setupPrimitives)(unit, count);
    933 				}
    934 
    935 				primitiveProgress[unit].visible = visible;
    936 				primitiveProgress[unit].references = clusterCount;
    937 
    938 				#if PERF_HUD
    939 					setupTime[threadIndex] += Timer::ticks() - startTick;
    940 				#endif
    941 			}
    942 			break;
    943 		case Task::PIXELS:
    944 			{
    945 				int unit = task[threadIndex].primitiveUnit;
    946 				int visible = primitiveProgress[unit].visible;
    947 
    948 				if(visible > 0)
    949 				{
    950 					int cluster = task[threadIndex].pixelCluster;
    951 					Primitive *primitive = primitiveBatch[unit];
    952 					DrawCall *draw = drawList[pixelProgress[cluster].drawCall & DRAW_COUNT_BITS];
    953 					DrawData *data = draw->data;
    954 					PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
    955 
    956 					pixelRoutine(primitive, visible, cluster, data);
    957 				}
    958 
    959 				finishRendering(task[threadIndex]);
    960 
    961 				#if PERF_HUD
    962 					pixelTime[threadIndex] += Timer::ticks() - startTick;
    963 				#endif
    964 			}
    965 			break;
    966 		case Task::RESUME:
    967 			break;
    968 		case Task::SUSPEND:
    969 			break;
    970 		default:
    971 			ASSERT(false);
    972 		}
    973 	}
    974 
	// Acquires the 'sync' resource with PUBLIC access and releases it right away.
	// NOTE(review): finishRendering() calls sync->unlock() when a draw call
	// completes, so this presumably blocks until in-flight draws have finished;
	// confirm against the Resource locking semantics.
	void Renderer::synchronize()
	{
		sync->lock(sw::PUBLIC);
		sync->unlock();
	}
    980 
    981 	void Renderer::finishRendering(Task &pixelTask)
    982 	{
    983 		int unit = pixelTask.primitiveUnit;
    984 		int cluster = pixelTask.pixelCluster;
    985 
    986 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
    987 		DrawData &data = *draw.data;
    988 		int primitive = primitiveProgress[unit].firstPrimitive;
    989 		int count = primitiveProgress[unit].primitiveCount;
    990 		int processedPrimitives = primitive + count;
    991 
    992 		pixelProgress[cluster].processedPrimitives = processedPrimitives;
    993 
    994 		if(pixelProgress[cluster].processedPrimitives >= draw.count)
    995 		{
    996 			++pixelProgress[cluster].drawCall; // Atomic
    997 			pixelProgress[cluster].processedPrimitives = 0;
    998 		}
    999 
   1000 		int ref = primitiveProgress[unit].references--; // Atomic
   1001 
   1002 		if(ref == 0)
   1003 		{
   1004 			ref = draw.references--; // Atomic
   1005 
   1006 			if(ref == 0)
   1007 			{
   1008 				#if PERF_PROFILE
   1009 					for(int cluster = 0; cluster < clusterCount; cluster++)
   1010 					{
   1011 						for(int i = 0; i < PERF_TIMERS; i++)
   1012 						{
   1013 							profiler.cycles[i] += data.cycles[i][cluster];
   1014 						}
   1015 					}
   1016 				#endif
   1017 
   1018 				if(draw.queries)
   1019 				{
   1020 					for(auto &query : *(draw.queries))
   1021 					{
   1022 						switch(query->type)
   1023 						{
   1024 						case Query::FRAGMENTS_PASSED:
   1025 							for(int cluster = 0; cluster < clusterCount; cluster++)
   1026 							{
   1027 								query->data += data.occlusion[cluster];
   1028 							}
   1029 							break;
   1030 						case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
   1031 							query->data += processedPrimitives;
   1032 							break;
   1033 						default:
   1034 							break;
   1035 						}
   1036 
   1037 						query->release();
   1038 					}
   1039 
   1040 					delete draw.queries;
   1041 					draw.queries = 0;
   1042 				}
   1043 
   1044 				for(int i = 0; i < RENDERTARGETS; i++)
   1045 				{
   1046 					if(draw.renderTarget[i])
   1047 					{
   1048 						draw.renderTarget[i]->unlockInternal();
   1049 					}
   1050 				}
   1051 
   1052 				if(draw.depthBuffer)
   1053 				{
   1054 					draw.depthBuffer->unlockInternal();
   1055 				}
   1056 
   1057 				if(draw.stencilBuffer)
   1058 				{
   1059 					draw.stencilBuffer->unlockStencil();
   1060 				}
   1061 
   1062 				for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
   1063 				{
   1064 					if(draw.texture[i])
   1065 					{
   1066 						draw.texture[i]->unlock();
   1067 					}
   1068 				}
   1069 
   1070 				for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
   1071 				{
   1072 					if(draw.vertexStream[i])
   1073 					{
   1074 						draw.vertexStream[i]->unlock();
   1075 					}
   1076 				}
   1077 
   1078 				if(draw.indexBuffer)
   1079 				{
   1080 					draw.indexBuffer->unlock();
   1081 				}
   1082 
   1083 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
   1084 				{
   1085 					if(draw.pUniformBuffers[i])
   1086 					{
   1087 						draw.pUniformBuffers[i]->unlock();
   1088 					}
   1089 					if(draw.vUniformBuffers[i])
   1090 					{
   1091 						draw.vUniformBuffers[i]->unlock();
   1092 					}
   1093 				}
   1094 
   1095 				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
   1096 				{
   1097 					if(draw.transformFeedbackBuffers[i])
   1098 					{
   1099 						draw.transformFeedbackBuffers[i]->unlock();
   1100 					}
   1101 				}
   1102 
   1103 				draw.vertexRoutine->unbind();
   1104 				draw.setupRoutine->unbind();
   1105 				draw.pixelRoutine->unbind();
   1106 
   1107 				sync->unlock();
   1108 
   1109 				draw.references = -1;
   1110 				resumeApp->signal();
   1111 			}
   1112 		}
   1113 
   1114 		pixelProgress[cluster].executing = false;
   1115 	}
   1116 
   1117 	void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
   1118 	{
   1119 		Triangle *triangle = triangleBatch[unit];
   1120 		int primitiveDrawCall = primitiveProgress[unit].drawCall;
   1121 		DrawCall *draw = drawList[primitiveDrawCall & DRAW_COUNT_BITS];
   1122 		DrawData *data = draw->data;
   1123 		VertexTask *task = vertexTask[thread];
   1124 
   1125 		const void *indices = data->indices;
   1126 		VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
   1127 
   1128 		if(task->vertexCache.drawCall != primitiveDrawCall)
   1129 		{
   1130 			task->vertexCache.clear();
   1131 			task->vertexCache.drawCall = primitiveDrawCall;
   1132 		}
   1133 
   1134 		unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
   1135 
   1136 		switch(draw->drawType)
   1137 		{
   1138 		case DRAW_POINTLIST:
   1139 			{
   1140 				unsigned int index = start;
   1141 
   1142 				for(unsigned int i = 0; i < triangleCount; i++)
   1143 				{
   1144 					batch[i][0] = index;
   1145 					batch[i][1] = index;
   1146 					batch[i][2] = index;
   1147 
   1148 					index += 1;
   1149 				}
   1150 			}
   1151 			break;
   1152 		case DRAW_LINELIST:
   1153 			{
   1154 				unsigned int index = 2 * start;
   1155 
   1156 				for(unsigned int i = 0; i < triangleCount; i++)
   1157 				{
   1158 					batch[i][0] = index + 0;
   1159 					batch[i][1] = index + 1;
   1160 					batch[i][2] = index + 1;
   1161 
   1162 					index += 2;
   1163 				}
   1164 			}
   1165 			break;
   1166 		case DRAW_LINESTRIP:
   1167 			{
   1168 				unsigned int index = start;
   1169 
   1170 				for(unsigned int i = 0; i < triangleCount; i++)
   1171 				{
   1172 					batch[i][0] = index + 0;
   1173 					batch[i][1] = index + 1;
   1174 					batch[i][2] = index + 1;
   1175 
   1176 					index += 1;
   1177 				}
   1178 			}
   1179 			break;
   1180 		case DRAW_LINELOOP:
   1181 			{
   1182 				unsigned int index = start;
   1183 
   1184 				for(unsigned int i = 0; i < triangleCount; i++)
   1185 				{
   1186 					batch[i][0] = (index + 0) % loop;
   1187 					batch[i][1] = (index + 1) % loop;
   1188 					batch[i][2] = (index + 1) % loop;
   1189 
   1190 					index += 1;
   1191 				}
   1192 			}
   1193 			break;
   1194 		case DRAW_TRIANGLELIST:
   1195 			{
   1196 				unsigned int index = 3 * start;
   1197 
   1198 				for(unsigned int i = 0; i < triangleCount; i++)
   1199 				{
   1200 					batch[i][0] = index + 0;
   1201 					batch[i][1] = index + 1;
   1202 					batch[i][2] = index + 2;
   1203 
   1204 					index += 3;
   1205 				}
   1206 			}
   1207 			break;
   1208 		case DRAW_TRIANGLESTRIP:
   1209 			{
   1210 				unsigned int index = start;
   1211 
   1212 				for(unsigned int i = 0; i < triangleCount; i++)
   1213 				{
   1214 					if(leadingVertexFirst)
   1215 					{
   1216 						batch[i][0] = index + 0;
   1217 						batch[i][1] = index + (index & 1) + 1;
   1218 						batch[i][2] = index + (~index & 1) + 1;
   1219 					}
   1220 					else
   1221 					{
   1222 						batch[i][0] = index + (index & 1);
   1223 						batch[i][1] = index + (~index & 1);
   1224 						batch[i][2] = index + 2;
   1225 					}
   1226 
   1227 					index += 1;
   1228 				}
   1229 			}
   1230 			break;
   1231 		case DRAW_TRIANGLEFAN:
   1232 			{
   1233 				unsigned int index = start;
   1234 
   1235 				for(unsigned int i = 0; i < triangleCount; i++)
   1236 				{
   1237 					if(leadingVertexFirst)
   1238 					{
   1239 						batch[i][0] = index + 1;
   1240 						batch[i][1] = index + 2;
   1241 						batch[i][2] = 0;
   1242 					}
   1243 					else
   1244 					{
   1245 						batch[i][0] = 0;
   1246 						batch[i][1] = index + 1;
   1247 						batch[i][2] = index + 2;
   1248 					}
   1249 
   1250 					index += 1;
   1251 				}
   1252 			}
   1253 			break;
   1254 		case DRAW_INDEXEDPOINTLIST8:
   1255 			{
   1256 				const unsigned char *index = (const unsigned char*)indices + start;
   1257 
   1258 				for(unsigned int i = 0; i < triangleCount; i++)
   1259 				{
   1260 					batch[i][0] = *index;
   1261 					batch[i][1] = *index;
   1262 					batch[i][2] = *index;
   1263 
   1264 					index += 1;
   1265 				}
   1266 			}
   1267 			break;
   1268 		case DRAW_INDEXEDPOINTLIST16:
   1269 			{
   1270 				const unsigned short *index = (const unsigned short*)indices + start;
   1271 
   1272 				for(unsigned int i = 0; i < triangleCount; i++)
   1273 				{
   1274 					batch[i][0] = *index;
   1275 					batch[i][1] = *index;
   1276 					batch[i][2] = *index;
   1277 
   1278 					index += 1;
   1279 				}
   1280 			}
   1281 			break;
   1282 		case DRAW_INDEXEDPOINTLIST32:
   1283 			{
   1284 				const unsigned int *index = (const unsigned int*)indices + start;
   1285 
   1286 				for(unsigned int i = 0; i < triangleCount; i++)
   1287 				{
   1288 					batch[i][0] = *index;
   1289 					batch[i][1] = *index;
   1290 					batch[i][2] = *index;
   1291 
   1292 					index += 1;
   1293 				}
   1294 			}
   1295 			break;
   1296 		case DRAW_INDEXEDLINELIST8:
   1297 			{
   1298 				const unsigned char *index = (const unsigned char*)indices + 2 * start;
   1299 
   1300 				for(unsigned int i = 0; i < triangleCount; i++)
   1301 				{
   1302 					batch[i][0] = index[0];
   1303 					batch[i][1] = index[1];
   1304 					batch[i][2] = index[1];
   1305 
   1306 					index += 2;
   1307 				}
   1308 			}
   1309 			break;
   1310 		case DRAW_INDEXEDLINELIST16:
   1311 			{
   1312 				const unsigned short *index = (const unsigned short*)indices + 2 * start;
   1313 
   1314 				for(unsigned int i = 0; i < triangleCount; i++)
   1315 				{
   1316 					batch[i][0] = index[0];
   1317 					batch[i][1] = index[1];
   1318 					batch[i][2] = index[1];
   1319 
   1320 					index += 2;
   1321 				}
   1322 			}
   1323 			break;
   1324 		case DRAW_INDEXEDLINELIST32:
   1325 			{
   1326 				const unsigned int *index = (const unsigned int*)indices + 2 * start;
   1327 
   1328 				for(unsigned int i = 0; i < triangleCount; i++)
   1329 				{
   1330 					batch[i][0] = index[0];
   1331 					batch[i][1] = index[1];
   1332 					batch[i][2] = index[1];
   1333 
   1334 					index += 2;
   1335 				}
   1336 			}
   1337 			break;
   1338 		case DRAW_INDEXEDLINESTRIP8:
   1339 			{
   1340 				const unsigned char *index = (const unsigned char*)indices + start;
   1341 
   1342 				for(unsigned int i = 0; i < triangleCount; i++)
   1343 				{
   1344 					batch[i][0] = index[0];
   1345 					batch[i][1] = index[1];
   1346 					batch[i][2] = index[1];
   1347 
   1348 					index += 1;
   1349 				}
   1350 			}
   1351 			break;
   1352 		case DRAW_INDEXEDLINESTRIP16:
   1353 			{
   1354 				const unsigned short *index = (const unsigned short*)indices + start;
   1355 
   1356 				for(unsigned int i = 0; i < triangleCount; i++)
   1357 				{
   1358 					batch[i][0] = index[0];
   1359 					batch[i][1] = index[1];
   1360 					batch[i][2] = index[1];
   1361 
   1362 					index += 1;
   1363 				}
   1364 			}
   1365 			break;
   1366 		case DRAW_INDEXEDLINESTRIP32:
   1367 			{
   1368 				const unsigned int *index = (const unsigned int*)indices + start;
   1369 
   1370 				for(unsigned int i = 0; i < triangleCount; i++)
   1371 				{
   1372 					batch[i][0] = index[0];
   1373 					batch[i][1] = index[1];
   1374 					batch[i][2] = index[1];
   1375 
   1376 					index += 1;
   1377 				}
   1378 			}
   1379 			break;
   1380 		case DRAW_INDEXEDLINELOOP8:
   1381 			{
   1382 				const unsigned char *index = (const unsigned char*)indices;
   1383 
   1384 				for(unsigned int i = 0; i < triangleCount; i++)
   1385 				{
   1386 					batch[i][0] = index[(start + i + 0) % loop];
   1387 					batch[i][1] = index[(start + i + 1) % loop];
   1388 					batch[i][2] = index[(start + i + 1) % loop];
   1389 				}
   1390 			}
   1391 			break;
   1392 		case DRAW_INDEXEDLINELOOP16:
   1393 			{
   1394 				const unsigned short *index = (const unsigned short*)indices;
   1395 
   1396 				for(unsigned int i = 0; i < triangleCount; i++)
   1397 				{
   1398 					batch[i][0] = index[(start + i + 0) % loop];
   1399 					batch[i][1] = index[(start + i + 1) % loop];
   1400 					batch[i][2] = index[(start + i + 1) % loop];
   1401 				}
   1402 			}
   1403 			break;
   1404 		case DRAW_INDEXEDLINELOOP32:
   1405 			{
   1406 				const unsigned int *index = (const unsigned int*)indices;
   1407 
   1408 				for(unsigned int i = 0; i < triangleCount; i++)
   1409 				{
   1410 					batch[i][0] = index[(start + i + 0) % loop];
   1411 					batch[i][1] = index[(start + i + 1) % loop];
   1412 					batch[i][2] = index[(start + i + 1) % loop];
   1413 				}
   1414 			}
   1415 			break;
   1416 		case DRAW_INDEXEDTRIANGLELIST8:
   1417 			{
   1418 				const unsigned char *index = (const unsigned char*)indices + 3 * start;
   1419 
   1420 				for(unsigned int i = 0; i < triangleCount; i++)
   1421 				{
   1422 					batch[i][0] = index[0];
   1423 					batch[i][1] = index[1];
   1424 					batch[i][2] = index[2];
   1425 
   1426 					index += 3;
   1427 				}
   1428 			}
   1429 			break;
   1430 		case DRAW_INDEXEDTRIANGLELIST16:
   1431 			{
   1432 				const unsigned short *index = (const unsigned short*)indices + 3 * start;
   1433 
   1434 				for(unsigned int i = 0; i < triangleCount; i++)
   1435 				{
   1436 					batch[i][0] = index[0];
   1437 					batch[i][1] = index[1];
   1438 					batch[i][2] = index[2];
   1439 
   1440 					index += 3;
   1441 				}
   1442 			}
   1443 			break;
   1444 		case DRAW_INDEXEDTRIANGLELIST32:
   1445 			{
   1446 				const unsigned int *index = (const unsigned int*)indices + 3 * start;
   1447 
   1448 				for(unsigned int i = 0; i < triangleCount; i++)
   1449 				{
   1450 					batch[i][0] = index[0];
   1451 					batch[i][1] = index[1];
   1452 					batch[i][2] = index[2];
   1453 
   1454 					index += 3;
   1455 				}
   1456 			}
   1457 			break;
   1458 		case DRAW_INDEXEDTRIANGLESTRIP8:
   1459 			{
   1460 				const unsigned char *index = (const unsigned char*)indices + start;
   1461 
   1462 				for(unsigned int i = 0; i < triangleCount; i++)
   1463 				{
   1464 					batch[i][0] = index[0];
   1465 					batch[i][1] = index[((start + i) & 1) + 1];
   1466 					batch[i][2] = index[(~(start + i) & 1) + 1];
   1467 
   1468 					index += 1;
   1469 				}
   1470 			}
   1471 			break;
   1472 		case DRAW_INDEXEDTRIANGLESTRIP16:
   1473 			{
   1474 				const unsigned short *index = (const unsigned short*)indices + start;
   1475 
   1476 				for(unsigned int i = 0; i < triangleCount; i++)
   1477 				{
   1478 					batch[i][0] = index[0];
   1479 					batch[i][1] = index[((start + i) & 1) + 1];
   1480 					batch[i][2] = index[(~(start + i) & 1) + 1];
   1481 
   1482 					index += 1;
   1483 				}
   1484 			}
   1485 			break;
   1486 		case DRAW_INDEXEDTRIANGLESTRIP32:
   1487 			{
   1488 				const unsigned int *index = (const unsigned int*)indices + start;
   1489 
   1490 				for(unsigned int i = 0; i < triangleCount; i++)
   1491 				{
   1492 					batch[i][0] = index[0];
   1493 					batch[i][1] = index[((start + i) & 1) + 1];
   1494 					batch[i][2] = index[(~(start + i) & 1) + 1];
   1495 
   1496 					index += 1;
   1497 				}
   1498 			}
   1499 			break;
   1500 		case DRAW_INDEXEDTRIANGLEFAN8:
   1501 			{
   1502 				const unsigned char *index = (const unsigned char*)indices;
   1503 
   1504 				for(unsigned int i = 0; i < triangleCount; i++)
   1505 				{
   1506 					batch[i][0] = index[start + i + 1];
   1507 					batch[i][1] = index[start + i + 2];
   1508 					batch[i][2] = index[0];
   1509 				}
   1510 			}
   1511 			break;
   1512 		case DRAW_INDEXEDTRIANGLEFAN16:
   1513 			{
   1514 				const unsigned short *index = (const unsigned short*)indices;
   1515 
   1516 				for(unsigned int i = 0; i < triangleCount; i++)
   1517 				{
   1518 					batch[i][0] = index[start + i + 1];
   1519 					batch[i][1] = index[start + i + 2];
   1520 					batch[i][2] = index[0];
   1521 				}
   1522 			}
   1523 			break;
   1524 		case DRAW_INDEXEDTRIANGLEFAN32:
   1525 			{
   1526 				const unsigned int *index = (const unsigned int*)indices;
   1527 
   1528 				for(unsigned int i = 0; i < triangleCount; i++)
   1529 				{
   1530 					batch[i][0] = index[start + i + 1];
   1531 					batch[i][1] = index[start + i + 2];
   1532 					batch[i][2] = index[0];
   1533 				}
   1534 			}
   1535 			break;
   1536 		case DRAW_QUADLIST:
   1537 			{
   1538 				unsigned int index = 4 * start / 2;
   1539 
   1540 				for(unsigned int i = 0; i < triangleCount; i += 2)
   1541 				{
   1542 					batch[i+0][0] = index + 0;
   1543 					batch[i+0][1] = index + 1;
   1544 					batch[i+0][2] = index + 2;
   1545 
   1546 					batch[i+1][0] = index + 0;
   1547 					batch[i+1][1] = index + 2;
   1548 					batch[i+1][2] = index + 3;
   1549 
   1550 					index += 4;
   1551 				}
   1552 			}
   1553 			break;
   1554 		default:
   1555 			ASSERT(false);
   1556 			return;
   1557 		}
   1558 
   1559 		task->primitiveStart = start;
   1560 		task->vertexCount = triangleCount * 3;
   1561 		vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
   1562 	}
   1563 
   1564 	int Renderer::setupSolidTriangles(int unit, int count)
   1565 	{
   1566 		Triangle *triangle = triangleBatch[unit];
   1567 		Primitive *primitive = primitiveBatch[unit];
   1568 
   1569 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
   1570 		SetupProcessor::State &state = draw.setupState;
   1571 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
   1572 
   1573 		int ms = state.multiSample;
   1574 		int pos = state.positionRegister;
   1575 		const DrawData *data = draw.data;
   1576 		int visible = 0;
   1577 
   1578 		for(int i = 0; i < count; i++, triangle++)
   1579 		{
   1580 			Vertex &v0 = triangle->v0;
   1581 			Vertex &v1 = triangle->v1;
   1582 			Vertex &v2 = triangle->v2;
   1583 
   1584 			if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
   1585 			{
   1586 				Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
   1587 
   1588 				int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
   1589 
   1590 				if(clipFlagsOr != Clipper::CLIP_FINITE)
   1591 				{
   1592 					if(!clipper->clip(polygon, clipFlagsOr, draw))
   1593 					{
   1594 						continue;
   1595 					}
   1596 				}
   1597 
   1598 				if(setupRoutine(primitive, triangle, &polygon, data))
   1599 				{
   1600 					primitive += ms;
   1601 					visible++;
   1602 				}
   1603 			}
   1604 		}
   1605 
   1606 		return visible;
   1607 	}
   1608 
   1609 	int Renderer::setupWireframeTriangle(int unit, int count)
   1610 	{
   1611 		Triangle *triangle = triangleBatch[unit];
   1612 		Primitive *primitive = primitiveBatch[unit];
   1613 		int visible = 0;
   1614 
   1615 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
   1616 		SetupProcessor::State &state = draw.setupState;
   1617 
   1618 		const Vertex &v0 = triangle[0].v0;
   1619 		const Vertex &v1 = triangle[0].v1;
   1620 		const Vertex &v2 = triangle[0].v2;
   1621 
   1622 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
   1623 
   1624 		if(state.cullMode == CULL_CLOCKWISE)
   1625 		{
   1626 			if(d >= 0) return 0;
   1627 		}
   1628 		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
   1629 		{
   1630 			if(d <= 0) return 0;
   1631 		}
   1632 
   1633 		// Copy attributes
   1634 		triangle[1].v0 = v1;
   1635 		triangle[1].v1 = v2;
   1636 		triangle[2].v0 = v2;
   1637 		triangle[2].v1 = v0;
   1638 
   1639 		if(state.color[0][0].flat)   // FIXME
   1640 		{
   1641 			for(int i = 0; i < 2; i++)
   1642 			{
   1643 				triangle[1].v0.C[i] = triangle[0].v0.C[i];
   1644 				triangle[1].v1.C[i] = triangle[0].v0.C[i];
   1645 				triangle[2].v0.C[i] = triangle[0].v0.C[i];
   1646 				triangle[2].v1.C[i] = triangle[0].v0.C[i];
   1647 			}
   1648 		}
   1649 
   1650 		for(int i = 0; i < 3; i++)
   1651 		{
   1652 			if(setupLine(*primitive, *triangle, draw))
   1653 			{
   1654 				primitive->area = 0.5f * d;
   1655 
   1656 				primitive++;
   1657 				visible++;
   1658 			}
   1659 
   1660 			triangle++;
   1661 		}
   1662 
   1663 		return visible;
   1664 	}
   1665 
   1666 	int Renderer::setupVertexTriangle(int unit, int count)
   1667 	{
   1668 		Triangle *triangle = triangleBatch[unit];
   1669 		Primitive *primitive = primitiveBatch[unit];
   1670 		int visible = 0;
   1671 
   1672 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
   1673 		SetupProcessor::State &state = draw.setupState;
   1674 
   1675 		const Vertex &v0 = triangle[0].v0;
   1676 		const Vertex &v1 = triangle[0].v1;
   1677 		const Vertex &v2 = triangle[0].v2;
   1678 
   1679 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
   1680 
   1681 		if(state.cullMode == CULL_CLOCKWISE)
   1682 		{
   1683 			if(d >= 0) return 0;
   1684 		}
   1685 		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
   1686 		{
   1687 			if(d <= 0) return 0;
   1688 		}
   1689 
   1690 		// Copy attributes
   1691 		triangle[1].v0 = v1;
   1692 		triangle[2].v0 = v2;
   1693 
   1694 		for(int i = 0; i < 3; i++)
   1695 		{
   1696 			if(setupPoint(*primitive, *triangle, draw))
   1697 			{
   1698 				primitive->area = 0.5f * d;
   1699 
   1700 				primitive++;
   1701 				visible++;
   1702 			}
   1703 
   1704 			triangle++;
   1705 		}
   1706 
   1707 		return visible;
   1708 	}
   1709 
   1710 	int Renderer::setupLines(int unit, int count)
   1711 	{
   1712 		Triangle *triangle = triangleBatch[unit];
   1713 		Primitive *primitive = primitiveBatch[unit];
   1714 		int visible = 0;
   1715 
   1716 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
   1717 		SetupProcessor::State &state = draw.setupState;
   1718 
   1719 		int ms = state.multiSample;
   1720 
   1721 		for(int i = 0; i < count; i++)
   1722 		{
   1723 			if(setupLine(*primitive, *triangle, draw))
   1724 			{
   1725 				primitive += ms;
   1726 				visible++;
   1727 			}
   1728 
   1729 			triangle++;
   1730 		}
   1731 
   1732 		return visible;
   1733 	}
   1734 
   1735 	int Renderer::setupPoints(int unit, int count)
   1736 	{
   1737 		Triangle *triangle = triangleBatch[unit];
   1738 		Primitive *primitive = primitiveBatch[unit];
   1739 		int visible = 0;
   1740 
   1741 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
   1742 		SetupProcessor::State &state = draw.setupState;
   1743 
   1744 		int ms = state.multiSample;
   1745 
   1746 		for(int i = 0; i < count; i++)
   1747 		{
   1748 			if(setupPoint(*primitive, *triangle, draw))
   1749 			{
   1750 				primitive += ms;
   1751 				visible++;
   1752 			}
   1753 
   1754 			triangle++;
   1755 		}
   1756 
   1757 		return visible;
   1758 	}
   1759 
   1760 	bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
   1761 	{
   1762 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
   1763 		const SetupProcessor::State &state = draw.setupState;
   1764 		const DrawData &data = *draw.data;
   1765 
   1766 		float lineWidth = data.lineWidth;
   1767 
   1768 		Vertex &v0 = triangle.v0;
   1769 		Vertex &v1 = triangle.v1;
   1770 
   1771 		int pos = state.positionRegister;
   1772 
   1773 		const float4 &P0 = v0.v[pos];
   1774 		const float4 &P1 = v1.v[pos];
   1775 
   1776 		if(P0.w <= 0 && P1.w <= 0)
   1777 		{
   1778 			return false;
   1779 		}
   1780 
   1781 		const float W = data.Wx16[0] * (1.0f / 16.0f);
   1782 		const float H = data.Hx16[0] * (1.0f / 16.0f);
   1783 
   1784 		float dx = W * (P1.x / P1.w - P0.x / P0.w);
   1785 		float dy = H * (P1.y / P1.w - P0.y / P0.w);
   1786 
   1787 		if(dx == 0 && dy == 0)
   1788 		{
   1789 			return false;
   1790 		}
   1791 
   1792 		if(state.multiSample > 1)   // Rectangle
   1793 		{
   1794 			float4 P[4];
   1795 			int C[4];
   1796 
   1797 			P[0] = P0;
   1798 			P[1] = P1;
   1799 			P[2] = P1;
   1800 			P[3] = P0;
   1801 
   1802 			float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
   1803 
   1804 			dx *= scale;
   1805 			dy *= scale;
   1806 
   1807 			float dx0h = dx * P0.w / H;
   1808 			float dy0w = dy * P0.w / W;
   1809 
   1810 			float dx1h = dx * P1.w / H;
   1811 			float dy1w = dy * P1.w / W;
   1812 
   1813 			P[0].x += -dy0w;
   1814 			P[0].y += +dx0h;
   1815 			C[0] = clipper->computeClipFlags(P[0]);
   1816 
   1817 			P[1].x += -dy1w;
   1818 			P[1].y += +dx1h;
   1819 			C[1] = clipper->computeClipFlags(P[1]);
   1820 
   1821 			P[2].x += +dy1w;
   1822 			P[2].y += -dx1h;
   1823 			C[2] = clipper->computeClipFlags(P[2]);
   1824 
   1825 			P[3].x += +dy0w;
   1826 			P[3].y += -dx0h;
   1827 			C[3] = clipper->computeClipFlags(P[3]);
   1828 
   1829 			if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
   1830 			{
   1831 				Polygon polygon(P, 4);
   1832 
   1833 				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
   1834 
   1835 				if(clipFlagsOr != Clipper::CLIP_FINITE)
   1836 				{
   1837 					if(!clipper->clip(polygon, clipFlagsOr, draw))
   1838 					{
   1839 						return false;
   1840 					}
   1841 				}
   1842 
   1843 				return setupRoutine(&primitive, &triangle, &polygon, &data);
   1844 			}
   1845 		}
   1846 		else   // Diamond test convention
   1847 		{
   1848 			float4 P[8];
   1849 			int C[8];
   1850 
   1851 			P[0] = P0;
   1852 			P[1] = P0;
   1853 			P[2] = P0;
   1854 			P[3] = P0;
   1855 			P[4] = P1;
   1856 			P[5] = P1;
   1857 			P[6] = P1;
   1858 			P[7] = P1;
   1859 
   1860 			float dx0 = lineWidth * 0.5f * P0.w / W;
   1861 			float dy0 = lineWidth * 0.5f * P0.w / H;
   1862 
   1863 			float dx1 = lineWidth * 0.5f * P1.w / W;
   1864 			float dy1 = lineWidth * 0.5f * P1.w / H;
   1865 
   1866 			P[0].x += -dx0;
   1867 			C[0] = clipper->computeClipFlags(P[0]);
   1868 
   1869 			P[1].y += +dy0;
   1870 			C[1] = clipper->computeClipFlags(P[1]);
   1871 
   1872 			P[2].x += +dx0;
   1873 			C[2] = clipper->computeClipFlags(P[2]);
   1874 
   1875 			P[3].y += -dy0;
   1876 			C[3] = clipper->computeClipFlags(P[3]);
   1877 
   1878 			P[4].x += -dx1;
   1879 			C[4] = clipper->computeClipFlags(P[4]);
   1880 
   1881 			P[5].y += +dy1;
   1882 			C[5] = clipper->computeClipFlags(P[5]);
   1883 
   1884 			P[6].x += +dx1;
   1885 			C[6] = clipper->computeClipFlags(P[6]);
   1886 
   1887 			P[7].y += -dy1;
   1888 			C[7] = clipper->computeClipFlags(P[7]);
   1889 
   1890 			if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
   1891 			{
   1892 				float4 L[6];
   1893 
   1894 				if(dx > -dy)
   1895 				{
   1896 					if(dx > dy)   // Right
   1897 					{
   1898 						L[0] = P[0];
   1899 						L[1] = P[1];
   1900 						L[2] = P[5];
   1901 						L[3] = P[6];
   1902 						L[4] = P[7];
   1903 						L[5] = P[3];
   1904 					}
   1905 					else   // Down
   1906 					{
   1907 						L[0] = P[0];
   1908 						L[1] = P[4];
   1909 						L[2] = P[5];
   1910 						L[3] = P[6];
   1911 						L[4] = P[2];
   1912 						L[5] = P[3];
   1913 					}
   1914 				}
   1915 				else
   1916 				{
   1917 					if(dx > dy)   // Up
   1918 					{
   1919 						L[0] = P[0];
   1920 						L[1] = P[1];
   1921 						L[2] = P[2];
   1922 						L[3] = P[6];
   1923 						L[4] = P[7];
   1924 						L[5] = P[4];
   1925 					}
   1926 					else   // Left
   1927 					{
   1928 						L[0] = P[1];
   1929 						L[1] = P[2];
   1930 						L[2] = P[3];
   1931 						L[3] = P[7];
   1932 						L[4] = P[4];
   1933 						L[5] = P[5];
   1934 					}
   1935 				}
   1936 
   1937 				Polygon polygon(L, 6);
   1938 
   1939 				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
   1940 
   1941 				if(clipFlagsOr != Clipper::CLIP_FINITE)
   1942 				{
   1943 					if(!clipper->clip(polygon, clipFlagsOr, draw))
   1944 					{
   1945 						return false;
   1946 					}
   1947 				}
   1948 
   1949 				return setupRoutine(&primitive, &triangle, &polygon, &data);
   1950 			}
   1951 		}
   1952 
   1953 		return false;
   1954 	}
   1955 
   1956 	bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
   1957 	{
   1958 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
   1959 		const SetupProcessor::State &state = draw.setupState;
   1960 		const DrawData &data = *draw.data;
   1961 
   1962 		Vertex &v = triangle.v0;
   1963 
   1964 		float pSize;
   1965 
   1966 		int pts = state.pointSizeRegister;
   1967 
   1968 		if(state.pointSizeRegister != Unused)
   1969 		{
   1970 			pSize = v.v[pts].y;
   1971 		}
   1972 		else
   1973 		{
   1974 			pSize = data.point.pointSize[0];
   1975 		}
   1976 
   1977 		pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
   1978 
   1979 		float4 P[4];
   1980 		int C[4];
   1981 
   1982 		int pos = state.positionRegister;
   1983 
   1984 		P[0] = v.v[pos];
   1985 		P[1] = v.v[pos];
   1986 		P[2] = v.v[pos];
   1987 		P[3] = v.v[pos];
   1988 
   1989 		const float X = pSize * P[0].w * data.halfPixelX[0];
   1990 		const float Y = pSize * P[0].w * data.halfPixelY[0];
   1991 
   1992 		P[0].x -= X;
   1993 		P[0].y += Y;
   1994 		C[0] = clipper->computeClipFlags(P[0]);
   1995 
   1996 		P[1].x += X;
   1997 		P[1].y += Y;
   1998 		C[1] = clipper->computeClipFlags(P[1]);
   1999 
   2000 		P[2].x += X;
   2001 		P[2].y -= Y;
   2002 		C[2] = clipper->computeClipFlags(P[2]);
   2003 
   2004 		P[3].x -= X;
   2005 		P[3].y -= Y;
   2006 		C[3] = clipper->computeClipFlags(P[3]);
   2007 
   2008 		triangle.v1 = triangle.v0;
   2009 		triangle.v2 = triangle.v0;
   2010 
   2011 		triangle.v1.X += iround(16 * 0.5f * pSize);
   2012 		triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
   2013 
   2014 		Polygon polygon(P, 4);
   2015 
   2016 		if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
   2017 		{
   2018 			int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
   2019 
   2020 			if(clipFlagsOr != Clipper::CLIP_FINITE)
   2021 			{
   2022 				if(!clipper->clip(polygon, clipFlagsOr, draw))
   2023 				{
   2024 					return false;
   2025 				}
   2026 			}
   2027 
   2028 			return setupRoutine(&primitive, &triangle, &polygon, &data);
   2029 		}
   2030 
   2031 		return false;
   2032 	}
   2033 
   2034 	void Renderer::initializeThreads()
   2035 	{
   2036 		unitCount = ceilPow2(threadCount);
   2037 		clusterCount = ceilPow2(threadCount);
   2038 
   2039 		for(int i = 0; i < unitCount; i++)
   2040 		{
   2041 			triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
   2042 			primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
   2043 		}
   2044 
   2045 		for(int i = 0; i < threadCount; i++)
   2046 		{
   2047 			vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
   2048 			vertexTask[i]->vertexCache.drawCall = -1;
   2049 
   2050 			task[i].type = Task::SUSPEND;
   2051 
   2052 			resume[i] = new Event();
   2053 			suspend[i] = new Event();
   2054 
   2055 			Parameters parameters;
   2056 			parameters.threadIndex = i;
   2057 			parameters.renderer = this;
   2058 
   2059 			exitThreads = false;
   2060 			worker[i] = new Thread(threadFunction, &parameters);
   2061 
   2062 			suspend[i]->wait();
   2063 			suspend[i]->signal();
   2064 		}
   2065 	}
   2066 
   2067 	void Renderer::terminateThreads()
   2068 	{
   2069 		while(threadsAwake != 0)
   2070 		{
   2071 			Thread::sleep(1);
   2072 		}
   2073 
   2074 		for(int thread = 0; thread < threadCount; thread++)
   2075 		{
   2076 			if(worker[thread])
   2077 			{
   2078 				exitThreads = true;
   2079 				resume[thread]->signal();
   2080 				worker[thread]->join();
   2081 
   2082 				delete worker[thread];
   2083 				worker[thread] = 0;
   2084 				delete resume[thread];
   2085 				resume[thread] = 0;
   2086 				delete suspend[thread];
   2087 				suspend[thread] = 0;
   2088 			}
   2089 
   2090 			deallocate(vertexTask[thread]);
   2091 			vertexTask[thread] = 0;
   2092 		}
   2093 
   2094 		for(int i = 0; i < 16; i++)
   2095 		{
   2096 			deallocate(triangleBatch[i]);
   2097 			triangleBatch[i] = 0;
   2098 
   2099 			deallocate(primitiveBatch[i]);
   2100 			primitiveBatch[i] = 0;
   2101 		}
   2102 	}
   2103 
   2104 	void Renderer::loadConstants(const VertexShader *vertexShader)
   2105 	{
   2106 		if(!vertexShader) return;
   2107 
   2108 		size_t count = vertexShader->getLength();
   2109 
   2110 		for(size_t i = 0; i < count; i++)
   2111 		{
   2112 			const Shader::Instruction *instruction = vertexShader->getInstruction(i);
   2113 
   2114 			if(instruction->opcode == Shader::OPCODE_DEF)
   2115 			{
   2116 				int index = instruction->dst.index;
   2117 				float value[4];
   2118 
   2119 				value[0] = instruction->src[0].value[0];
   2120 				value[1] = instruction->src[0].value[1];
   2121 				value[2] = instruction->src[0].value[2];
   2122 				value[3] = instruction->src[0].value[3];
   2123 
   2124 				setVertexShaderConstantF(index, value);
   2125 			}
   2126 			else if(instruction->opcode == Shader::OPCODE_DEFI)
   2127 			{
   2128 				int index = instruction->dst.index;
   2129 				int integer[4];
   2130 
   2131 				integer[0] = instruction->src[0].integer[0];
   2132 				integer[1] = instruction->src[0].integer[1];
   2133 				integer[2] = instruction->src[0].integer[2];
   2134 				integer[3] = instruction->src[0].integer[3];
   2135 
   2136 				setVertexShaderConstantI(index, integer);
   2137 			}
   2138 			else if(instruction->opcode == Shader::OPCODE_DEFB)
   2139 			{
   2140 				int index = instruction->dst.index;
   2141 				int boolean = instruction->src[0].boolean[0];
   2142 
   2143 				setVertexShaderConstantB(index, &boolean);
   2144 			}
   2145 		}
   2146 	}
   2147 
   2148 	void Renderer::loadConstants(const PixelShader *pixelShader)
   2149 	{
   2150 		if(!pixelShader) return;
   2151 
   2152 		size_t count = pixelShader->getLength();
   2153 
   2154 		for(size_t i = 0; i < count; i++)
   2155 		{
   2156 			const Shader::Instruction *instruction = pixelShader->getInstruction(i);
   2157 
   2158 			if(instruction->opcode == Shader::OPCODE_DEF)
   2159 			{
   2160 				int index = instruction->dst.index;
   2161 				float value[4];
   2162 
   2163 				value[0] = instruction->src[0].value[0];
   2164 				value[1] = instruction->src[0].value[1];
   2165 				value[2] = instruction->src[0].value[2];
   2166 				value[3] = instruction->src[0].value[3];
   2167 
   2168 				setPixelShaderConstantF(index, value);
   2169 			}
   2170 			else if(instruction->opcode == Shader::OPCODE_DEFI)
   2171 			{
   2172 				int index = instruction->dst.index;
   2173 				int integer[4];
   2174 
   2175 				integer[0] = instruction->src[0].integer[0];
   2176 				integer[1] = instruction->src[0].integer[1];
   2177 				integer[2] = instruction->src[0].integer[2];
   2178 				integer[3] = instruction->src[0].integer[3];
   2179 
   2180 				setPixelShaderConstantI(index, integer);
   2181 			}
   2182 			else if(instruction->opcode == Shader::OPCODE_DEFB)
   2183 			{
   2184 				int index = instruction->dst.index;
   2185 				int boolean = instruction->src[0].boolean[0];
   2186 
   2187 				setPixelShaderConstantB(index, &boolean);
   2188 			}
   2189 		}
   2190 	}
   2191 
   2192 	void Renderer::setIndexBuffer(Resource *indexBuffer)
   2193 	{
   2194 		context->indexBuffer = indexBuffer;
   2195 	}
   2196 
   2197 	void Renderer::setMultiSampleMask(unsigned int mask)
   2198 	{
   2199 		context->sampleMask = mask;
   2200 	}
   2201 
   2202 	void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
   2203 	{
   2204 		sw::transparencyAntialiasing = transparencyAntialiasing;
   2205 	}
   2206 
   2207 	bool Renderer::isReadWriteTexture(int sampler)
   2208 	{
   2209 		for(int index = 0; index < RENDERTARGETS; index++)
   2210 		{
   2211 			if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
   2212 			{
   2213 				return true;
   2214 			}
   2215 		}
   2216 
   2217 		if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
   2218 		{
   2219 			return true;
   2220 		}
   2221 
   2222 		return false;
   2223 	}
   2224 
   2225 	void Renderer::updateClipper()
   2226 	{
   2227 		if(updateClipPlanes)
   2228 		{
   2229 			if(VertexProcessor::isFixedFunction())   // User plane in world space
   2230 			{
   2231 				const Matrix &scissorWorld = getViewTransform();
   2232 
   2233 				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
   2234 				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
   2235 				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
   2236 				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
   2237 				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
   2238 				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
   2239 			}
   2240 			else   // User plane in clip space
   2241 			{
   2242 				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
   2243 				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
   2244 				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
   2245 				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
   2246 				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
   2247 				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
   2248 			}
   2249 
   2250 			updateClipPlanes = false;
   2251 		}
   2252 	}
   2253 
   2254 	void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
   2255 	{
   2256 		ASSERT(sampler < TOTAL_IMAGE_UNITS);
   2257 
   2258 		context->texture[sampler] = resource;
   2259 	}
   2260 
   2261 	void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
   2262 	{
   2263 		ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
   2264 
   2265 		context->sampler[sampler].setTextureLevel(face, level, surface, type);
   2266 	}
   2267 
   2268 	void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
   2269 	{
   2270 		if(type == SAMPLER_PIXEL)
   2271 		{
   2272 			PixelProcessor::setTextureFilter(sampler, textureFilter);
   2273 		}
   2274 		else
   2275 		{
   2276 			VertexProcessor::setTextureFilter(sampler, textureFilter);
   2277 		}
   2278 	}
   2279 
   2280 	void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
   2281 	{
   2282 		if(type == SAMPLER_PIXEL)
   2283 		{
   2284 			PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
   2285 		}
   2286 		else
   2287 		{
   2288 			VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
   2289 		}
   2290 	}
   2291 
   2292 	void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
   2293 	{
   2294 		if(type == SAMPLER_PIXEL)
   2295 		{
   2296 			PixelProcessor::setGatherEnable(sampler, enable);
   2297 		}
   2298 		else
   2299 		{
   2300 			VertexProcessor::setGatherEnable(sampler, enable);
   2301 		}
   2302 	}
   2303 
   2304 	void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
   2305 	{
   2306 		if(type == SAMPLER_PIXEL)
   2307 		{
   2308 			PixelProcessor::setAddressingModeU(sampler, addressMode);
   2309 		}
   2310 		else
   2311 		{
   2312 			VertexProcessor::setAddressingModeU(sampler, addressMode);
   2313 		}
   2314 	}
   2315 
   2316 	void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
   2317 	{
   2318 		if(type == SAMPLER_PIXEL)
   2319 		{
   2320 			PixelProcessor::setAddressingModeV(sampler, addressMode);
   2321 		}
   2322 		else
   2323 		{
   2324 			VertexProcessor::setAddressingModeV(sampler, addressMode);
   2325 		}
   2326 	}
   2327 
   2328 	void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
   2329 	{
   2330 		if(type == SAMPLER_PIXEL)
   2331 		{
   2332 			PixelProcessor::setAddressingModeW(sampler, addressMode);
   2333 		}
   2334 		else
   2335 		{
   2336 			VertexProcessor::setAddressingModeW(sampler, addressMode);
   2337 		}
   2338 	}
   2339 
   2340 	void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
   2341 	{
   2342 		if(type == SAMPLER_PIXEL)
   2343 		{
   2344 			PixelProcessor::setReadSRGB(sampler, sRGB);
   2345 		}
   2346 		else
   2347 		{
   2348 			VertexProcessor::setReadSRGB(sampler, sRGB);
   2349 		}
   2350 	}
   2351 
   2352 	void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
   2353 	{
   2354 		if(type == SAMPLER_PIXEL)
   2355 		{
   2356 			PixelProcessor::setMipmapLOD(sampler, bias);
   2357 		}
   2358 		else
   2359 		{
   2360 			VertexProcessor::setMipmapLOD(sampler, bias);
   2361 		}
   2362 	}
   2363 
   2364 	void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
   2365 	{
   2366 		if(type == SAMPLER_PIXEL)
   2367 		{
   2368 			PixelProcessor::setBorderColor(sampler, borderColor);
   2369 		}
   2370 		else
   2371 		{
   2372 			VertexProcessor::setBorderColor(sampler, borderColor);
   2373 		}
   2374 	}
   2375 
   2376 	void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
   2377 	{
   2378 		if(type == SAMPLER_PIXEL)
   2379 		{
   2380 			PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
   2381 		}
   2382 		else
   2383 		{
   2384 			VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
   2385 		}
   2386 	}
   2387 
   2388 	void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering)
   2389 	{
   2390 		if(type == SAMPLER_PIXEL)
   2391 		{
   2392 			PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
   2393 		}
   2394 		else
   2395 		{
   2396 			VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
   2397 		}
   2398 	}
   2399 
   2400 	void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
   2401 	{
   2402 		if(type == SAMPLER_PIXEL)
   2403 		{
   2404 			PixelProcessor::setSwizzleR(sampler, swizzleR);
   2405 		}
   2406 		else
   2407 		{
   2408 			VertexProcessor::setSwizzleR(sampler, swizzleR);
   2409 		}
   2410 	}
   2411 
   2412 	void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
   2413 	{
   2414 		if(type == SAMPLER_PIXEL)
   2415 		{
   2416 			PixelProcessor::setSwizzleG(sampler, swizzleG);
   2417 		}
   2418 		else
   2419 		{
   2420 			VertexProcessor::setSwizzleG(sampler, swizzleG);
   2421 		}
   2422 	}
   2423 
   2424 	void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
   2425 	{
   2426 		if(type == SAMPLER_PIXEL)
   2427 		{
   2428 			PixelProcessor::setSwizzleB(sampler, swizzleB);
   2429 		}
   2430 		else
   2431 		{
   2432 			VertexProcessor::setSwizzleB(sampler, swizzleB);
   2433 		}
   2434 	}
   2435 
   2436 	void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
   2437 	{
   2438 		if(type == SAMPLER_PIXEL)
   2439 		{
   2440 			PixelProcessor::setSwizzleA(sampler, swizzleA);
   2441 		}
   2442 		else
   2443 		{
   2444 			VertexProcessor::setSwizzleA(sampler, swizzleA);
   2445 		}
   2446 	}
   2447 
   2448 	void Renderer::setCompareFunc(SamplerType type, int sampler, CompareFunc compFunc)
   2449 	{
   2450 		if(type == SAMPLER_PIXEL)
   2451 		{
   2452 			PixelProcessor::setCompareFunc(sampler, compFunc);
   2453 		}
   2454 		else
   2455 		{
   2456 			VertexProcessor::setCompareFunc(sampler, compFunc);
   2457 		}
   2458 	}
   2459 
   2460 	void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
   2461 	{
   2462 		if(type == SAMPLER_PIXEL)
   2463 		{
   2464 			PixelProcessor::setBaseLevel(sampler, baseLevel);
   2465 		}
   2466 		else
   2467 		{
   2468 			VertexProcessor::setBaseLevel(sampler, baseLevel);
   2469 		}
   2470 	}
   2471 
   2472 	void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
   2473 	{
   2474 		if(type == SAMPLER_PIXEL)
   2475 		{
   2476 			PixelProcessor::setMaxLevel(sampler, maxLevel);
   2477 		}
   2478 		else
   2479 		{
   2480 			VertexProcessor::setMaxLevel(sampler, maxLevel);
   2481 		}
   2482 	}
   2483 
   2484 	void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
   2485 	{
   2486 		if(type == SAMPLER_PIXEL)
   2487 		{
   2488 			PixelProcessor::setMinLod(sampler, minLod);
   2489 		}
   2490 		else
   2491 		{
   2492 			VertexProcessor::setMinLod(sampler, minLod);
   2493 		}
   2494 	}
   2495 
   2496 	void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
   2497 	{
   2498 		if(type == SAMPLER_PIXEL)
   2499 		{
   2500 			PixelProcessor::setMaxLod(sampler, maxLod);
   2501 		}
   2502 		else
   2503 		{
   2504 			VertexProcessor::setMaxLod(sampler, maxLod);
   2505 		}
   2506 	}
   2507 
   2508 	void Renderer::setSyncRequired(SamplerType type, int sampler, bool syncRequired)
   2509 	{
   2510 		if(type == SAMPLER_PIXEL)
   2511 		{
   2512 			PixelProcessor::setSyncRequired(sampler, syncRequired);
   2513 		}
   2514 		else
   2515 		{
   2516 			VertexProcessor::setSyncRequired(sampler, syncRequired);
   2517 		}
   2518 	}
   2519 
   2520 	void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
   2521 	{
   2522 		context->setPointSpriteEnable(pointSpriteEnable);
   2523 	}
   2524 
   2525 	void Renderer::setPointScaleEnable(bool pointScaleEnable)
   2526 	{
   2527 		context->setPointScaleEnable(pointScaleEnable);
   2528 	}
   2529 
   2530 	void Renderer::setLineWidth(float width)
   2531 	{
   2532 		context->lineWidth = width;
   2533 	}
   2534 
   2535 	void Renderer::setDepthBias(float bias)
   2536 	{
   2537 		context->depthBias = bias;
   2538 	}
   2539 
   2540 	void Renderer::setSlopeDepthBias(float slopeBias)
   2541 	{
   2542 		context->slopeDepthBias = slopeBias;
   2543 	}
   2544 
   2545 	void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
   2546 	{
   2547 		context->rasterizerDiscard = rasterizerDiscard;
   2548 	}
   2549 
   2550 	void Renderer::setPixelShader(const PixelShader *shader)
   2551 	{
   2552 		context->pixelShader = shader;
   2553 
   2554 		loadConstants(shader);
   2555 	}
   2556 
   2557 	void Renderer::setVertexShader(const VertexShader *shader)
   2558 	{
   2559 		context->vertexShader = shader;
   2560 
   2561 		loadConstants(shader);
   2562 	}
   2563 
   2564 	void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count)
   2565 	{
   2566 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2567 		{
   2568 			if(drawCall[i]->psDirtyConstF < index + count)
   2569 			{
   2570 				drawCall[i]->psDirtyConstF = index + count;
   2571 			}
   2572 		}
   2573 
   2574 		for(unsigned int i = 0; i < count; i++)
   2575 		{
   2576 			PixelProcessor::setFloatConstant(index + i, value);
   2577 			value += 4;
   2578 		}
   2579 	}
   2580 
   2581 	void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count)
   2582 	{
   2583 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2584 		{
   2585 			if(drawCall[i]->psDirtyConstI < index + count)
   2586 			{
   2587 				drawCall[i]->psDirtyConstI = index + count;
   2588 			}
   2589 		}
   2590 
   2591 		for(unsigned int i = 0; i < count; i++)
   2592 		{
   2593 			PixelProcessor::setIntegerConstant(index + i, value);
   2594 			value += 4;
   2595 		}
   2596 	}
   2597 
   2598 	void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
   2599 	{
   2600 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2601 		{
   2602 			if(drawCall[i]->psDirtyConstB < index + count)
   2603 			{
   2604 				drawCall[i]->psDirtyConstB = index + count;
   2605 			}
   2606 		}
   2607 
   2608 		for(unsigned int i = 0; i < count; i++)
   2609 		{
   2610 			PixelProcessor::setBooleanConstant(index + i, *boolean);
   2611 			boolean++;
   2612 		}
   2613 	}
   2614 
   2615 	void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count)
   2616 	{
   2617 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2618 		{
   2619 			if(drawCall[i]->vsDirtyConstF < index + count)
   2620 			{
   2621 				drawCall[i]->vsDirtyConstF = index + count;
   2622 			}
   2623 		}
   2624 
   2625 		for(unsigned int i = 0; i < count; i++)
   2626 		{
   2627 			VertexProcessor::setFloatConstant(index + i, value);
   2628 			value += 4;
   2629 		}
   2630 	}
   2631 
   2632 	void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count)
   2633 	{
   2634 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2635 		{
   2636 			if(drawCall[i]->vsDirtyConstI < index + count)
   2637 			{
   2638 				drawCall[i]->vsDirtyConstI = index + count;
   2639 			}
   2640 		}
   2641 
   2642 		for(unsigned int i = 0; i < count; i++)
   2643 		{
   2644 			VertexProcessor::setIntegerConstant(index + i, value);
   2645 			value += 4;
   2646 		}
   2647 	}
   2648 
   2649 	void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
   2650 	{
   2651 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2652 		{
   2653 			if(drawCall[i]->vsDirtyConstB < index + count)
   2654 			{
   2655 				drawCall[i]->vsDirtyConstB = index + count;
   2656 			}
   2657 		}
   2658 
   2659 		for(unsigned int i = 0; i < count; i++)
   2660 		{
   2661 			VertexProcessor::setBooleanConstant(index + i, *boolean);
   2662 			boolean++;
   2663 		}
   2664 	}
   2665 
   2666 	void Renderer::setModelMatrix(const Matrix &M, int i)
   2667 	{
   2668 		VertexProcessor::setModelMatrix(M, i);
   2669 	}
   2670 
   2671 	void Renderer::setViewMatrix(const Matrix &V)
   2672 	{
   2673 		VertexProcessor::setViewMatrix(V);
   2674 		updateClipPlanes = true;
   2675 	}
   2676 
   2677 	void Renderer::setBaseMatrix(const Matrix &B)
   2678 	{
   2679 		VertexProcessor::setBaseMatrix(B);
   2680 		updateClipPlanes = true;
   2681 	}
   2682 
   2683 	void Renderer::setProjectionMatrix(const Matrix &P)
   2684 	{
   2685 		VertexProcessor::setProjectionMatrix(P);
   2686 		updateClipPlanes = true;
   2687 	}
   2688 
   2689 	void Renderer::addQuery(Query *query)
   2690 	{
   2691 		queries.push_back(query);
   2692 	}
   2693 
   2694 	void Renderer::removeQuery(Query *query)
   2695 	{
   2696 		queries.remove(query);
   2697 	}
   2698 
   2699 	#if PERF_HUD
   2700 		int Renderer::getThreadCount()
   2701 		{
   2702 			return threadCount;
   2703 		}
   2704 
   2705 		int64_t Renderer::getVertexTime(int thread)
   2706 		{
   2707 			return vertexTime[thread];
   2708 		}
   2709 
   2710 		int64_t Renderer::getSetupTime(int thread)
   2711 		{
   2712 			return setupTime[thread];
   2713 		}
   2714 
   2715 		int64_t Renderer::getPixelTime(int thread)
   2716 		{
   2717 			return pixelTime[thread];
   2718 		}
   2719 
   2720 		void Renderer::resetTimers()
   2721 		{
   2722 			for(int thread = 0; thread < threadCount; thread++)
   2723 			{
   2724 				vertexTime[thread] = 0;
   2725 				setupTime[thread] = 0;
   2726 				pixelTime[thread] = 0;
   2727 			}
   2728 		}
   2729 	#endif
   2730 
   2731 	void Renderer::setViewport(const Viewport &viewport)
   2732 	{
   2733 		this->viewport = viewport;
   2734 	}
   2735 
   2736 	void Renderer::setScissor(const Rect &scissor)
   2737 	{
   2738 		this->scissor = scissor;
   2739 	}
   2740 
   2741 	void Renderer::setClipFlags(int flags)
   2742 	{
   2743 		clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
   2744 	}
   2745 
   2746 	void Renderer::setClipPlane(unsigned int index, const float plane[4])
   2747 	{
   2748 		if(index < MAX_CLIP_PLANES)
   2749 		{
   2750 			userPlane[index] = plane;
   2751 		}
   2752 		else ASSERT(false);
   2753 
   2754 		updateClipPlanes = true;
   2755 	}
   2756 
   2757 	void Renderer::updateConfiguration(bool initialUpdate)
   2758 	{
   2759 		bool newConfiguration = swiftConfig->hasNewConfiguration();
   2760 
   2761 		if(newConfiguration || initialUpdate)
   2762 		{
   2763 			terminateThreads();
   2764 
   2765 			SwiftConfig::Configuration configuration = {};
   2766 			swiftConfig->getConfiguration(configuration);
   2767 
   2768 			precacheVertex = !newConfiguration && configuration.precache;
   2769 			precacheSetup = !newConfiguration && configuration.precache;
   2770 			precachePixel = !newConfiguration && configuration.precache;
   2771 
   2772 			VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
   2773 			PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
   2774 			SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
   2775 
   2776 			switch(configuration.textureSampleQuality)
   2777 			{
   2778 			case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
   2779 			case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
   2780 			case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
   2781 			default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
   2782 			}
   2783 
   2784 			switch(configuration.mipmapQuality)
   2785 			{
   2786 			case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
   2787 			case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
   2788 			default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
   2789 			}
   2790 
   2791 			setPerspectiveCorrection(configuration.perspectiveCorrection);
   2792 
   2793 			switch(configuration.transcendentalPrecision)
   2794 			{
   2795 			case 0:
   2796 				logPrecision = APPROXIMATE;
   2797 				expPrecision = APPROXIMATE;
   2798 				rcpPrecision = APPROXIMATE;
   2799 				rsqPrecision = APPROXIMATE;
   2800 				break;
   2801 			case 1:
   2802 				logPrecision = PARTIAL;
   2803 				expPrecision = PARTIAL;
   2804 				rcpPrecision = PARTIAL;
   2805 				rsqPrecision = PARTIAL;
   2806 				break;
   2807 			case 2:
   2808 				logPrecision = ACCURATE;
   2809 				expPrecision = ACCURATE;
   2810 				rcpPrecision = ACCURATE;
   2811 				rsqPrecision = ACCURATE;
   2812 				break;
   2813 			case 3:
   2814 				logPrecision = WHQL;
   2815 				expPrecision = WHQL;
   2816 				rcpPrecision = WHQL;
   2817 				rsqPrecision = WHQL;
   2818 				break;
   2819 			case 4:
   2820 				logPrecision = IEEE;
   2821 				expPrecision = IEEE;
   2822 				rcpPrecision = IEEE;
   2823 				rsqPrecision = IEEE;
   2824 				break;
   2825 			default:
   2826 				logPrecision = ACCURATE;
   2827 				expPrecision = ACCURATE;
   2828 				rcpPrecision = ACCURATE;
   2829 				rsqPrecision = ACCURATE;
   2830 				break;
   2831 			}
   2832 
   2833 			switch(configuration.transparencyAntialiasing)
   2834 			{
   2835 			case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
   2836 			case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
   2837 			default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
   2838 			}
   2839 
   2840 			switch(configuration.threadCount)
   2841 			{
   2842 			case -1: threadCount = CPUID::coreCount();        break;
   2843 			case 0:  threadCount = CPUID::processAffinity();  break;
   2844 			default: threadCount = configuration.threadCount; break;
   2845 			}
   2846 
   2847 			CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
   2848 			CPUID::setEnableSSSE3(configuration.enableSSSE3);
   2849 			CPUID::setEnableSSE3(configuration.enableSSE3);
   2850 			CPUID::setEnableSSE2(configuration.enableSSE2);
   2851 			CPUID::setEnableSSE(configuration.enableSSE);
   2852 
   2853 			for(int pass = 0; pass < 10; pass++)
   2854 			{
   2855 				optimization[pass] = configuration.optimization[pass];
   2856 			}
   2857 
   2858 			forceWindowed = configuration.forceWindowed;
   2859 			complementaryDepthBuffer = configuration.complementaryDepthBuffer;
   2860 			postBlendSRGB = configuration.postBlendSRGB;
   2861 			exactColorRounding = configuration.exactColorRounding;
   2862 			forceClearRegisters = configuration.forceClearRegisters;
   2863 
   2864 		#ifndef NDEBUG
   2865 			minPrimitives = configuration.minPrimitives;
   2866 			maxPrimitives = configuration.maxPrimitives;
   2867 		#endif
   2868 		}
   2869 
   2870 		if(!initialUpdate && !worker[0])
   2871 		{
   2872 			initializeThreads();
   2873 		}
   2874 	}
   2875 }
   2876