Home | History | Annotate | Download | only in Renderer
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "Renderer.hpp"
     16 
     17 #include "Clipper.hpp"
     18 #include "Math.hpp"
     19 #include "FrameBuffer.hpp"
     20 #include "Timer.hpp"
     21 #include "Surface.hpp"
     22 #include "Half.hpp"
     23 #include "Primitive.hpp"
     24 #include "Polygon.hpp"
     25 #include "SwiftConfig.hpp"
     26 #include "MutexLock.hpp"
     27 #include "CPUID.hpp"
     28 #include "Memory.hpp"
     29 #include "Resource.hpp"
     30 #include "Constants.hpp"
     31 #include "Debug.hpp"
     32 #include "Reactor/Reactor.hpp"
     33 
     34 #undef max
     35 
// Passed to the SwiftConfig constructor when a Renderer is created.
bool disableServer = true;

#ifndef NDEBUG
// Debug builds only: Renderer::draw() returns without drawing anything when
// its primitive count lies outside the range [minPrimitives, maxPrimitives].
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1 << 21;
#endif
     42 
     43 namespace sw
     44 {
     45 	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
     46 	extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
     47 	extern bool booleanFaceRegister;
     48 	extern bool fullPixelPositionRegister;
     49 	extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
     50 	extern bool secondaryColor;             // Specular lighting is applied after texturing
     51 	extern bool colorsDefaultToZero;
     52 
     53 	extern bool forceWindowed;
     54 	extern bool complementaryDepthBuffer;
     55 	extern bool postBlendSRGB;
     56 	extern bool exactColorRounding;
     57 	extern TransparencyAntialiasing transparencyAntialiasing;
     58 	extern bool forceClearRegisters;
     59 
     60 	extern bool precacheVertex;
     61 	extern bool precacheSetup;
     62 	extern bool precachePixel;
     63 
	// Primitives per batch before Renderer::draw() divides it by the multisample count
	int batchSize = 128;
	// Thread count used by the scheduler (see Renderer::scheduleTask())
	int threadCount = 1;
	// Number of primitive units scanned by Renderer::findAvailableTasks()
	int unitCount = 1;
	// Number of pixel clusters scanned by Renderer::findAvailableTasks()
	int clusterCount = 1;

	// Renderer::threadFunction() enables flush-to-zero and denormals-are-zero
	// on its thread when logPrecision is below IEEE.
	TranscendentalPrecision logPrecision = ACCURATE;
	TranscendentalPrecision expPrecision = ACCURATE;
	TranscendentalPrecision rcpPrecision = ACCURATE;
	TranscendentalPrecision rsqPrecision = ACCURATE;
	bool perspectiveCorrection = true;
     74 
	// Argument bundle for Renderer::threadFunction(), which receives it as a void pointer.
	struct Parameters
	{
		Renderer *renderer;   // Renderer whose threadLoop() the thread runs
		int threadIndex;      // Index handed to threadLoop()
	};
     80 
     81 	DrawCall::DrawCall()
     82 	{
     83 		queries = 0;
     84 
     85 		vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
     86 		vsDirtyConstI = 16;
     87 		vsDirtyConstB = 16;
     88 
     89 		psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
     90 		psDirtyConstI = 16;
     91 		psDirtyConstB = 16;
     92 
     93 		references = -1;
     94 
     95 		data = (DrawData*)allocate(sizeof(DrawData));
     96 		data->constants = &constants;
     97 	}
     98 
     99 	DrawCall::~DrawCall()
    100 	{
    101 		delete queries;
    102 
    103 		deallocate(data);
    104 	}
    105 
    106 	Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
    107 	{
    108 		sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
    109 		sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
    110 		sw::booleanFaceRegister = conventions.booleanFaceRegister;
    111 		sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
    112 		sw::leadingVertexFirst = conventions.leadingVertexFirst;
    113 		sw::secondaryColor = conventions.secondaryColor;
    114 		sw::colorsDefaultToZero = conventions.colorsDefaultToZero;
    115 		sw::exactColorRounding = exactColorRounding;
    116 
    117 		setRenderTarget(0, 0);
    118 		clipper = new Clipper(symmetricNormalizedDepth);
    119 		blitter = new Blitter;
    120 
    121 		updateViewMatrix = true;
    122 		updateBaseMatrix = true;
    123 		updateProjectionMatrix = true;
    124 		updateClipPlanes = true;
    125 
    126 		#if PERF_HUD
    127 			resetTimers();
    128 		#endif
    129 
    130 		for(int i = 0; i < 16; i++)
    131 		{
    132 			vertexTask[i] = 0;
    133 
    134 			worker[i] = 0;
    135 			resume[i] = 0;
    136 			suspend[i] = 0;
    137 		}
    138 
    139 		threadsAwake = 0;
    140 		resumeApp = new Event();
    141 
    142 		currentDraw = 0;
    143 		nextDraw = 0;
    144 
    145 		qHead = 0;
    146 		qSize = 0;
    147 
    148 		for(int i = 0; i < 16; i++)
    149 		{
    150 			triangleBatch[i] = 0;
    151 			primitiveBatch[i] = 0;
    152 		}
    153 
    154 		for(int draw = 0; draw < DRAW_COUNT; draw++)
    155 		{
    156 			drawCall[draw] = new DrawCall();
    157 			drawList[draw] = drawCall[draw];
    158 		}
    159 
    160 		for(int unit = 0; unit < 16; unit++)
    161 		{
    162 			primitiveProgress[unit].init();
    163 		}
    164 
    165 		for(int cluster = 0; cluster < 16; cluster++)
    166 		{
    167 			pixelProgress[cluster].init();
    168 		}
    169 
    170 		clipFlags = 0;
    171 
    172 		swiftConfig = new SwiftConfig(disableServer);
    173 		updateConfiguration(true);
    174 
    175 		sync = new Resource(0);
    176 	}
    177 
    178 	Renderer::~Renderer()
    179 	{
    180 		sync->destruct();
    181 
    182 		delete clipper;
    183 		clipper = nullptr;
    184 
    185 		delete blitter;
    186 		blitter = nullptr;
    187 
    188 		terminateThreads();
    189 		delete resumeApp;
    190 
    191 		for(int draw = 0; draw < DRAW_COUNT; draw++)
    192 		{
    193 			delete drawCall[draw];
    194 		}
    195 
    196 		delete swiftConfig;
    197 	}
    198 
    199 	// This object has to be mem aligned
    200 	void* Renderer::operator new(size_t size)
    201 	{
    202 		ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
    203 		return sw::allocate(sizeof(Renderer), 16);
    204 	}
    205 
    206 	void Renderer::operator delete(void * mem)
    207 	{
    208 		sw::deallocate(mem);
    209 	}
    210 
    211 	void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
    212 	{
    213 		#ifndef NDEBUG
    214 			if(count < minPrimitives || count > maxPrimitives)
    215 			{
    216 				return;
    217 			}
    218 		#endif
    219 
    220 		context->drawType = drawType;
    221 
    222 		updateConfiguration();
    223 		updateClipper();
    224 
    225 		int ss = context->getSuperSampleCount();
    226 		int ms = context->getMultiSampleCount();
    227 
    228 		for(int q = 0; q < ss; q++)
    229 		{
    230 			unsigned int oldMultiSampleMask = context->multiSampleMask;
    231 			context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
    232 
    233 			if(!context->multiSampleMask)
    234 			{
    235 				continue;
    236 			}
    237 
    238 			sync->lock(sw::PRIVATE);
    239 
    240 			if(update || oldMultiSampleMask != context->multiSampleMask)
    241 			{
    242 				vertexState = VertexProcessor::update(drawType);
    243 				setupState = SetupProcessor::update();
    244 				pixelState = PixelProcessor::update();
    245 
    246 				vertexRoutine = VertexProcessor::routine(vertexState);
    247 				setupRoutine = SetupProcessor::routine(setupState);
    248 				pixelRoutine = PixelProcessor::routine(pixelState);
    249 			}
    250 
    251 			int batch = batchSize / ms;
    252 
    253 			int (Renderer::*setupPrimitives)(int batch, int count);
    254 
    255 			if(context->isDrawTriangle())
    256 			{
    257 				switch(context->fillMode)
    258 				{
    259 				case FILL_SOLID:
    260 					setupPrimitives = &Renderer::setupSolidTriangles;
    261 					break;
    262 				case FILL_WIREFRAME:
    263 					setupPrimitives = &Renderer::setupWireframeTriangle;
    264 					batch = 1;
    265 					break;
    266 				case FILL_VERTEX:
    267 					setupPrimitives = &Renderer::setupVertexTriangle;
    268 					batch = 1;
    269 					break;
    270 				default:
    271 					ASSERT(false);
    272 					return;
    273 				}
    274 			}
    275 			else if(context->isDrawLine())
    276 			{
    277 				setupPrimitives = &Renderer::setupLines;
    278 			}
    279 			else   // Point draw
    280 			{
    281 				setupPrimitives = &Renderer::setupPoints;
    282 			}
    283 
    284 			DrawCall *draw = 0;
    285 
    286 			do
    287 			{
    288 				for(int i = 0; i < DRAW_COUNT; i++)
    289 				{
    290 					if(drawCall[i]->references == -1)
    291 					{
    292 						draw = drawCall[i];
    293 						drawList[nextDraw % DRAW_COUNT] = draw;
    294 
    295 						break;
    296 					}
    297 				}
    298 
    299 				if(!draw)
    300 				{
    301 					resumeApp->wait();
    302 				}
    303 			}
    304 			while(!draw);
    305 
    306 			DrawData *data = draw->data;
    307 
    308 			if(queries.size() != 0)
    309 			{
    310 				draw->queries = new std::list<Query*>();
    311 				bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
    312 				for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
    313 				{
    314 					Query* q = *query;
    315 					if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
    316 					{
    317 						atomicIncrement(&(q->reference));
    318 						draw->queries->push_back(q);
    319 					}
    320 				}
    321 			}
    322 
    323 			draw->drawType = drawType;
    324 			draw->batchSize = batch;
    325 
    326 			vertexRoutine->bind();
    327 			setupRoutine->bind();
    328 			pixelRoutine->bind();
    329 
    330 			draw->vertexRoutine = vertexRoutine;
    331 			draw->setupRoutine = setupRoutine;
    332 			draw->pixelRoutine = pixelRoutine;
    333 			draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
    334 			draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
    335 			draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
    336 			draw->setupPrimitives = setupPrimitives;
    337 			draw->setupState = setupState;
    338 
    339 			for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
    340 			{
    341 				draw->vertexStream[i] = context->input[i].resource;
    342 				data->input[i] = context->input[i].buffer;
    343 				data->stride[i] = context->input[i].stride;
    344 
    345 				if(draw->vertexStream[i])
    346 				{
    347 					draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
    348 				}
    349 			}
    350 
    351 			if(context->indexBuffer)
    352 			{
    353 				data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
    354 			}
    355 
    356 			draw->indexBuffer = context->indexBuffer;
    357 
    358 			for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
    359 			{
    360 				draw->texture[sampler] = 0;
    361 			}
    362 
    363 			for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
    364 			{
    365 				if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
    366 				{
    367 					draw->texture[sampler] = context->texture[sampler];
    368 					draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
    369 
    370 					data->mipmap[sampler] = context->sampler[sampler].getTextureData();
    371 				}
    372 			}
    373 
    374 			if(context->pixelShader)
    375 			{
    376 				if(draw->psDirtyConstF)
    377 				{
    378 					memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
    379 					memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
    380 					draw->psDirtyConstF = 0;
    381 				}
    382 
    383 				if(draw->psDirtyConstI)
    384 				{
    385 					memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
    386 					draw->psDirtyConstI = 0;
    387 				}
    388 
    389 				if(draw->psDirtyConstB)
    390 				{
    391 					memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
    392 					draw->psDirtyConstB = 0;
    393 				}
    394 
    395 				PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
    396 			}
    397 			else
    398 			{
    399 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
    400 				{
    401 					draw->pUniformBuffers[i] = nullptr;
    402 				}
    403 			}
    404 
    405 			if(context->pixelShaderVersion() <= 0x0104)
    406 			{
    407 				for(int stage = 0; stage < 8; stage++)
    408 				{
    409 					if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
    410 					{
    411 						data->textureStage[stage] = context->textureStage[stage].uniforms;
    412 					}
    413 					else break;
    414 				}
    415 			}
    416 
    417 			if(context->vertexShader)
    418 			{
    419 				if(context->vertexShader->getVersion() >= 0x0300)
    420 				{
    421 					for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
    422 					{
    423 						if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
    424 						{
    425 							draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
    426 							draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
    427 
    428 							data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
    429 						}
    430 					}
    431 				}
    432 
    433 				if(draw->vsDirtyConstF)
    434 				{
    435 					memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
    436 					draw->vsDirtyConstF = 0;
    437 				}
    438 
    439 				if(draw->vsDirtyConstI)
    440 				{
    441 					memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
    442 					draw->vsDirtyConstI = 0;
    443 				}
    444 
    445 				if(draw->vsDirtyConstB)
    446 				{
    447 					memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
    448 					draw->vsDirtyConstB = 0;
    449 				}
    450 
    451 				if(context->vertexShader->isInstanceIdDeclared())
    452 				{
    453 					data->instanceID = context->instanceID;
    454 				}
    455 
    456 				VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
    457 				VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
    458 			}
    459 			else
    460 			{
    461 				data->ff = ff;
    462 
    463 				draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
    464 				draw->vsDirtyConstI = 16;
    465 				draw->vsDirtyConstB = 16;
    466 
    467 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
    468 				{
    469 					draw->vUniformBuffers[i] = nullptr;
    470 				}
    471 
    472 				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
    473 				{
    474 					draw->transformFeedbackBuffers[i] = nullptr;
    475 				}
    476 			}
    477 
    478 			if(pixelState.stencilActive)
    479 			{
    480 				data->stencil[0] = stencil;
    481 				data->stencil[1] = stencilCCW;
    482 			}
    483 
    484 			if(pixelState.fogActive)
    485 			{
    486 				data->fog = fog;
    487 			}
    488 
    489 			if(setupState.isDrawPoint)
    490 			{
    491 				data->point = point;
    492 			}
    493 
    494 			data->lineWidth = context->lineWidth;
    495 
    496 			data->factor = factor;
    497 
    498 			if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
    499 			{
    500 				float ref = context->alphaReference * (1.0f / 255.0f);
    501 				float margin = sw::min(ref, 1.0f - ref);
    502 
    503 				if(ms == 4)
    504 				{
    505 					data->a2c0 = replicate(ref - margin * 0.6f);
    506 					data->a2c1 = replicate(ref - margin * 0.2f);
    507 					data->a2c2 = replicate(ref + margin * 0.2f);
    508 					data->a2c3 = replicate(ref + margin * 0.6f);
    509 				}
    510 				else if(ms == 2)
    511 				{
    512 					data->a2c0 = replicate(ref - margin * 0.3f);
    513 					data->a2c1 = replicate(ref + margin * 0.3f);
    514 				}
    515 				else ASSERT(false);
    516 			}
    517 
    518 			if(pixelState.occlusionEnabled)
    519 			{
    520 				for(int cluster = 0; cluster < clusterCount; cluster++)
    521 				{
    522 					data->occlusion[cluster] = 0;
    523 				}
    524 			}
    525 
    526 			#if PERF_PROFILE
    527 				for(int cluster = 0; cluster < clusterCount; cluster++)
    528 				{
    529 					for(int i = 0; i < PERF_TIMERS; i++)
    530 					{
    531 						data->cycles[i][cluster] = 0;
    532 					}
    533 				}
    534 			#endif
    535 
    536 			// Viewport
    537 			{
    538 				float W = 0.5f * viewport.width;
    539 				float H = 0.5f * viewport.height;
    540 				float X0 = viewport.x0 + W;
    541 				float Y0 = viewport.y0 + H;
    542 				float N = viewport.minZ;
    543 				float F = viewport.maxZ;
    544 				float Z = F - N;
    545 
    546 				if(context->isDrawTriangle(false))
    547 				{
    548 					N += depthBias;
    549 				}
    550 
    551 				if(complementaryDepthBuffer)
    552 				{
    553 					Z = -Z;
    554 					N = 1 - N;
    555 				}
    556 
    557 				static const float X[5][16] =   // Fragment offsets
    558 				{
    559 					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
    560 					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
    561 					{-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
    562 					{+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
    563 					{+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
    564 				};
    565 
    566 				static const float Y[5][16] =   // Fragment offsets
    567 				{
    568 					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
    569 					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
    570 					{-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
    571 					{-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
    572 					{-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
    573 				};
    574 
    575 				int s = sw::log2(ss);
    576 
    577 				data->Wx16 = replicate(W * 16);
    578 				data->Hx16 = replicate(H * 16);
    579 				data->X0x16 = replicate(X0 * 16 - 8);
    580 				data->Y0x16 = replicate(Y0 * 16 - 8);
    581 				data->XXXX = replicate(X[s][q] / W);
    582 				data->YYYY = replicate(Y[s][q] / H);
    583 				data->halfPixelX = replicate(0.5f / W);
    584 				data->halfPixelY = replicate(0.5f / H);
    585 				data->viewportHeight = abs(viewport.height);
    586 				data->slopeDepthBias = slopeDepthBias;
    587 				data->depthRange = Z;
    588 				data->depthNear = N;
    589 				draw->clipFlags = clipFlags;
    590 
    591 				if(clipFlags)
    592 				{
    593 					if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
    594 					if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
    595 					if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
    596 					if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
    597 					if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
    598 					if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
    599 				}
    600 			}
    601 
    602 			// Target
    603 			{
    604 				for(int index = 0; index < RENDERTARGETS; index++)
    605 				{
    606 					draw->renderTarget[index] = context->renderTarget[index];
    607 
    608 					if(draw->renderTarget[index])
    609 					{
    610 						data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
    611 						data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
    612 						data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
    613 					}
    614 				}
    615 
    616 				draw->depthBuffer = context->depthBuffer;
    617 				draw->stencilBuffer = context->stencilBuffer;
    618 
    619 				if(draw->depthBuffer)
    620 				{
    621 					data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
    622 					data->depthPitchB = context->depthBuffer->getInternalPitchB();
    623 					data->depthSliceB = context->depthBuffer->getInternalSliceB();
    624 				}
    625 
    626 				if(draw->stencilBuffer)
    627 				{
    628 					data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, q * ms, MANAGED);
    629 					data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
    630 					data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
    631 				}
    632 			}
    633 
    634 			// Scissor
    635 			{
    636 				data->scissorX0 = scissor.x0;
    637 				data->scissorX1 = scissor.x1;
    638 				data->scissorY0 = scissor.y0;
    639 				data->scissorY1 = scissor.y1;
    640 			}
    641 
    642 			draw->primitive = 0;
    643 			draw->count = count;
    644 
    645 			draw->references = (count + batch - 1) / batch;
    646 
    647 			schedulerMutex.lock();
    648 			nextDraw++;
    649 			schedulerMutex.unlock();
    650 
    651 			#ifndef NDEBUG
    652 			if(threadCount == 1)   // Use main thread for draw execution
    653 			{
    654 				threadsAwake = 1;
    655 				task[0].type = Task::RESUME;
    656 
    657 				taskLoop(0);
    658 			}
    659 			else
    660 			#endif
    661 			{
    662 				if(!threadsAwake)
    663 				{
    664 					suspend[0]->wait();
    665 
    666 					threadsAwake = 1;
    667 					task[0].type = Task::RESUME;
    668 
    669 					resume[0]->signal();
    670 				}
    671 			}
    672 		}
    673 	}
    674 
    675 	void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask)
    676 	{
    677 		SliceRect rect = clearRect;
    678 		int samples = dest->getDepth();
    679 
    680 		for(rect.slice = 0; rect.slice < samples; rect.slice++)
    681 		{
    682 			blitter->clear(value, format, dest, rect, rgbaMask);
    683 		}
    684 	}
    685 
	// Forwards the blit request unchanged to the renderer's Blitter.
	void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil)
	{
		blitter->blit(source, sRect, dest, dRect, filter, isStencil);
	}
    690 
	// Forwards the 3D blit request unchanged to the renderer's Blitter.
	void Renderer::blit3D(Surface *source, Surface *dest)
	{
		blitter->blit3D(source, dest);
	}
    695 
    696 	void Renderer::threadFunction(void *parameters)
    697 	{
    698 		Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
    699 		int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
    700 
    701 		if(logPrecision < IEEE)
    702 		{
    703 			CPUID::setFlushToZero(true);
    704 			CPUID::setDenormalsAreZero(true);
    705 		}
    706 
    707 		renderer->threadLoop(threadIndex);
    708 	}
    709 
    710 	void Renderer::threadLoop(int threadIndex)
    711 	{
    712 		while(!exitThreads)
    713 		{
    714 			taskLoop(threadIndex);
    715 
    716 			suspend[threadIndex]->signal();
    717 			resume[threadIndex]->wait();
    718 		}
    719 	}
    720 
    721 	void Renderer::taskLoop(int threadIndex)
    722 	{
    723 		while(task[threadIndex].type != Task::SUSPEND)
    724 		{
    725 			scheduleTask(threadIndex);
    726 			executeTask(threadIndex);
    727 		}
    728 	}
    729 
    730 	void Renderer::findAvailableTasks()
    731 	{
    732 		// Find pixel tasks
    733 		for(int cluster = 0; cluster < clusterCount; cluster++)
    734 		{
    735 			if(!pixelProgress[cluster].executing)
    736 			{
    737 				for(int unit = 0; unit < unitCount; unit++)
    738 				{
    739 					if(primitiveProgress[unit].references > 0)   // Contains processed primitives
    740 					{
    741 						if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
    742 						{
    743 							if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
    744 							{
    745 								Task &task = taskQueue[qHead];
    746 								task.type = Task::PIXELS;
    747 								task.primitiveUnit = unit;
    748 								task.pixelCluster = cluster;
    749 
    750 								pixelProgress[cluster].executing = true;
    751 
    752 								// Commit to the task queue
    753 								qHead = (qHead + 1) % 32;
    754 								qSize++;
    755 
    756 								break;
    757 							}
    758 						}
    759 					}
    760 				}
    761 			}
    762 		}
    763 
    764 		// Find primitive tasks
    765 		if(currentDraw == nextDraw)
    766 		{
    767 			return;   // No more primitives to process
    768 		}
    769 
    770 		for(int unit = 0; unit < unitCount; unit++)
    771 		{
    772 			DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
    773 
    774 			if(draw->primitive >= draw->count)
    775 			{
    776 				currentDraw++;
    777 
    778 				if(currentDraw == nextDraw)
    779 				{
    780 					return;   // No more primitives to process
    781 				}
    782 
    783 				draw = drawList[currentDraw % DRAW_COUNT];
    784 			}
    785 
    786 			if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
    787 			{
    788 				int primitive = draw->primitive;
    789 				int count = draw->count;
    790 				int batch = draw->batchSize;
    791 
    792 				primitiveProgress[unit].drawCall = currentDraw;
    793 				primitiveProgress[unit].firstPrimitive = primitive;
    794 				primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
    795 
    796 				draw->primitive += batch;
    797 
    798 				Task &task = taskQueue[qHead];
    799 				task.type = Task::PRIMITIVES;
    800 				task.primitiveUnit = unit;
    801 
    802 				primitiveProgress[unit].references = -1;
    803 
    804 				// Commit to the task queue
    805 				qHead = (qHead + 1) % 32;
    806 				qSize++;
    807 			}
    808 		}
    809 	}
    810 
    811 	void Renderer::scheduleTask(int threadIndex)
    812 	{
    813 		schedulerMutex.lock();
    814 
    815 		if((int)qSize < threadCount - threadsAwake + 1)
    816 		{
    817 			findAvailableTasks();
    818 		}
    819 
    820 		if(qSize != 0)
    821 		{
    822 			task[threadIndex] = taskQueue[(qHead - qSize) % 32];
    823 			qSize--;
    824 
    825 			if(threadsAwake != threadCount)
    826 			{
    827 				int wakeup = qSize - threadsAwake + 1;
    828 
    829 				for(int i = 0; i < threadCount && wakeup > 0; i++)
    830 				{
    831 					if(task[i].type == Task::SUSPEND)
    832 					{
    833 						suspend[i]->wait();
    834 						task[i].type = Task::RESUME;
    835 						resume[i]->signal();
    836 
    837 						threadsAwake++;
    838 						wakeup--;
    839 					}
    840 				}
    841 			}
    842 		}
    843 		else
    844 		{
    845 			task[threadIndex].type = Task::SUSPEND;
    846 
    847 			threadsAwake--;
    848 		}
    849 
    850 		schedulerMutex.unlock();
    851 	}
    852 
    853 	void Renderer::executeTask(int threadIndex)
    854 	{
    855 		#if PERF_HUD
    856 			int64_t startTick = Timer::ticks();
    857 		#endif
    858 
    859 		switch(task[threadIndex].type)
    860 		{
    861 		case Task::PRIMITIVES:
    862 			{
    863 				int unit = task[threadIndex].primitiveUnit;
    864 
    865 				int input = primitiveProgress[unit].firstPrimitive;
    866 				int count = primitiveProgress[unit].primitiveCount;
    867 				DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
    868 				int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
    869 
    870 				processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
    871 
    872 				#if PERF_HUD
    873 					int64_t time = Timer::ticks();
    874 					vertexTime[threadIndex] += time - startTick;
    875 					startTick = time;
    876 				#endif
    877 
    878 				int visible = 0;
    879 
    880 				if(!draw->setupState.rasterizerDiscard)
    881 				{
    882 					visible = (this->*setupPrimitives)(unit, count);
    883 				}
    884 
    885 				primitiveProgress[unit].visible = visible;
    886 				primitiveProgress[unit].references = clusterCount;
    887 
    888 				#if PERF_HUD
    889 					setupTime[threadIndex] += Timer::ticks() - startTick;
    890 				#endif
    891 			}
    892 			break;
    893 		case Task::PIXELS:
    894 			{
    895 				int unit = task[threadIndex].primitiveUnit;
    896 				int visible = primitiveProgress[unit].visible;
    897 
    898 				if(visible > 0)
    899 				{
    900 					int cluster = task[threadIndex].pixelCluster;
    901 					Primitive *primitive = primitiveBatch[unit];
    902 					DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
    903 					DrawData *data = draw->data;
    904 					PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
    905 
    906 					pixelRoutine(primitive, visible, cluster, data);
    907 				}
    908 
    909 				finishRendering(task[threadIndex]);
    910 
    911 				#if PERF_HUD
    912 					pixelTime[threadIndex] += Timer::ticks() - startTick;
    913 				#endif
    914 			}
    915 			break;
    916 		case Task::RESUME:
    917 			break;
    918 		case Task::SUSPEND:
    919 			break;
    920 		default:
    921 			ASSERT(false);
    922 		}
    923 	}
    924 
	// Acquires the 'sync' resource with PUBLIC access and releases it again right away.
	// NOTE(review): presumably this blocks until the PRIVATE locks taken by
	// draw() have been released, i.e. until queued draws finish; confirm
	// against Resource::lock().
	void Renderer::synchronize()
	{
		sync->lock(sw::PUBLIC);
		sync->unlock();
	}
    930 
    931 	void Renderer::finishRendering(Task &pixelTask)
    932 	{
    933 		int unit = pixelTask.primitiveUnit;
    934 		int cluster = pixelTask.pixelCluster;
    935 
    936 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
    937 		DrawData &data = *draw.data;
    938 		int primitive = primitiveProgress[unit].firstPrimitive;
    939 		int count = primitiveProgress[unit].primitiveCount;
    940 		int processedPrimitives = primitive + count;
    941 
    942 		pixelProgress[cluster].processedPrimitives = processedPrimitives;
    943 
    944 		if(pixelProgress[cluster].processedPrimitives >= draw.count)
    945 		{
    946 			pixelProgress[cluster].drawCall++;
    947 			pixelProgress[cluster].processedPrimitives = 0;
    948 		}
    949 
    950 		int ref = atomicDecrement(&primitiveProgress[unit].references);
    951 
    952 		if(ref == 0)
    953 		{
    954 			ref = atomicDecrement(&draw.references);
    955 
    956 			if(ref == 0)
    957 			{
    958 				#if PERF_PROFILE
    959 					for(int cluster = 0; cluster < clusterCount; cluster++)
    960 					{
    961 						for(int i = 0; i < PERF_TIMERS; i++)
    962 						{
    963 							profiler.cycles[i] += data.cycles[i][cluster];
    964 						}
    965 					}
    966 				#endif
    967 
    968 				if(draw.queries)
    969 				{
    970 					for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
    971 					{
    972 						Query *query = *q;
    973 
    974 						switch(query->type)
    975 						{
    976 						case Query::FRAGMENTS_PASSED:
    977 							for(int cluster = 0; cluster < clusterCount; cluster++)
    978 							{
    979 								atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
    980 							}
    981 							break;
    982 						case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
    983 							atomicAdd((volatile int*)&query->data, processedPrimitives);
    984 							break;
    985 						default:
    986 							break;
    987 						}
    988 
    989 						atomicDecrement(&query->reference);
    990 					}
    991 
    992 					delete draw.queries;
    993 					draw.queries = 0;
    994 				}
    995 
    996 				for(int i = 0; i < RENDERTARGETS; i++)
    997 				{
    998 					if(draw.renderTarget[i])
    999 					{
   1000 						draw.renderTarget[i]->unlockInternal();
   1001 					}
   1002 				}
   1003 
   1004 				if(draw.depthBuffer)
   1005 				{
   1006 					draw.depthBuffer->unlockInternal();
   1007 				}
   1008 
   1009 				if(draw.stencilBuffer)
   1010 				{
   1011 					draw.stencilBuffer->unlockStencil();
   1012 				}
   1013 
   1014 				for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
   1015 				{
   1016 					if(draw.texture[i])
   1017 					{
   1018 						draw.texture[i]->unlock();
   1019 					}
   1020 				}
   1021 
   1022 				for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
   1023 				{
   1024 					if(draw.vertexStream[i])
   1025 					{
   1026 						draw.vertexStream[i]->unlock();
   1027 					}
   1028 				}
   1029 
   1030 				if(draw.indexBuffer)
   1031 				{
   1032 					draw.indexBuffer->unlock();
   1033 				}
   1034 
   1035 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
   1036 				{
   1037 					if(draw.pUniformBuffers[i])
   1038 					{
   1039 						draw.pUniformBuffers[i]->unlock();
   1040 					}
   1041 					if(draw.vUniformBuffers[i])
   1042 					{
   1043 						draw.vUniformBuffers[i]->unlock();
   1044 					}
   1045 				}
   1046 
   1047 				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
   1048 				{
   1049 					if(draw.transformFeedbackBuffers[i])
   1050 					{
   1051 						draw.transformFeedbackBuffers[i]->unlock();
   1052 					}
   1053 				}
   1054 
   1055 				draw.vertexRoutine->unbind();
   1056 				draw.setupRoutine->unbind();
   1057 				draw.pixelRoutine->unbind();
   1058 
   1059 				sync->unlock();
   1060 
   1061 				draw.references = -1;
   1062 				resumeApp->signal();
   1063 			}
   1064 		}
   1065 
   1066 		pixelProgress[cluster].executing = false;
   1067 	}
   1068 
   1069 	void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
   1070 	{
   1071 		Triangle *triangle = triangleBatch[unit];
   1072 		DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
   1073 		DrawData *data = draw->data;
   1074 		VertexTask *task = vertexTask[thread];
   1075 
   1076 		const void *indices = data->indices;
   1077 		VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
   1078 
   1079 		if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
   1080 		{
   1081 			task->vertexCache.clear();
   1082 			task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
   1083 		}
   1084 
   1085 		unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
   1086 
   1087 		switch(draw->drawType)
   1088 		{
   1089 		case DRAW_POINTLIST:
   1090 			{
   1091 				unsigned int index = start;
   1092 
   1093 				for(unsigned int i = 0; i < triangleCount; i++)
   1094 				{
   1095 					batch[i][0] = index;
   1096 					batch[i][1] = index;
   1097 					batch[i][2] = index;
   1098 
   1099 					index += 1;
   1100 				}
   1101 			}
   1102 			break;
   1103 		case DRAW_LINELIST:
   1104 			{
   1105 				unsigned int index = 2 * start;
   1106 
   1107 				for(unsigned int i = 0; i < triangleCount; i++)
   1108 				{
   1109 					batch[i][0] = index + 0;
   1110 					batch[i][1] = index + 1;
   1111 					batch[i][2] = index + 1;
   1112 
   1113 					index += 2;
   1114 				}
   1115 			}
   1116 			break;
   1117 		case DRAW_LINESTRIP:
   1118 			{
   1119 				unsigned int index = start;
   1120 
   1121 				for(unsigned int i = 0; i < triangleCount; i++)
   1122 				{
   1123 					batch[i][0] = index + 0;
   1124 					batch[i][1] = index + 1;
   1125 					batch[i][2] = index + 1;
   1126 
   1127 					index += 1;
   1128 				}
   1129 			}
   1130 			break;
   1131 		case DRAW_LINELOOP:
   1132 			{
   1133 				unsigned int index = start;
   1134 
   1135 				for(unsigned int i = 0; i < triangleCount; i++)
   1136 				{
   1137 					batch[i][0] = (index + 0) % loop;
   1138 					batch[i][1] = (index + 1) % loop;
   1139 					batch[i][2] = (index + 1) % loop;
   1140 
   1141 					index += 1;
   1142 				}
   1143 			}
   1144 			break;
   1145 		case DRAW_TRIANGLELIST:
   1146 			{
   1147 				unsigned int index = 3 * start;
   1148 
   1149 				for(unsigned int i = 0; i < triangleCount; i++)
   1150 				{
   1151 					batch[i][0] = index + 0;
   1152 					batch[i][1] = index + 1;
   1153 					batch[i][2] = index + 2;
   1154 
   1155 					index += 3;
   1156 				}
   1157 			}
   1158 			break;
   1159 		case DRAW_TRIANGLESTRIP:
   1160 			{
   1161 				unsigned int index = start;
   1162 
   1163 				for(unsigned int i = 0; i < triangleCount; i++)
   1164 				{
   1165 					batch[i][0] = index + 0;
   1166 					batch[i][1] = index + (index & 1) + 1;
   1167 					batch[i][2] = index + (~index & 1) + 1;
   1168 
   1169 					index += 1;
   1170 				}
   1171 			}
   1172 			break;
   1173 		case DRAW_TRIANGLEFAN:
   1174 			{
   1175 				unsigned int index = start;
   1176 
   1177 				for(unsigned int i = 0; i < triangleCount; i++)
   1178 				{
   1179 					batch[i][0] = index + 1;
   1180 					batch[i][1] = index + 2;
   1181 					batch[i][2] = 0;
   1182 
   1183 					index += 1;
   1184 				}
   1185 			}
   1186 			break;
   1187 		case DRAW_INDEXEDPOINTLIST8:
   1188 			{
   1189 				const unsigned char *index = (const unsigned char*)indices + start;
   1190 
   1191 				for(unsigned int i = 0; i < triangleCount; i++)
   1192 				{
   1193 					batch[i][0] = *index;
   1194 					batch[i][1] = *index;
   1195 					batch[i][2] = *index;
   1196 
   1197 					index += 1;
   1198 				}
   1199 			}
   1200 			break;
   1201 		case DRAW_INDEXEDPOINTLIST16:
   1202 			{
   1203 				const unsigned short *index = (const unsigned short*)indices + start;
   1204 
   1205 				for(unsigned int i = 0; i < triangleCount; i++)
   1206 				{
   1207 					batch[i][0] = *index;
   1208 					batch[i][1] = *index;
   1209 					batch[i][2] = *index;
   1210 
   1211 					index += 1;
   1212 				}
   1213 			}
   1214 			break;
   1215 		case DRAW_INDEXEDPOINTLIST32:
   1216 			{
   1217 				const unsigned int *index = (const unsigned int*)indices + start;
   1218 
   1219 				for(unsigned int i = 0; i < triangleCount; i++)
   1220 				{
   1221 					batch[i][0] = *index;
   1222 					batch[i][1] = *index;
   1223 					batch[i][2] = *index;
   1224 
   1225 					index += 1;
   1226 				}
   1227 			}
   1228 			break;
   1229 		case DRAW_INDEXEDLINELIST8:
   1230 			{
   1231 				const unsigned char *index = (const unsigned char*)indices + 2 * start;
   1232 
   1233 				for(unsigned int i = 0; i < triangleCount; i++)
   1234 				{
   1235 					batch[i][0] = index[0];
   1236 					batch[i][1] = index[1];
   1237 					batch[i][2] = index[1];
   1238 
   1239 					index += 2;
   1240 				}
   1241 			}
   1242 			break;
   1243 		case DRAW_INDEXEDLINELIST16:
   1244 			{
   1245 				const unsigned short *index = (const unsigned short*)indices + 2 * start;
   1246 
   1247 				for(unsigned int i = 0; i < triangleCount; i++)
   1248 				{
   1249 					batch[i][0] = index[0];
   1250 					batch[i][1] = index[1];
   1251 					batch[i][2] = index[1];
   1252 
   1253 					index += 2;
   1254 				}
   1255 			}
   1256 			break;
   1257 		case DRAW_INDEXEDLINELIST32:
   1258 			{
   1259 				const unsigned int *index = (const unsigned int*)indices + 2 * start;
   1260 
   1261 				for(unsigned int i = 0; i < triangleCount; i++)
   1262 				{
   1263 					batch[i][0] = index[0];
   1264 					batch[i][1] = index[1];
   1265 					batch[i][2] = index[1];
   1266 
   1267 					index += 2;
   1268 				}
   1269 			}
   1270 			break;
   1271 		case DRAW_INDEXEDLINESTRIP8:
   1272 			{
   1273 				const unsigned char *index = (const unsigned char*)indices + start;
   1274 
   1275 				for(unsigned int i = 0; i < triangleCount; i++)
   1276 				{
   1277 					batch[i][0] = index[0];
   1278 					batch[i][1] = index[1];
   1279 					batch[i][2] = index[1];
   1280 
   1281 					index += 1;
   1282 				}
   1283 			}
   1284 			break;
   1285 		case DRAW_INDEXEDLINESTRIP16:
   1286 			{
   1287 				const unsigned short *index = (const unsigned short*)indices + start;
   1288 
   1289 				for(unsigned int i = 0; i < triangleCount; i++)
   1290 				{
   1291 					batch[i][0] = index[0];
   1292 					batch[i][1] = index[1];
   1293 					batch[i][2] = index[1];
   1294 
   1295 					index += 1;
   1296 				}
   1297 			}
   1298 			break;
   1299 		case DRAW_INDEXEDLINESTRIP32:
   1300 			{
   1301 				const unsigned int *index = (const unsigned int*)indices + start;
   1302 
   1303 				for(unsigned int i = 0; i < triangleCount; i++)
   1304 				{
   1305 					batch[i][0] = index[0];
   1306 					batch[i][1] = index[1];
   1307 					batch[i][2] = index[1];
   1308 
   1309 					index += 1;
   1310 				}
   1311 			}
   1312 			break;
   1313 		case DRAW_INDEXEDLINELOOP8:
   1314 			{
   1315 				const unsigned char *index = (const unsigned char*)indices;
   1316 
   1317 				for(unsigned int i = 0; i < triangleCount; i++)
   1318 				{
   1319 					batch[i][0] = index[(start + i + 0) % loop];
   1320 					batch[i][1] = index[(start + i + 1) % loop];
   1321 					batch[i][2] = index[(start + i + 1) % loop];
   1322 				}
   1323 			}
   1324 			break;
   1325 		case DRAW_INDEXEDLINELOOP16:
   1326 			{
   1327 				const unsigned short *index = (const unsigned short*)indices;
   1328 
   1329 				for(unsigned int i = 0; i < triangleCount; i++)
   1330 				{
   1331 					batch[i][0] = index[(start + i + 0) % loop];
   1332 					batch[i][1] = index[(start + i + 1) % loop];
   1333 					batch[i][2] = index[(start + i + 1) % loop];
   1334 				}
   1335 			}
   1336 			break;
   1337 		case DRAW_INDEXEDLINELOOP32:
   1338 			{
   1339 				const unsigned int *index = (const unsigned int*)indices;
   1340 
   1341 				for(unsigned int i = 0; i < triangleCount; i++)
   1342 				{
   1343 					batch[i][0] = index[(start + i + 0) % loop];
   1344 					batch[i][1] = index[(start + i + 1) % loop];
   1345 					batch[i][2] = index[(start + i + 1) % loop];
   1346 				}
   1347 			}
   1348 			break;
   1349 		case DRAW_INDEXEDTRIANGLELIST8:
   1350 			{
   1351 				const unsigned char *index = (const unsigned char*)indices + 3 * start;
   1352 
   1353 				for(unsigned int i = 0; i < triangleCount; i++)
   1354 				{
   1355 					batch[i][0] = index[0];
   1356 					batch[i][1] = index[1];
   1357 					batch[i][2] = index[2];
   1358 
   1359 					index += 3;
   1360 				}
   1361 			}
   1362 			break;
   1363 		case DRAW_INDEXEDTRIANGLELIST16:
   1364 			{
   1365 				const unsigned short *index = (const unsigned short*)indices + 3 * start;
   1366 
   1367 				for(unsigned int i = 0; i < triangleCount; i++)
   1368 				{
   1369 					batch[i][0] = index[0];
   1370 					batch[i][1] = index[1];
   1371 					batch[i][2] = index[2];
   1372 
   1373 					index += 3;
   1374 				}
   1375 			}
   1376 			break;
   1377 		case DRAW_INDEXEDTRIANGLELIST32:
   1378 			{
   1379 				const unsigned int *index = (const unsigned int*)indices + 3 * start;
   1380 
   1381 				for(unsigned int i = 0; i < triangleCount; i++)
   1382 				{
   1383 					batch[i][0] = index[0];
   1384 					batch[i][1] = index[1];
   1385 					batch[i][2] = index[2];
   1386 
   1387 					index += 3;
   1388 				}
   1389 			}
   1390 			break;
   1391 		case DRAW_INDEXEDTRIANGLESTRIP8:
   1392 			{
   1393 				const unsigned char *index = (const unsigned char*)indices + start;
   1394 
   1395 				for(unsigned int i = 0; i < triangleCount; i++)
   1396 				{
   1397 					batch[i][0] = index[0];
   1398 					batch[i][1] = index[((start + i) & 1) + 1];
   1399 					batch[i][2] = index[(~(start + i) & 1) + 1];
   1400 
   1401 					index += 1;
   1402 				}
   1403 			}
   1404 			break;
   1405 		case DRAW_INDEXEDTRIANGLESTRIP16:
   1406 			{
   1407 				const unsigned short *index = (const unsigned short*)indices + start;
   1408 
   1409 				for(unsigned int i = 0; i < triangleCount; i++)
   1410 				{
   1411 					batch[i][0] = index[0];
   1412 					batch[i][1] = index[((start + i) & 1) + 1];
   1413 					batch[i][2] = index[(~(start + i) & 1) + 1];
   1414 
   1415 					index += 1;
   1416 				}
   1417 			}
   1418 			break;
   1419 		case DRAW_INDEXEDTRIANGLESTRIP32:
   1420 			{
   1421 				const unsigned int *index = (const unsigned int*)indices + start;
   1422 
   1423 				for(unsigned int i = 0; i < triangleCount; i++)
   1424 				{
   1425 					batch[i][0] = index[0];
   1426 					batch[i][1] = index[((start + i) & 1) + 1];
   1427 					batch[i][2] = index[(~(start + i) & 1) + 1];
   1428 
   1429 					index += 1;
   1430 				}
   1431 			}
   1432 			break;
   1433 		case DRAW_INDEXEDTRIANGLEFAN8:
   1434 			{
   1435 				const unsigned char *index = (const unsigned char*)indices;
   1436 
   1437 				for(unsigned int i = 0; i < triangleCount; i++)
   1438 				{
   1439 					batch[i][0] = index[start + i + 1];
   1440 					batch[i][1] = index[start + i + 2];
   1441 					batch[i][2] = index[0];
   1442 				}
   1443 			}
   1444 			break;
   1445 		case DRAW_INDEXEDTRIANGLEFAN16:
   1446 			{
   1447 				const unsigned short *index = (const unsigned short*)indices;
   1448 
   1449 				for(unsigned int i = 0; i < triangleCount; i++)
   1450 				{
   1451 					batch[i][0] = index[start + i + 1];
   1452 					batch[i][1] = index[start + i + 2];
   1453 					batch[i][2] = index[0];
   1454 				}
   1455 			}
   1456 			break;
   1457 		case DRAW_INDEXEDTRIANGLEFAN32:
   1458 			{
   1459 				const unsigned int *index = (const unsigned int*)indices;
   1460 
   1461 				for(unsigned int i = 0; i < triangleCount; i++)
   1462 				{
   1463 					batch[i][0] = index[start + i + 1];
   1464 					batch[i][1] = index[start + i + 2];
   1465 					batch[i][2] = index[0];
   1466 				}
   1467 			}
   1468 			break;
   1469 		case DRAW_QUADLIST:
   1470 			{
   1471 				unsigned int index = 4 * start / 2;
   1472 
   1473 				for(unsigned int i = 0; i < triangleCount; i += 2)
   1474 				{
   1475 					batch[i+0][0] = index + 0;
   1476 					batch[i+0][1] = index + 1;
   1477 					batch[i+0][2] = index + 2;
   1478 
   1479 					batch[i+1][0] = index + 0;
   1480 					batch[i+1][1] = index + 2;
   1481 					batch[i+1][2] = index + 3;
   1482 
   1483 					index += 4;
   1484 				}
   1485 			}
   1486 			break;
   1487 		default:
   1488 			ASSERT(false);
   1489 			return;
   1490 		}
   1491 
   1492 		task->primitiveStart = start;
   1493 		task->vertexCount = triangleCount * 3;
   1494 		vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
   1495 	}
   1496 
   1497 	int Renderer::setupSolidTriangles(int unit, int count)
   1498 	{
   1499 		Triangle *triangle = triangleBatch[unit];
   1500 		Primitive *primitive = primitiveBatch[unit];
   1501 
   1502 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
   1503 		SetupProcessor::State &state = draw.setupState;
   1504 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
   1505 
   1506 		int ms = state.multiSample;
   1507 		int pos = state.positionRegister;
   1508 		const DrawData *data = draw.data;
   1509 		int visible = 0;
   1510 
   1511 		for(int i = 0; i < count; i++, triangle++)
   1512 		{
   1513 			Vertex &v0 = triangle->v0;
   1514 			Vertex &v1 = triangle->v1;
   1515 			Vertex &v2 = triangle->v2;
   1516 
   1517 			if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
   1518 			{
   1519 				Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
   1520 
   1521 				int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
   1522 
   1523 				if(clipFlagsOr != Clipper::CLIP_FINITE)
   1524 				{
   1525 					if(!clipper->clip(polygon, clipFlagsOr, draw))
   1526 					{
   1527 						continue;
   1528 					}
   1529 				}
   1530 
   1531 				if(setupRoutine(primitive, triangle, &polygon, data))
   1532 				{
   1533 					primitive += ms;
   1534 					visible++;
   1535 				}
   1536 			}
   1537 		}
   1538 
   1539 		return visible;
   1540 	}
   1541 
   1542 	int Renderer::setupWireframeTriangle(int unit, int count)
   1543 	{
   1544 		Triangle *triangle = triangleBatch[unit];
   1545 		Primitive *primitive = primitiveBatch[unit];
   1546 		int visible = 0;
   1547 
   1548 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
   1549 		SetupProcessor::State &state = draw.setupState;
   1550 
   1551 		const Vertex &v0 = triangle[0].v0;
   1552 		const Vertex &v1 = triangle[0].v1;
   1553 		const Vertex &v2 = triangle[0].v2;
   1554 
   1555 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
   1556 
   1557 		if(state.cullMode == CULL_CLOCKWISE)
   1558 		{
   1559 			if(d >= 0) return 0;
   1560 		}
   1561 		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
   1562 		{
   1563 			if(d <= 0) return 0;
   1564 		}
   1565 
   1566 		// Copy attributes
   1567 		triangle[1].v0 = v1;
   1568 		triangle[1].v1 = v2;
   1569 		triangle[2].v0 = v2;
   1570 		triangle[2].v1 = v0;
   1571 
   1572 		if(state.color[0][0].flat)   // FIXME
   1573 		{
   1574 			for(int i = 0; i < 2; i++)
   1575 			{
   1576 				triangle[1].v0.C[i] = triangle[0].v0.C[i];
   1577 				triangle[1].v1.C[i] = triangle[0].v0.C[i];
   1578 				triangle[2].v0.C[i] = triangle[0].v0.C[i];
   1579 				triangle[2].v1.C[i] = triangle[0].v0.C[i];
   1580 			}
   1581 		}
   1582 
   1583 		for(int i = 0; i < 3; i++)
   1584 		{
   1585 			if(setupLine(*primitive, *triangle, draw))
   1586 			{
   1587 				primitive->area = 0.5f * d;
   1588 
   1589 				primitive++;
   1590 				visible++;
   1591 			}
   1592 
   1593 			triangle++;
   1594 		}
   1595 
   1596 		return visible;
   1597 	}
   1598 
   1599 	int Renderer::setupVertexTriangle(int unit, int count)
   1600 	{
   1601 		Triangle *triangle = triangleBatch[unit];
   1602 		Primitive *primitive = primitiveBatch[unit];
   1603 		int visible = 0;
   1604 
   1605 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
   1606 		SetupProcessor::State &state = draw.setupState;
   1607 
   1608 		const Vertex &v0 = triangle[0].v0;
   1609 		const Vertex &v1 = triangle[0].v1;
   1610 		const Vertex &v2 = triangle[0].v2;
   1611 
   1612 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
   1613 
   1614 		if(state.cullMode == CULL_CLOCKWISE)
   1615 		{
   1616 			if(d >= 0) return 0;
   1617 		}
   1618 		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
   1619 		{
   1620 			if(d <= 0) return 0;
   1621 		}
   1622 
   1623 		// Copy attributes
   1624 		triangle[1].v0 = v1;
   1625 		triangle[2].v0 = v2;
   1626 
   1627 		for(int i = 0; i < 3; i++)
   1628 		{
   1629 			if(setupPoint(*primitive, *triangle, draw))
   1630 			{
   1631 				primitive->area = 0.5f * d;
   1632 
   1633 				primitive++;
   1634 				visible++;
   1635 			}
   1636 
   1637 			triangle++;
   1638 		}
   1639 
   1640 		return visible;
   1641 	}
   1642 
   1643 	int Renderer::setupLines(int unit, int count)
   1644 	{
   1645 		Triangle *triangle = triangleBatch[unit];
   1646 		Primitive *primitive = primitiveBatch[unit];
   1647 		int visible = 0;
   1648 
   1649 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
   1650 		SetupProcessor::State &state = draw.setupState;
   1651 
   1652 		int ms = state.multiSample;
   1653 
   1654 		for(int i = 0; i < count; i++)
   1655 		{
   1656 			if(setupLine(*primitive, *triangle, draw))
   1657 			{
   1658 				primitive += ms;
   1659 				visible++;
   1660 			}
   1661 
   1662 			triangle++;
   1663 		}
   1664 
   1665 		return visible;
   1666 	}
   1667 
   1668 	int Renderer::setupPoints(int unit, int count)
   1669 	{
   1670 		Triangle *triangle = triangleBatch[unit];
   1671 		Primitive *primitive = primitiveBatch[unit];
   1672 		int visible = 0;
   1673 
   1674 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
   1675 		SetupProcessor::State &state = draw.setupState;
   1676 
   1677 		int ms = state.multiSample;
   1678 
   1679 		for(int i = 0; i < count; i++)
   1680 		{
   1681 			if(setupPoint(*primitive, *triangle, draw))
   1682 			{
   1683 				primitive += ms;
   1684 				visible++;
   1685 			}
   1686 
   1687 			triangle++;
   1688 		}
   1689 
   1690 		return visible;
   1691 	}
   1692 
   1693 	bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
   1694 	{
   1695 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
   1696 		const SetupProcessor::State &state = draw.setupState;
   1697 		const DrawData &data = *draw.data;
   1698 
   1699 		float lineWidth = data.lineWidth;
   1700 
   1701 		Vertex &v0 = triangle.v0;
   1702 		Vertex &v1 = triangle.v1;
   1703 
   1704 		int pos = state.positionRegister;
   1705 
   1706 		const float4 &P0 = v0.v[pos];
   1707 		const float4 &P1 = v1.v[pos];
   1708 
   1709 		if(P0.w <= 0 && P1.w <= 0)
   1710 		{
   1711 			return false;
   1712 		}
   1713 
   1714 		const float W = data.Wx16[0] * (1.0f / 16.0f);
   1715 		const float H = data.Hx16[0] * (1.0f / 16.0f);
   1716 
   1717 		float dx = W * (P1.x / P1.w - P0.x / P0.w);
   1718 		float dy = H * (P1.y / P1.w - P0.y / P0.w);
   1719 
   1720 		if(dx == 0 && dy == 0)
   1721 		{
   1722 			return false;
   1723 		}
   1724 
   1725 		if(false)   // Rectangle
   1726 		{
   1727 			float4 P[4];
   1728 			int C[4];
   1729 
   1730 			P[0] = P0;
   1731 			P[1] = P1;
   1732 			P[2] = P1;
   1733 			P[3] = P0;
   1734 
   1735 			float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
   1736 
   1737 			dx *= scale;
   1738 			dy *= scale;
   1739 
   1740 			float dx0w = dx * P0.w / W;
   1741 			float dy0h = dy * P0.w / H;
   1742 			float dx0h = dx * P0.w / H;
   1743 			float dy0w = dy * P0.w / W;
   1744 
   1745 			float dx1w = dx * P1.w / W;
   1746 			float dy1h = dy * P1.w / H;
   1747 			float dx1h = dx * P1.w / H;
   1748 			float dy1w = dy * P1.w / W;
   1749 
   1750 			P[0].x += -dy0w + -dx0w;
   1751 			P[0].y += -dx0h + +dy0h;
   1752 			C[0] = clipper->computeClipFlags(P[0]);
   1753 
   1754 			P[1].x += -dy1w + +dx1w;
   1755 			P[1].y += -dx1h + +dy1h;
   1756 			C[1] = clipper->computeClipFlags(P[1]);
   1757 
   1758 			P[2].x += +dy1w + +dx1w;
   1759 			P[2].y += +dx1h + -dy1h;
   1760 			C[2] = clipper->computeClipFlags(P[2]);
   1761 
   1762 			P[3].x += +dy0w + -dx0w;
   1763 			P[3].y += +dx0h + +dy0h;
   1764 			C[3] = clipper->computeClipFlags(P[3]);
   1765 
   1766 			if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
   1767 			{
   1768 				Polygon polygon(P, 4);
   1769 
   1770 				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
   1771 
   1772 				if(clipFlagsOr != Clipper::CLIP_FINITE)
   1773 				{
   1774 					if(!clipper->clip(polygon, clipFlagsOr, draw))
   1775 					{
   1776 						return false;
   1777 					}
   1778 				}
   1779 
   1780 				return setupRoutine(&primitive, &triangle, &polygon, &data);
   1781 			}
   1782 		}
   1783 		else   // Diamond test convention
   1784 		{
   1785 			float4 P[8];
   1786 			int C[8];
   1787 
   1788 			P[0] = P0;
   1789 			P[1] = P0;
   1790 			P[2] = P0;
   1791 			P[3] = P0;
   1792 			P[4] = P1;
   1793 			P[5] = P1;
   1794 			P[6] = P1;
   1795 			P[7] = P1;
   1796 
   1797 			float dx0 = lineWidth * 0.5f * P0.w / W;
   1798 			float dy0 = lineWidth * 0.5f * P0.w / H;
   1799 
   1800 			float dx1 = lineWidth * 0.5f * P1.w / W;
   1801 			float dy1 = lineWidth * 0.5f * P1.w / H;
   1802 
   1803 			P[0].x += -dx0;
   1804 			C[0] = clipper->computeClipFlags(P[0]);
   1805 
   1806 			P[1].y += +dy0;
   1807 			C[1] = clipper->computeClipFlags(P[1]);
   1808 
   1809 			P[2].x += +dx0;
   1810 			C[2] = clipper->computeClipFlags(P[2]);
   1811 
   1812 			P[3].y += -dy0;
   1813 			C[3] = clipper->computeClipFlags(P[3]);
   1814 
   1815 			P[4].x += -dx1;
   1816 			C[4] = clipper->computeClipFlags(P[4]);
   1817 
   1818 			P[5].y += +dy1;
   1819 			C[5] = clipper->computeClipFlags(P[5]);
   1820 
   1821 			P[6].x += +dx1;
   1822 			C[6] = clipper->computeClipFlags(P[6]);
   1823 
   1824 			P[7].y += -dy1;
   1825 			C[7] = clipper->computeClipFlags(P[7]);
   1826 
   1827 			if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
   1828 			{
   1829 				float4 L[6];
   1830 
   1831 				if(dx > -dy)
   1832 				{
   1833 					if(dx > dy)   // Right
   1834 					{
   1835 						L[0] = P[0];
   1836 						L[1] = P[1];
   1837 						L[2] = P[5];
   1838 						L[3] = P[6];
   1839 						L[4] = P[7];
   1840 						L[5] = P[3];
   1841 					}
   1842 					else   // Down
   1843 					{
   1844 						L[0] = P[0];
   1845 						L[1] = P[4];
   1846 						L[2] = P[5];
   1847 						L[3] = P[6];
   1848 						L[4] = P[2];
   1849 						L[5] = P[3];
   1850 					}
   1851 				}
   1852 				else
   1853 				{
   1854 					if(dx > dy)   // Up
   1855 					{
   1856 						L[0] = P[0];
   1857 						L[1] = P[1];
   1858 						L[2] = P[2];
   1859 						L[3] = P[6];
   1860 						L[4] = P[7];
   1861 						L[5] = P[4];
   1862 					}
   1863 					else   // Left
   1864 					{
   1865 						L[0] = P[1];
   1866 						L[1] = P[2];
   1867 						L[2] = P[3];
   1868 						L[3] = P[7];
   1869 						L[4] = P[4];
   1870 						L[5] = P[5];
   1871 					}
   1872 				}
   1873 
   1874 				Polygon polygon(L, 6);
   1875 
   1876 				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
   1877 
   1878 				if(clipFlagsOr != Clipper::CLIP_FINITE)
   1879 				{
   1880 					if(!clipper->clip(polygon, clipFlagsOr, draw))
   1881 					{
   1882 						return false;
   1883 					}
   1884 				}
   1885 
   1886 				return setupRoutine(&primitive, &triangle, &polygon, &data);
   1887 			}
   1888 		}
   1889 
   1890 		return false;
   1891 	}
   1892 
   1893 	bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
   1894 	{
   1895 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
   1896 		const SetupProcessor::State &state = draw.setupState;
   1897 		const DrawData &data = *draw.data;
   1898 
   1899 		Vertex &v = triangle.v0;
   1900 
   1901 		float pSize;
   1902 
   1903 		int pts = state.pointSizeRegister;
   1904 
   1905 		if(state.pointSizeRegister != Unused)
   1906 		{
   1907 			pSize = v.v[pts].y;
   1908 		}
   1909 		else
   1910 		{
   1911 			pSize = data.point.pointSize[0];
   1912 		}
   1913 
   1914 		pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
   1915 
   1916 		float4 P[4];
   1917 		int C[4];
   1918 
   1919 		int pos = state.positionRegister;
   1920 
   1921 		P[0] = v.v[pos];
   1922 		P[1] = v.v[pos];
   1923 		P[2] = v.v[pos];
   1924 		P[3] = v.v[pos];
   1925 
   1926 		const float X = pSize * P[0].w * data.halfPixelX[0];
   1927 		const float Y = pSize * P[0].w * data.halfPixelY[0];
   1928 
   1929 		P[0].x -= X;
   1930 		P[0].y += Y;
   1931 		C[0] = clipper->computeClipFlags(P[0]);
   1932 
   1933 		P[1].x += X;
   1934 		P[1].y += Y;
   1935 		C[1] = clipper->computeClipFlags(P[1]);
   1936 
   1937 		P[2].x += X;
   1938 		P[2].y -= Y;
   1939 		C[2] = clipper->computeClipFlags(P[2]);
   1940 
   1941 		P[3].x -= X;
   1942 		P[3].y -= Y;
   1943 		C[3] = clipper->computeClipFlags(P[3]);
   1944 
   1945 		triangle.v1 = triangle.v0;
   1946 		triangle.v2 = triangle.v0;
   1947 
   1948 		triangle.v1.X += iround(16 * 0.5f * pSize);
   1949 		triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
   1950 
   1951 		Polygon polygon(P, 4);
   1952 
   1953 		if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
   1954 		{
   1955 			int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
   1956 
   1957 			if(clipFlagsOr != Clipper::CLIP_FINITE)
   1958 			{
   1959 				if(!clipper->clip(polygon, clipFlagsOr, draw))
   1960 				{
   1961 					return false;
   1962 				}
   1963 			}
   1964 
   1965 			return setupRoutine(&primitive, &triangle, &polygon, &data);
   1966 		}
   1967 
   1968 		return false;
   1969 	}
   1970 
   1971 	void Renderer::initializeThreads()
   1972 	{
   1973 		unitCount = ceilPow2(threadCount);
   1974 		clusterCount = ceilPow2(threadCount);
   1975 
   1976 		for(int i = 0; i < unitCount; i++)
   1977 		{
   1978 			triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
   1979 			primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
   1980 		}
   1981 
   1982 		for(int i = 0; i < threadCount; i++)
   1983 		{
   1984 			vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
   1985 			vertexTask[i]->vertexCache.drawCall = -1;
   1986 
   1987 			task[i].type = Task::SUSPEND;
   1988 
   1989 			resume[i] = new Event();
   1990 			suspend[i] = new Event();
   1991 
   1992 			Parameters parameters;
   1993 			parameters.threadIndex = i;
   1994 			parameters.renderer = this;
   1995 
   1996 			exitThreads = false;
   1997 			worker[i] = new Thread(threadFunction, &parameters);
   1998 
   1999 			suspend[i]->wait();
   2000 			suspend[i]->signal();
   2001 		}
   2002 	}
   2003 
   2004 	void Renderer::terminateThreads()
   2005 	{
   2006 		while(threadsAwake != 0)
   2007 		{
   2008 			Thread::sleep(1);
   2009 		}
   2010 
   2011 		for(int thread = 0; thread < threadCount; thread++)
   2012 		{
   2013 			if(worker[thread])
   2014 			{
   2015 				exitThreads = true;
   2016 				resume[thread]->signal();
   2017 				worker[thread]->join();
   2018 
   2019 				delete worker[thread];
   2020 				worker[thread] = 0;
   2021 				delete resume[thread];
   2022 				resume[thread] = 0;
   2023 				delete suspend[thread];
   2024 				suspend[thread] = 0;
   2025 			}
   2026 
   2027 			deallocate(vertexTask[thread]);
   2028 			vertexTask[thread] = 0;
   2029 		}
   2030 
   2031 		for(int i = 0; i < 16; i++)
   2032 		{
   2033 			deallocate(triangleBatch[i]);
   2034 			triangleBatch[i] = 0;
   2035 
   2036 			deallocate(primitiveBatch[i]);
   2037 			primitiveBatch[i] = 0;
   2038 		}
   2039 	}
   2040 
   2041 	void Renderer::loadConstants(const VertexShader *vertexShader)
   2042 	{
   2043 		if(!vertexShader) return;
   2044 
   2045 		size_t count = vertexShader->getLength();
   2046 
   2047 		for(size_t i = 0; i < count; i++)
   2048 		{
   2049 			const Shader::Instruction *instruction = vertexShader->getInstruction(i);
   2050 
   2051 			if(instruction->opcode == Shader::OPCODE_DEF)
   2052 			{
   2053 				int index = instruction->dst.index;
   2054 				float value[4];
   2055 
   2056 				value[0] = instruction->src[0].value[0];
   2057 				value[1] = instruction->src[0].value[1];
   2058 				value[2] = instruction->src[0].value[2];
   2059 				value[3] = instruction->src[0].value[3];
   2060 
   2061 				setVertexShaderConstantF(index, value);
   2062 			}
   2063 			else if(instruction->opcode == Shader::OPCODE_DEFI)
   2064 			{
   2065 				int index = instruction->dst.index;
   2066 				int integer[4];
   2067 
   2068 				integer[0] = instruction->src[0].integer[0];
   2069 				integer[1] = instruction->src[0].integer[1];
   2070 				integer[2] = instruction->src[0].integer[2];
   2071 				integer[3] = instruction->src[0].integer[3];
   2072 
   2073 				setVertexShaderConstantI(index, integer);
   2074 			}
   2075 			else if(instruction->opcode == Shader::OPCODE_DEFB)
   2076 			{
   2077 				int index = instruction->dst.index;
   2078 				int boolean = instruction->src[0].boolean[0];
   2079 
   2080 				setVertexShaderConstantB(index, &boolean);
   2081 			}
   2082 		}
   2083 	}
   2084 
   2085 	void Renderer::loadConstants(const PixelShader *pixelShader)
   2086 	{
   2087 		if(!pixelShader) return;
   2088 
   2089 		size_t count = pixelShader->getLength();
   2090 
   2091 		for(size_t i = 0; i < count; i++)
   2092 		{
   2093 			const Shader::Instruction *instruction = pixelShader->getInstruction(i);
   2094 
   2095 			if(instruction->opcode == Shader::OPCODE_DEF)
   2096 			{
   2097 				int index = instruction->dst.index;
   2098 				float value[4];
   2099 
   2100 				value[0] = instruction->src[0].value[0];
   2101 				value[1] = instruction->src[0].value[1];
   2102 				value[2] = instruction->src[0].value[2];
   2103 				value[3] = instruction->src[0].value[3];
   2104 
   2105 				setPixelShaderConstantF(index, value);
   2106 			}
   2107 			else if(instruction->opcode == Shader::OPCODE_DEFI)
   2108 			{
   2109 				int index = instruction->dst.index;
   2110 				int integer[4];
   2111 
   2112 				integer[0] = instruction->src[0].integer[0];
   2113 				integer[1] = instruction->src[0].integer[1];
   2114 				integer[2] = instruction->src[0].integer[2];
   2115 				integer[3] = instruction->src[0].integer[3];
   2116 
   2117 				setPixelShaderConstantI(index, integer);
   2118 			}
   2119 			else if(instruction->opcode == Shader::OPCODE_DEFB)
   2120 			{
   2121 				int index = instruction->dst.index;
   2122 				int boolean = instruction->src[0].boolean[0];
   2123 
   2124 				setPixelShaderConstantB(index, &boolean);
   2125 			}
   2126 		}
   2127 	}
   2128 
   2129 	void Renderer::setIndexBuffer(Resource *indexBuffer)
   2130 	{
   2131 		context->indexBuffer = indexBuffer;
   2132 	}
   2133 
   2134 	void Renderer::setMultiSampleMask(unsigned int mask)
   2135 	{
   2136 		context->sampleMask = mask;
   2137 	}
   2138 
   2139 	void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
   2140 	{
   2141 		sw::transparencyAntialiasing = transparencyAntialiasing;
   2142 	}
   2143 
   2144 	bool Renderer::isReadWriteTexture(int sampler)
   2145 	{
   2146 		for(int index = 0; index < RENDERTARGETS; index++)
   2147 		{
   2148 			if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
   2149 			{
   2150 				return true;
   2151 			}
   2152 		}
   2153 
   2154 		if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
   2155 		{
   2156 			return true;
   2157 		}
   2158 
   2159 		return false;
   2160 	}
   2161 
   2162 	void Renderer::updateClipper()
   2163 	{
   2164 		if(updateClipPlanes)
   2165 		{
   2166 			if(VertexProcessor::isFixedFunction())   // User plane in world space
   2167 			{
   2168 				const Matrix &scissorWorld = getViewTransform();
   2169 
   2170 				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
   2171 				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
   2172 				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
   2173 				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
   2174 				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
   2175 				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
   2176 			}
   2177 			else   // User plane in clip space
   2178 			{
   2179 				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
   2180 				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
   2181 				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
   2182 				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
   2183 				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
   2184 				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
   2185 			}
   2186 
   2187 			updateClipPlanes = false;
   2188 		}
   2189 	}
   2190 
   2191 	void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
   2192 	{
   2193 		ASSERT(sampler < TOTAL_IMAGE_UNITS);
   2194 
   2195 		context->texture[sampler] = resource;
   2196 	}
   2197 
   2198 	void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
   2199 	{
   2200 		ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
   2201 
   2202 		context->sampler[sampler].setTextureLevel(face, level, surface, type);
   2203 	}
   2204 
   2205 	void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
   2206 	{
   2207 		if(type == SAMPLER_PIXEL)
   2208 		{
   2209 			PixelProcessor::setTextureFilter(sampler, textureFilter);
   2210 		}
   2211 		else
   2212 		{
   2213 			VertexProcessor::setTextureFilter(sampler, textureFilter);
   2214 		}
   2215 	}
   2216 
   2217 	void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
   2218 	{
   2219 		if(type == SAMPLER_PIXEL)
   2220 		{
   2221 			PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
   2222 		}
   2223 		else
   2224 		{
   2225 			VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
   2226 		}
   2227 	}
   2228 
   2229 	void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
   2230 	{
   2231 		if(type == SAMPLER_PIXEL)
   2232 		{
   2233 			PixelProcessor::setGatherEnable(sampler, enable);
   2234 		}
   2235 		else
   2236 		{
   2237 			VertexProcessor::setGatherEnable(sampler, enable);
   2238 		}
   2239 	}
   2240 
   2241 	void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
   2242 	{
   2243 		if(type == SAMPLER_PIXEL)
   2244 		{
   2245 			PixelProcessor::setAddressingModeU(sampler, addressMode);
   2246 		}
   2247 		else
   2248 		{
   2249 			VertexProcessor::setAddressingModeU(sampler, addressMode);
   2250 		}
   2251 	}
   2252 
   2253 	void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
   2254 	{
   2255 		if(type == SAMPLER_PIXEL)
   2256 		{
   2257 			PixelProcessor::setAddressingModeV(sampler, addressMode);
   2258 		}
   2259 		else
   2260 		{
   2261 			VertexProcessor::setAddressingModeV(sampler, addressMode);
   2262 		}
   2263 	}
   2264 
   2265 	void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
   2266 	{
   2267 		if(type == SAMPLER_PIXEL)
   2268 		{
   2269 			PixelProcessor::setAddressingModeW(sampler, addressMode);
   2270 		}
   2271 		else
   2272 		{
   2273 			VertexProcessor::setAddressingModeW(sampler, addressMode);
   2274 		}
   2275 	}
   2276 
   2277 	void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
   2278 	{
   2279 		if(type == SAMPLER_PIXEL)
   2280 		{
   2281 			PixelProcessor::setReadSRGB(sampler, sRGB);
   2282 		}
   2283 		else
   2284 		{
   2285 			VertexProcessor::setReadSRGB(sampler, sRGB);
   2286 		}
   2287 	}
   2288 
   2289 	void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
   2290 	{
   2291 		if(type == SAMPLER_PIXEL)
   2292 		{
   2293 			PixelProcessor::setMipmapLOD(sampler, bias);
   2294 		}
   2295 		else
   2296 		{
   2297 			VertexProcessor::setMipmapLOD(sampler, bias);
   2298 		}
   2299 	}
   2300 
   2301 	void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
   2302 	{
   2303 		if(type == SAMPLER_PIXEL)
   2304 		{
   2305 			PixelProcessor::setBorderColor(sampler, borderColor);
   2306 		}
   2307 		else
   2308 		{
   2309 			VertexProcessor::setBorderColor(sampler, borderColor);
   2310 		}
   2311 	}
   2312 
   2313 	void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
   2314 	{
   2315 		if(type == SAMPLER_PIXEL)
   2316 		{
   2317 			PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
   2318 		}
   2319 		else
   2320 		{
   2321 			VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
   2322 		}
   2323 	}
   2324 
   2325 	void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering)
   2326 	{
   2327 		if(type == SAMPLER_PIXEL)
   2328 		{
   2329 			PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
   2330 		}
   2331 		else
   2332 		{
   2333 			VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
   2334 		}
   2335 	}
   2336 
   2337 	void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
   2338 	{
   2339 		if(type == SAMPLER_PIXEL)
   2340 		{
   2341 			PixelProcessor::setSwizzleR(sampler, swizzleR);
   2342 		}
   2343 		else
   2344 		{
   2345 			VertexProcessor::setSwizzleR(sampler, swizzleR);
   2346 		}
   2347 	}
   2348 
   2349 	void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
   2350 	{
   2351 		if(type == SAMPLER_PIXEL)
   2352 		{
   2353 			PixelProcessor::setSwizzleG(sampler, swizzleG);
   2354 		}
   2355 		else
   2356 		{
   2357 			VertexProcessor::setSwizzleG(sampler, swizzleG);
   2358 		}
   2359 	}
   2360 
   2361 	void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
   2362 	{
   2363 		if(type == SAMPLER_PIXEL)
   2364 		{
   2365 			PixelProcessor::setSwizzleB(sampler, swizzleB);
   2366 		}
   2367 		else
   2368 		{
   2369 			VertexProcessor::setSwizzleB(sampler, swizzleB);
   2370 		}
   2371 	}
   2372 
   2373 	void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
   2374 	{
   2375 		if(type == SAMPLER_PIXEL)
   2376 		{
   2377 			PixelProcessor::setSwizzleA(sampler, swizzleA);
   2378 		}
   2379 		else
   2380 		{
   2381 			VertexProcessor::setSwizzleA(sampler, swizzleA);
   2382 		}
   2383 	}
   2384 
   2385 	void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
   2386 	{
   2387 		if(type == SAMPLER_PIXEL)
   2388 		{
   2389 			PixelProcessor::setBaseLevel(sampler, baseLevel);
   2390 		}
   2391 		else
   2392 		{
   2393 			VertexProcessor::setBaseLevel(sampler, baseLevel);
   2394 		}
   2395 	}
   2396 
   2397 	void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
   2398 	{
   2399 		if(type == SAMPLER_PIXEL)
   2400 		{
   2401 			PixelProcessor::setMaxLevel(sampler, maxLevel);
   2402 		}
   2403 		else
   2404 		{
   2405 			VertexProcessor::setMaxLevel(sampler, maxLevel);
   2406 		}
   2407 	}
   2408 
   2409 	void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
   2410 	{
   2411 		if(type == SAMPLER_PIXEL)
   2412 		{
   2413 			PixelProcessor::setMinLod(sampler, minLod);
   2414 		}
   2415 		else
   2416 		{
   2417 			VertexProcessor::setMinLod(sampler, minLod);
   2418 		}
   2419 	}
   2420 
   2421 	void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
   2422 	{
   2423 		if(type == SAMPLER_PIXEL)
   2424 		{
   2425 			PixelProcessor::setMaxLod(sampler, maxLod);
   2426 		}
   2427 		else
   2428 		{
   2429 			VertexProcessor::setMaxLod(sampler, maxLod);
   2430 		}
   2431 	}
   2432 
   2433 	void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
   2434 	{
   2435 		context->setPointSpriteEnable(pointSpriteEnable);
   2436 	}
   2437 
   2438 	void Renderer::setPointScaleEnable(bool pointScaleEnable)
   2439 	{
   2440 		context->setPointScaleEnable(pointScaleEnable);
   2441 	}
   2442 
   2443 	void Renderer::setLineWidth(float width)
   2444 	{
   2445 		context->lineWidth = width;
   2446 	}
   2447 
   2448 	void Renderer::setDepthBias(float bias)
   2449 	{
   2450 		depthBias = bias;
   2451 	}
   2452 
   2453 	void Renderer::setSlopeDepthBias(float slopeBias)
   2454 	{
   2455 		slopeDepthBias = slopeBias;
   2456 	}
   2457 
   2458 	void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
   2459 	{
   2460 		context->rasterizerDiscard = rasterizerDiscard;
   2461 	}
   2462 
   2463 	void Renderer::setPixelShader(const PixelShader *shader)
   2464 	{
   2465 		context->pixelShader = shader;
   2466 
   2467 		loadConstants(shader);
   2468 	}
   2469 
   2470 	void Renderer::setVertexShader(const VertexShader *shader)
   2471 	{
   2472 		context->vertexShader = shader;
   2473 
   2474 		loadConstants(shader);
   2475 	}
   2476 
   2477 	void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count)
   2478 	{
   2479 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2480 		{
   2481 			if(drawCall[i]->psDirtyConstF < index + count)
   2482 			{
   2483 				drawCall[i]->psDirtyConstF = index + count;
   2484 			}
   2485 		}
   2486 
   2487 		for(unsigned int i = 0; i < count; i++)
   2488 		{
   2489 			PixelProcessor::setFloatConstant(index + i, value);
   2490 			value += 4;
   2491 		}
   2492 	}
   2493 
   2494 	void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count)
   2495 	{
   2496 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2497 		{
   2498 			if(drawCall[i]->psDirtyConstI < index + count)
   2499 			{
   2500 				drawCall[i]->psDirtyConstI = index + count;
   2501 			}
   2502 		}
   2503 
   2504 		for(unsigned int i = 0; i < count; i++)
   2505 		{
   2506 			PixelProcessor::setIntegerConstant(index + i, value);
   2507 			value += 4;
   2508 		}
   2509 	}
   2510 
   2511 	void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
   2512 	{
   2513 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2514 		{
   2515 			if(drawCall[i]->psDirtyConstB < index + count)
   2516 			{
   2517 				drawCall[i]->psDirtyConstB = index + count;
   2518 			}
   2519 		}
   2520 
   2521 		for(unsigned int i = 0; i < count; i++)
   2522 		{
   2523 			PixelProcessor::setBooleanConstant(index + i, *boolean);
   2524 			boolean++;
   2525 		}
   2526 	}
   2527 
   2528 	void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count)
   2529 	{
   2530 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2531 		{
   2532 			if(drawCall[i]->vsDirtyConstF < index + count)
   2533 			{
   2534 				drawCall[i]->vsDirtyConstF = index + count;
   2535 			}
   2536 		}
   2537 
   2538 		for(unsigned int i = 0; i < count; i++)
   2539 		{
   2540 			VertexProcessor::setFloatConstant(index + i, value);
   2541 			value += 4;
   2542 		}
   2543 	}
   2544 
   2545 	void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count)
   2546 	{
   2547 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2548 		{
   2549 			if(drawCall[i]->vsDirtyConstI < index + count)
   2550 			{
   2551 				drawCall[i]->vsDirtyConstI = index + count;
   2552 			}
   2553 		}
   2554 
   2555 		for(unsigned int i = 0; i < count; i++)
   2556 		{
   2557 			VertexProcessor::setIntegerConstant(index + i, value);
   2558 			value += 4;
   2559 		}
   2560 	}
   2561 
   2562 	void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
   2563 	{
   2564 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2565 		{
   2566 			if(drawCall[i]->vsDirtyConstB < index + count)
   2567 			{
   2568 				drawCall[i]->vsDirtyConstB = index + count;
   2569 			}
   2570 		}
   2571 
   2572 		for(unsigned int i = 0; i < count; i++)
   2573 		{
   2574 			VertexProcessor::setBooleanConstant(index + i, *boolean);
   2575 			boolean++;
   2576 		}
   2577 	}
   2578 
   2579 	void Renderer::setModelMatrix(const Matrix &M, int i)
   2580 	{
   2581 		VertexProcessor::setModelMatrix(M, i);
   2582 	}
   2583 
   2584 	void Renderer::setViewMatrix(const Matrix &V)
   2585 	{
   2586 		VertexProcessor::setViewMatrix(V);
   2587 		updateClipPlanes = true;
   2588 	}
   2589 
   2590 	void Renderer::setBaseMatrix(const Matrix &B)
   2591 	{
   2592 		VertexProcessor::setBaseMatrix(B);
   2593 		updateClipPlanes = true;
   2594 	}
   2595 
   2596 	void Renderer::setProjectionMatrix(const Matrix &P)
   2597 	{
   2598 		VertexProcessor::setProjectionMatrix(P);
   2599 		updateClipPlanes = true;
   2600 	}
   2601 
   2602 	void Renderer::addQuery(Query *query)
   2603 	{
   2604 		queries.push_back(query);
   2605 	}
   2606 
   2607 	void Renderer::removeQuery(Query *query)
   2608 	{
   2609 		queries.remove(query);
   2610 	}
   2611 
   2612 	#if PERF_HUD
   2613 		int Renderer::getThreadCount()
   2614 		{
   2615 			return threadCount;
   2616 		}
   2617 
   2618 		int64_t Renderer::getVertexTime(int thread)
   2619 		{
   2620 			return vertexTime[thread];
   2621 		}
   2622 
   2623 		int64_t Renderer::getSetupTime(int thread)
   2624 		{
   2625 			return setupTime[thread];
   2626 		}
   2627 
   2628 		int64_t Renderer::getPixelTime(int thread)
   2629 		{
   2630 			return pixelTime[thread];
   2631 		}
   2632 
   2633 		void Renderer::resetTimers()
   2634 		{
   2635 			for(int thread = 0; thread < threadCount; thread++)
   2636 			{
   2637 				vertexTime[thread] = 0;
   2638 				setupTime[thread] = 0;
   2639 				pixelTime[thread] = 0;
   2640 			}
   2641 		}
   2642 	#endif
   2643 
   2644 	void Renderer::setViewport(const Viewport &viewport)
   2645 	{
   2646 		this->viewport = viewport;
   2647 	}
   2648 
   2649 	void Renderer::setScissor(const Rect &scissor)
   2650 	{
   2651 		this->scissor = scissor;
   2652 	}
   2653 
   2654 	void Renderer::setClipFlags(int flags)
   2655 	{
   2656 		clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
   2657 	}
   2658 
   2659 	void Renderer::setClipPlane(unsigned int index, const float plane[4])
   2660 	{
   2661 		if(index < MAX_CLIP_PLANES)
   2662 		{
   2663 			userPlane[index] = plane;
   2664 		}
   2665 		else ASSERT(false);
   2666 
   2667 		updateClipPlanes = true;
   2668 	}
   2669 
   2670 	void Renderer::updateConfiguration(bool initialUpdate)
   2671 	{
   2672 		bool newConfiguration = swiftConfig->hasNewConfiguration();
   2673 
   2674 		if(newConfiguration || initialUpdate)
   2675 		{
   2676 			terminateThreads();
   2677 
   2678 			SwiftConfig::Configuration configuration = {};
   2679 			swiftConfig->getConfiguration(configuration);
   2680 
   2681 			precacheVertex = !newConfiguration && configuration.precache;
   2682 			precacheSetup = !newConfiguration && configuration.precache;
   2683 			precachePixel = !newConfiguration && configuration.precache;
   2684 
   2685 			VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
   2686 			PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
   2687 			SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
   2688 
   2689 			switch(configuration.textureSampleQuality)
   2690 			{
   2691 			case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
   2692 			case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
   2693 			case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
   2694 			default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
   2695 			}
   2696 
   2697 			switch(configuration.mipmapQuality)
   2698 			{
   2699 			case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
   2700 			case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
   2701 			default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
   2702 			}
   2703 
   2704 			setPerspectiveCorrection(configuration.perspectiveCorrection);
   2705 
   2706 			switch(configuration.transcendentalPrecision)
   2707 			{
   2708 			case 0:
   2709 				logPrecision = APPROXIMATE;
   2710 				expPrecision = APPROXIMATE;
   2711 				rcpPrecision = APPROXIMATE;
   2712 				rsqPrecision = APPROXIMATE;
   2713 				break;
   2714 			case 1:
   2715 				logPrecision = PARTIAL;
   2716 				expPrecision = PARTIAL;
   2717 				rcpPrecision = PARTIAL;
   2718 				rsqPrecision = PARTIAL;
   2719 				break;
   2720 			case 2:
   2721 				logPrecision = ACCURATE;
   2722 				expPrecision = ACCURATE;
   2723 				rcpPrecision = ACCURATE;
   2724 				rsqPrecision = ACCURATE;
   2725 				break;
   2726 			case 3:
   2727 				logPrecision = WHQL;
   2728 				expPrecision = WHQL;
   2729 				rcpPrecision = WHQL;
   2730 				rsqPrecision = WHQL;
   2731 				break;
   2732 			case 4:
   2733 				logPrecision = IEEE;
   2734 				expPrecision = IEEE;
   2735 				rcpPrecision = IEEE;
   2736 				rsqPrecision = IEEE;
   2737 				break;
   2738 			default:
   2739 				logPrecision = ACCURATE;
   2740 				expPrecision = ACCURATE;
   2741 				rcpPrecision = ACCURATE;
   2742 				rsqPrecision = ACCURATE;
   2743 				break;
   2744 			}
   2745 
   2746 			switch(configuration.transparencyAntialiasing)
   2747 			{
   2748 			case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
   2749 			case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
   2750 			default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
   2751 			}
   2752 
   2753 			switch(configuration.threadCount)
   2754 			{
   2755 			case -1: threadCount = CPUID::coreCount();        break;
   2756 			case 0:  threadCount = CPUID::processAffinity();  break;
   2757 			default: threadCount = configuration.threadCount; break;
   2758 			}
   2759 
   2760 			CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
   2761 			CPUID::setEnableSSSE3(configuration.enableSSSE3);
   2762 			CPUID::setEnableSSE3(configuration.enableSSE3);
   2763 			CPUID::setEnableSSE2(configuration.enableSSE2);
   2764 			CPUID::setEnableSSE(configuration.enableSSE);
   2765 
   2766 			for(int pass = 0; pass < 10; pass++)
   2767 			{
   2768 				optimization[pass] = configuration.optimization[pass];
   2769 			}
   2770 
   2771 			forceWindowed = configuration.forceWindowed;
   2772 			complementaryDepthBuffer = configuration.complementaryDepthBuffer;
   2773 			postBlendSRGB = configuration.postBlendSRGB;
   2774 			exactColorRounding = configuration.exactColorRounding;
   2775 			forceClearRegisters = configuration.forceClearRegisters;
   2776 
   2777 		#ifndef NDEBUG
   2778 			minPrimitives = configuration.minPrimitives;
   2779 			maxPrimitives = configuration.maxPrimitives;
   2780 		#endif
   2781 		}
   2782 
   2783 		if(!initialUpdate && !worker[0])
   2784 		{
   2785 			initializeThreads();
   2786 		}
   2787 	}
   2788 }
   2789