Home | History | Annotate | Download | only in Renderer
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "Renderer.hpp"
     16 
     17 #include "Clipper.hpp"
     18 #include "Surface.hpp"
     19 #include "Primitive.hpp"
     20 #include "Polygon.hpp"
     21 #include "Main/FrameBuffer.hpp"
     22 #include "Main/SwiftConfig.hpp"
     23 #include "Reactor/Reactor.hpp"
     24 #include "Shader/Constants.hpp"
     25 #include "Common/MutexLock.hpp"
     26 #include "Common/CPUID.hpp"
     27 #include "Common/Memory.hpp"
     28 #include "Common/Resource.hpp"
     29 #include "Common/Half.hpp"
     30 #include "Common/Math.hpp"
     31 #include "Common/Timer.hpp"
     32 #include "Common/Debug.hpp"
     33 
     34 #undef max
     35 
     36 bool disableServer = true;
     37 
     38 #ifndef NDEBUG
     39 unsigned int minPrimitives = 1;
     40 unsigned int maxPrimitives = 1 << 21;
     41 #endif
     42 
     43 namespace sw
     44 {
     45 	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
     46 	extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
     47 	extern bool booleanFaceRegister;
     48 	extern bool fullPixelPositionRegister;
     49 	extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
     50 	extern bool secondaryColor;             // Specular lighting is applied after texturing
     51 	extern bool colorsDefaultToZero;
     52 
     53 	extern bool forceWindowed;
     54 	extern bool complementaryDepthBuffer;
     55 	extern bool postBlendSRGB;
     56 	extern bool exactColorRounding;
     57 	extern TransparencyAntialiasing transparencyAntialiasing;
     58 	extern bool forceClearRegisters;
     59 
     60 	extern bool precacheVertex;
     61 	extern bool precacheSetup;
     62 	extern bool precachePixel;
     63 
     64 	static const int batchSize = 128;
     65 	AtomicInt threadCount(1);
     66 	AtomicInt Renderer::unitCount(1);
     67 	AtomicInt Renderer::clusterCount(1);
     68 
     69 	TranscendentalPrecision logPrecision = ACCURATE;
     70 	TranscendentalPrecision expPrecision = ACCURATE;
     71 	TranscendentalPrecision rcpPrecision = ACCURATE;
     72 	TranscendentalPrecision rsqPrecision = ACCURATE;
     73 	bool perspectiveCorrection = true;
     74 
     75 	struct Parameters
     76 	{
     77 		Renderer *renderer;
     78 		int threadIndex;
     79 	};
     80 
     81 	DrawCall::DrawCall()
     82 	{
     83 		queries = 0;
     84 
     85 		vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
     86 		vsDirtyConstI = 16;
     87 		vsDirtyConstB = 16;
     88 
     89 		psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
     90 		psDirtyConstI = 16;
     91 		psDirtyConstB = 16;
     92 
     93 		references = -1;
     94 
     95 		data = (DrawData*)allocate(sizeof(DrawData));
     96 		data->constants = &constants;
     97 	}
     98 
     99 	DrawCall::~DrawCall()
    100 	{
    101 		delete queries;
    102 
    103 		deallocate(data);
    104 	}
    105 
    106 	Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
    107 	{
    108 		sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
    109 		sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
    110 		sw::booleanFaceRegister = conventions.booleanFaceRegister;
    111 		sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
    112 		sw::leadingVertexFirst = conventions.leadingVertexFirst;
    113 		sw::secondaryColor = conventions.secondaryColor;
    114 		sw::colorsDefaultToZero = conventions.colorsDefaultToZero;
    115 		sw::exactColorRounding = exactColorRounding;
    116 
    117 		setRenderTarget(0, 0);
    118 		clipper = new Clipper(symmetricNormalizedDepth);
    119 		blitter = new Blitter;
    120 
    121 		updateViewMatrix = true;
    122 		updateBaseMatrix = true;
    123 		updateProjectionMatrix = true;
    124 		updateClipPlanes = true;
    125 
    126 		#if PERF_HUD
    127 			resetTimers();
    128 		#endif
    129 
    130 		for(int i = 0; i < 16; i++)
    131 		{
    132 			vertexTask[i] = 0;
    133 
    134 			worker[i] = 0;
    135 			resume[i] = 0;
    136 			suspend[i] = 0;
    137 		}
    138 
    139 		threadsAwake = 0;
    140 		resumeApp = new Event();
    141 
    142 		currentDraw = 0;
    143 		nextDraw = 0;
    144 
    145 		qHead = 0;
    146 		qSize = 0;
    147 
    148 		for(int i = 0; i < 16; i++)
    149 		{
    150 			triangleBatch[i] = 0;
    151 			primitiveBatch[i] = 0;
    152 		}
    153 
    154 		for(int draw = 0; draw < DRAW_COUNT; draw++)
    155 		{
    156 			drawCall[draw] = new DrawCall();
    157 			drawList[draw] = drawCall[draw];
    158 		}
    159 
    160 		for(int unit = 0; unit < 16; unit++)
    161 		{
    162 			primitiveProgress[unit].init();
    163 		}
    164 
    165 		for(int cluster = 0; cluster < 16; cluster++)
    166 		{
    167 			pixelProgress[cluster].init();
    168 		}
    169 
    170 		clipFlags = 0;
    171 
    172 		swiftConfig = new SwiftConfig(disableServer);
    173 		updateConfiguration(true);
    174 
    175 		sync = new Resource(0);
    176 	}
    177 
    178 	Renderer::~Renderer()
    179 	{
    180 		sync->destruct();
    181 
    182 		delete clipper;
    183 		clipper = nullptr;
    184 
    185 		delete blitter;
    186 		blitter = nullptr;
    187 
    188 		terminateThreads();
    189 		delete resumeApp;
    190 
    191 		for(int draw = 0; draw < DRAW_COUNT; draw++)
    192 		{
    193 			delete drawCall[draw];
    194 		}
    195 
    196 		delete swiftConfig;
    197 	}
    198 
    199 	// This object has to be mem aligned
    200 	void* Renderer::operator new(size_t size)
    201 	{
    202 		ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
    203 		return sw::allocate(sizeof(Renderer), 16);
    204 	}
    205 
    206 	void Renderer::operator delete(void * mem)
    207 	{
    208 		sw::deallocate(mem);
    209 	}
    210 
    211 	void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
    212 	{
    213 		#ifndef NDEBUG
    214 			if(count < minPrimitives || count > maxPrimitives)
    215 			{
    216 				return;
    217 			}
    218 		#endif
    219 
    220 		context->drawType = drawType;
    221 
    222 		updateConfiguration();
    223 		updateClipper();
    224 
    225 		int ss = context->getSuperSampleCount();
    226 		int ms = context->getMultiSampleCount();
    227 
    228 		for(int q = 0; q < ss; q++)
    229 		{
    230 			unsigned int oldMultiSampleMask = context->multiSampleMask;
    231 			context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
    232 
    233 			if(!context->multiSampleMask)
    234 			{
    235 				continue;
    236 			}
    237 
    238 			sync->lock(sw::PRIVATE);
    239 
    240 			if(update || oldMultiSampleMask != context->multiSampleMask)
    241 			{
    242 				vertexState = VertexProcessor::update(drawType);
    243 				setupState = SetupProcessor::update();
    244 				pixelState = PixelProcessor::update();
    245 
    246 				vertexRoutine = VertexProcessor::routine(vertexState);
    247 				setupRoutine = SetupProcessor::routine(setupState);
    248 				pixelRoutine = PixelProcessor::routine(pixelState);
    249 			}
    250 
    251 			int batch = batchSize / ms;
    252 
    253 			int (Renderer::*setupPrimitives)(int batch, int count);
    254 
    255 			if(context->isDrawTriangle())
    256 			{
    257 				switch(context->fillMode)
    258 				{
    259 				case FILL_SOLID:
    260 					setupPrimitives = &Renderer::setupSolidTriangles;
    261 					break;
    262 				case FILL_WIREFRAME:
    263 					setupPrimitives = &Renderer::setupWireframeTriangle;
    264 					batch = 1;
    265 					break;
    266 				case FILL_VERTEX:
    267 					setupPrimitives = &Renderer::setupVertexTriangle;
    268 					batch = 1;
    269 					break;
    270 				default:
    271 					ASSERT(false);
    272 					return;
    273 				}
    274 			}
    275 			else if(context->isDrawLine())
    276 			{
    277 				setupPrimitives = &Renderer::setupLines;
    278 			}
    279 			else   // Point draw
    280 			{
    281 				setupPrimitives = &Renderer::setupPoints;
    282 			}
    283 
    284 			DrawCall *draw = nullptr;
    285 
    286 			do
    287 			{
    288 				for(int i = 0; i < DRAW_COUNT; i++)
    289 				{
    290 					if(drawCall[i]->references == -1)
    291 					{
    292 						draw = drawCall[i];
    293 						drawList[nextDraw & DRAW_COUNT_BITS] = draw;
    294 
    295 						break;
    296 					}
    297 				}
    298 
    299 				if(!draw)
    300 				{
    301 					resumeApp->wait();
    302 				}
    303 			}
    304 			while(!draw);
    305 
    306 			DrawData *data = draw->data;
    307 
    308 			if(queries.size() != 0)
    309 			{
    310 				draw->queries = new std::list<Query*>();
    311 				bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
    312 				for(auto &query : queries)
    313 				{
    314 					if(includePrimitivesWrittenQueries || (query->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
    315 					{
    316 						++query->reference; // Atomic
    317 						draw->queries->push_back(query);
    318 					}
    319 				}
    320 			}
    321 
    322 			draw->drawType = drawType;
    323 			draw->batchSize = batch;
    324 
    325 			vertexRoutine->bind();
    326 			setupRoutine->bind();
    327 			pixelRoutine->bind();
    328 
    329 			draw->vertexRoutine = vertexRoutine;
    330 			draw->setupRoutine = setupRoutine;
    331 			draw->pixelRoutine = pixelRoutine;
    332 			draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
    333 			draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
    334 			draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
    335 			draw->setupPrimitives = setupPrimitives;
    336 			draw->setupState = setupState;
    337 
    338 			for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
    339 			{
    340 				draw->vertexStream[i] = context->input[i].resource;
    341 				data->input[i] = context->input[i].buffer;
    342 				data->stride[i] = context->input[i].stride;
    343 
    344 				if(draw->vertexStream[i])
    345 				{
    346 					draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
    347 				}
    348 			}
    349 
    350 			if(context->indexBuffer)
    351 			{
    352 				data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
    353 			}
    354 
    355 			draw->indexBuffer = context->indexBuffer;
    356 
    357 			for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
    358 			{
    359 				draw->texture[sampler] = 0;
    360 			}
    361 
    362 			for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
    363 			{
    364 				if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
    365 				{
    366 					draw->texture[sampler] = context->texture[sampler];
    367 					draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
    368 
    369 					data->mipmap[sampler] = context->sampler[sampler].getTextureData();
    370 				}
    371 			}
    372 
    373 			if(context->pixelShader)
    374 			{
    375 				if(draw->psDirtyConstF)
    376 				{
    377 					memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
    378 					memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
    379 					draw->psDirtyConstF = 0;
    380 				}
    381 
    382 				if(draw->psDirtyConstI)
    383 				{
    384 					memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
    385 					draw->psDirtyConstI = 0;
    386 				}
    387 
    388 				if(draw->psDirtyConstB)
    389 				{
    390 					memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
    391 					draw->psDirtyConstB = 0;
    392 				}
    393 
    394 				PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
    395 			}
    396 			else
    397 			{
    398 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
    399 				{
    400 					draw->pUniformBuffers[i] = nullptr;
    401 				}
    402 			}
    403 
    404 			if(context->pixelShaderModel() <= 0x0104)
    405 			{
    406 				for(int stage = 0; stage < 8; stage++)
    407 				{
    408 					if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
    409 					{
    410 						data->textureStage[stage] = context->textureStage[stage].uniforms;
    411 					}
    412 					else break;
    413 				}
    414 			}
    415 
    416 			if(context->vertexShader)
    417 			{
    418 				if(context->vertexShader->getShaderModel() >= 0x0300)
    419 				{
    420 					for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
    421 					{
    422 						if(vertexState.sampler[sampler].textureType != TEXTURE_NULL)
    423 						{
    424 							draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
    425 							draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
    426 
    427 							data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
    428 						}
    429 					}
    430 				}
    431 
    432 				if(draw->vsDirtyConstF)
    433 				{
    434 					memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
    435 					draw->vsDirtyConstF = 0;
    436 				}
    437 
    438 				if(draw->vsDirtyConstI)
    439 				{
    440 					memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
    441 					draw->vsDirtyConstI = 0;
    442 				}
    443 
    444 				if(draw->vsDirtyConstB)
    445 				{
    446 					memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
    447 					draw->vsDirtyConstB = 0;
    448 				}
    449 
    450 				if(context->vertexShader->isInstanceIdDeclared())
    451 				{
    452 					data->instanceID = context->instanceID;
    453 				}
    454 
    455 				VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
    456 				VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
    457 			}
    458 			else
    459 			{
    460 				data->ff = ff;
    461 
    462 				draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
    463 				draw->vsDirtyConstI = 16;
    464 				draw->vsDirtyConstB = 16;
    465 
    466 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
    467 				{
    468 					draw->vUniformBuffers[i] = nullptr;
    469 				}
    470 
    471 				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
    472 				{
    473 					draw->transformFeedbackBuffers[i] = nullptr;
    474 				}
    475 			}
    476 
    477 			if(pixelState.stencilActive)
    478 			{
    479 				data->stencil[0] = stencil;
    480 				data->stencil[1] = stencilCCW;
    481 			}
    482 
    483 			if(pixelState.fogActive)
    484 			{
    485 				data->fog = fog;
    486 			}
    487 
    488 			if(setupState.isDrawPoint)
    489 			{
    490 				data->point = point;
    491 			}
    492 
    493 			data->lineWidth = context->lineWidth;
    494 
    495 			data->factor = factor;
    496 
    497 			if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
    498 			{
    499 				float ref = context->alphaReference * (1.0f / 255.0f);
    500 				float margin = sw::min(ref, 1.0f - ref);
    501 
    502 				if(ms == 4)
    503 				{
    504 					data->a2c0 = replicate(ref - margin * 0.6f);
    505 					data->a2c1 = replicate(ref - margin * 0.2f);
    506 					data->a2c2 = replicate(ref + margin * 0.2f);
    507 					data->a2c3 = replicate(ref + margin * 0.6f);
    508 				}
    509 				else if(ms == 2)
    510 				{
    511 					data->a2c0 = replicate(ref - margin * 0.3f);
    512 					data->a2c1 = replicate(ref + margin * 0.3f);
    513 				}
    514 				else ASSERT(false);
    515 			}
    516 
    517 			if(pixelState.occlusionEnabled)
    518 			{
    519 				for(int cluster = 0; cluster < clusterCount; cluster++)
    520 				{
    521 					data->occlusion[cluster] = 0;
    522 				}
    523 			}
    524 
    525 			#if PERF_PROFILE
    526 				for(int cluster = 0; cluster < clusterCount; cluster++)
    527 				{
    528 					for(int i = 0; i < PERF_TIMERS; i++)
    529 					{
    530 						data->cycles[i][cluster] = 0;
    531 					}
    532 				}
    533 			#endif
    534 
    535 			// Viewport
    536 			{
    537 				float W = 0.5f * viewport.width;
    538 				float H = 0.5f * viewport.height;
    539 				float X0 = viewport.x0 + W;
    540 				float Y0 = viewport.y0 + H;
    541 				float N = viewport.minZ;
    542 				float F = viewport.maxZ;
    543 				float Z = F - N;
    544 
    545 				if(context->isDrawTriangle(false))
    546 				{
    547 					N += context->depthBias;
    548 				}
    549 
    550 				if(complementaryDepthBuffer)
    551 				{
    552 					Z = -Z;
    553 					N = 1 - N;
    554 				}
    555 
    556 				static const float X[5][16] =   // Fragment offsets
    557 				{
    558 					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
    559 					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
    560 					{-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
    561 					{+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
    562 					{+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
    563 				};
    564 
    565 				static const float Y[5][16] =   // Fragment offsets
    566 				{
    567 					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
    568 					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
    569 					{-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
    570 					{-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
    571 					{-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
    572 				};
    573 
    574 				int s = sw::log2(ss);
    575 
    576 				data->Wx16 = replicate(W * 16);
    577 				data->Hx16 = replicate(H * 16);
    578 				data->X0x16 = replicate(X0 * 16 - 8);
    579 				data->Y0x16 = replicate(Y0 * 16 - 8);
    580 				data->XXXX = replicate(X[s][q] / W);
    581 				data->YYYY = replicate(Y[s][q] / H);
    582 				data->halfPixelX = replicate(0.5f / W);
    583 				data->halfPixelY = replicate(0.5f / H);
    584 				data->viewportHeight = abs(viewport.height);
    585 				data->slopeDepthBias = context->slopeDepthBias;
    586 				data->depthRange = Z;
    587 				data->depthNear = N;
    588 				draw->clipFlags = clipFlags;
    589 
    590 				if(clipFlags)
    591 				{
    592 					if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
    593 					if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
    594 					if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
    595 					if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
    596 					if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
    597 					if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
    598 				}
    599 			}
    600 
    601 			// Target
    602 			{
    603 				for(int index = 0; index < RENDERTARGETS; index++)
    604 				{
    605 					draw->renderTarget[index] = context->renderTarget[index];
    606 
    607 					if(draw->renderTarget[index])
    608 					{
    609 						unsigned int layer = context->renderTargetLayer[index];
    610 						data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED);
    611 						data->colorBuffer[index] += q * ms * context->renderTarget[index]->getSliceB(true);
    612 						data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
    613 						data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
    614 					}
    615 				}
    616 
    617 				draw->depthBuffer = context->depthBuffer;
    618 				draw->stencilBuffer = context->stencilBuffer;
    619 
    620 				if(draw->depthBuffer)
    621 				{
    622 					unsigned int layer = context->depthBufferLayer;
    623 					data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED);
    624 					data->depthBuffer += q * ms * context->depthBuffer->getSliceB(true);
    625 					data->depthPitchB = context->depthBuffer->getInternalPitchB();
    626 					data->depthSliceB = context->depthBuffer->getInternalSliceB();
    627 				}
    628 
    629 				if(draw->stencilBuffer)
    630 				{
    631 					unsigned int layer = context->stencilBufferLayer;
    632 					data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, layer, MANAGED);
    633 					data->stencilBuffer += q * ms * context->stencilBuffer->getSliceB(true);
    634 					data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
    635 					data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
    636 				}
    637 			}
    638 
    639 			// Scissor
    640 			{
    641 				data->scissorX0 = scissor.x0;
    642 				data->scissorX1 = scissor.x1;
    643 				data->scissorY0 = scissor.y0;
    644 				data->scissorY1 = scissor.y1;
    645 			}
    646 
    647 			draw->primitive = 0;
    648 			draw->count = count;
    649 
    650 			draw->references = (count + batch - 1) / batch;
    651 
    652 			schedulerMutex.lock();
    653 			++nextDraw; // Atomic
    654 			schedulerMutex.unlock();
    655 
    656 			#ifndef NDEBUG
    657 			if(threadCount == 1)   // Use main thread for draw execution
    658 			{
    659 				threadsAwake = 1;
    660 				task[0].type = Task::RESUME;
    661 
    662 				taskLoop(0);
    663 			}
    664 			else
    665 			#endif
    666 			{
    667 				if(!threadsAwake)
    668 				{
    669 					suspend[0]->wait();
    670 
    671 					threadsAwake = 1;
    672 					task[0].type = Task::RESUME;
    673 
    674 					resume[0]->signal();
    675 				}
    676 			}
    677 		}
    678 	}
    679 
    680 	void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask)
    681 	{
    682 		blitter->clear(value, format, dest, clearRect, rgbaMask);
    683 	}
    684 
    685 	void Renderer::blit(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil, bool sRGBconversion)
    686 	{
    687 		blitter->blit(source, sRect, dest, dRect, {filter, isStencil, sRGBconversion});
    688 	}
    689 
    690 	void Renderer::blit3D(Surface *source, Surface *dest)
    691 	{
    692 		blitter->blit3D(source, dest);
    693 	}
    694 
    695 	void Renderer::threadFunction(void *parameters)
    696 	{
    697 		Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
    698 		int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
    699 
    700 		if(logPrecision < IEEE)
    701 		{
    702 			CPUID::setFlushToZero(true);
    703 			CPUID::setDenormalsAreZero(true);
    704 		}
    705 
    706 		renderer->threadLoop(threadIndex);
    707 	}
    708 
    709 	void Renderer::threadLoop(int threadIndex)
    710 	{
    711 		while(!exitThreads)
    712 		{
    713 			taskLoop(threadIndex);
    714 
    715 			suspend[threadIndex]->signal();
    716 			resume[threadIndex]->wait();
    717 		}
    718 	}
    719 
    720 	void Renderer::taskLoop(int threadIndex)
    721 	{
    722 		while(task[threadIndex].type != Task::SUSPEND)
    723 		{
    724 			scheduleTask(threadIndex);
    725 			executeTask(threadIndex);
    726 		}
    727 	}
    728 
    729 	void Renderer::findAvailableTasks()
    730 	{
    731 		// Find pixel tasks
    732 		for(int cluster = 0; cluster < clusterCount; cluster++)
    733 		{
    734 			if(!pixelProgress[cluster].executing)
    735 			{
    736 				for(int unit = 0; unit < unitCount; unit++)
    737 				{
    738 					if(primitiveProgress[unit].references > 0)   // Contains processed primitives
    739 					{
    740 						if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
    741 						{
    742 							if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
    743 							{
    744 								Task &task = taskQueue[qHead];
    745 								task.type = Task::PIXELS;
    746 								task.primitiveUnit = unit;
    747 								task.pixelCluster = cluster;
    748 
    749 								pixelProgress[cluster].executing = true;
    750 
    751 								// Commit to the task queue
    752 								qHead = (qHead + 1) & TASK_COUNT_BITS;
    753 								qSize++;
    754 
    755 								break;
    756 							}
    757 						}
    758 					}
    759 				}
    760 			}
    761 		}
    762 
    763 		// Find primitive tasks
    764 		if(currentDraw == nextDraw)
    765 		{
    766 			return;   // No more primitives to process
    767 		}
    768 
    769 		for(int unit = 0; unit < unitCount; unit++)
    770 		{
    771 			DrawCall *draw = drawList[currentDraw & DRAW_COUNT_BITS];
    772 
    773 			int primitive = draw->primitive;
    774 			int count = draw->count;
    775 
    776 			if(primitive >= count)
    777 			{
    778 				++currentDraw; // Atomic
    779 
    780 				if(currentDraw == nextDraw)
    781 				{
    782 					return;   // No more primitives to process
    783 				}
    784 
    785 				draw = drawList[currentDraw & DRAW_COUNT_BITS];
    786 			}
    787 
    788 			if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
    789 			{
    790 				primitive = draw->primitive;
    791 				count = draw->count;
    792 				int batch = draw->batchSize;
    793 
    794 				primitiveProgress[unit].drawCall = currentDraw;
    795 				primitiveProgress[unit].firstPrimitive = primitive;
    796 				primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
    797 
    798 				draw->primitive += batch;
    799 
    800 				Task &task = taskQueue[qHead];
    801 				task.type = Task::PRIMITIVES;
    802 				task.primitiveUnit = unit;
    803 
    804 				primitiveProgress[unit].references = -1;
    805 
    806 				// Commit to the task queue
    807 				qHead = (qHead + 1) & TASK_COUNT_BITS;
    808 				qSize++;
    809 			}
    810 		}
    811 	}
    812 
    813 	void Renderer::scheduleTask(int threadIndex)
    814 	{
    815 		schedulerMutex.lock();
    816 
    817 		int curThreadsAwake = threadsAwake;
    818 
    819 		if((int)qSize < threadCount - curThreadsAwake + 1)
    820 		{
    821 			findAvailableTasks();
    822 		}
    823 
    824 		if(qSize != 0)
    825 		{
    826 			task[threadIndex] = taskQueue[(qHead - qSize) & TASK_COUNT_BITS];
    827 			qSize--;
    828 
    829 			if(curThreadsAwake != threadCount)
    830 			{
    831 				int wakeup = qSize - curThreadsAwake + 1;
    832 
    833 				for(int i = 0; i < threadCount && wakeup > 0; i++)
    834 				{
    835 					if(task[i].type == Task::SUSPEND)
    836 					{
    837 						suspend[i]->wait();
    838 						task[i].type = Task::RESUME;
    839 						resume[i]->signal();
    840 
    841 						++threadsAwake; // Atomic
    842 						wakeup--;
    843 					}
    844 				}
    845 			}
    846 		}
    847 		else
    848 		{
    849 			task[threadIndex].type = Task::SUSPEND;
    850 
    851 			--threadsAwake; // Atomic
    852 		}
    853 
    854 		schedulerMutex.unlock();
    855 	}
    856 
    // Executes the task currently assigned to worker thread 'threadIndex'.
    //
    // Task::PRIMITIVES: processes the vertices of the primitive batch held by the
    //   task's primitive unit and, unless the draw call's setup state has
    //   rasterizerDiscard set, runs the draw call's setupPrimitives member function
    //   on it. The resulting visible count is stored in primitiveProgress[unit] and
    //   the unit's reference count is set to clusterCount.
    // Task::PIXELS: if the unit has visible primitives, runs the draw call's pixel
    //   routine for the task's pixel cluster; finishRendering() is then called
    //   whether or not anything was visible.
    // Task::RESUME / Task::SUSPEND: nothing to do here.
    // Any other task type trips ASSERT.
    //
    // With PERF_HUD enabled, elapsed ticks are accumulated per thread into
    // vertexTime, setupTime and pixelTime.
    857 	void Renderer::executeTask(int threadIndex)
    858 	{
    859 		#if PERF_HUD
    860 			int64_t startTick = Timer::ticks();
    861 		#endif
    862 
    863 		switch(task[threadIndex].type)
    864 		{
    865 		case Task::PRIMITIVES:
    866 			{
    867 				int unit = task[threadIndex].primitiveUnit;
    868 
    869 				int input = primitiveProgress[unit].firstPrimitive;
    870 				int count = primitiveProgress[unit].primitiveCount;
        				// The draw call index is masked with DRAW_COUNT_BITS before indexing drawList.
    871 				DrawCall *draw = drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
    872 				int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
    873 
    874 				processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
    875 
    876 				#if PERF_HUD
        					// Attribute the time so far to vertex processing and restart the clock for setup.
    877 					int64_t time = Timer::ticks();
    878 					vertexTime[threadIndex] += time - startTick;
    879 					startTick = time;
    880 				#endif
    881 
    882 				int visible = 0;
    883 
    884 				if(!draw->setupState.rasterizerDiscard)
    885 				{
    886 					visible = (this->*setupPrimitives)(unit, count);
    887 				}
    888 
        				// Publish the result; each pixel task later releases one reference in finishRendering().
    889 				primitiveProgress[unit].visible = visible;
    890 				primitiveProgress[unit].references = clusterCount;
    891 
    892 				#if PERF_HUD
    893 					setupTime[threadIndex] += Timer::ticks() - startTick;
    894 				#endif
    895 			}
    896 			break;
    897 		case Task::PIXELS:
    898 			{
    899 				int unit = task[threadIndex].primitiveUnit;
    900 				int visible = primitiveProgress[unit].visible;
    901 
    902 				if(visible > 0)
    903 				{
    904 					int cluster = task[threadIndex].pixelCluster;
    905 					Primitive *primitive = primitiveBatch[unit];
        					// Note: the draw call is looked up through the cluster's progress, not the unit's.
    906 					DrawCall *draw = drawList[pixelProgress[cluster].drawCall & DRAW_COUNT_BITS];
    907 					DrawData *data = draw->data;
    908 					PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
    909 
    910 					pixelRoutine(primitive, visible, cluster, data);
    911 				}
    912 
        				// Always called, even when nothing was visible, so progress and reference counts advance.
    913 				finishRendering(task[threadIndex]);
    914 
    915 				#if PERF_HUD
    916 					pixelTime[threadIndex] += Timer::ticks() - startTick;
    917 				#endif
    918 			}
    919 			break;
    920 		case Task::RESUME:
    921 			break;
    922 		case Task::SUSPEND:
    923 			break;
    924 		default:
    925 			ASSERT(false);
    926 		}
    927 	}
    928 
    // Acquires 'sync' with sw::PUBLIC access and releases it again immediately.
    // The call therefore returns only once the lock could be obtained.
    // NOTE(review): presumably this is used to wait for in-flight draw calls, which
    // release 'sync' when they complete - confirm against the code that locks it.
    929 	void Renderer::synchronize()
    930 	{
    931 		sync->lock(sw::PUBLIC);
    932 		sync->unlock();
    933 	}
    934 
    // Bookkeeping performed by a pixel task once it is done with its cluster for the
    // primitive batch in pixelTask.primitiveUnit.
    //
    // Steps:
    //  1. Records how many primitives of the draw call this cluster has processed and
    //     advances the cluster to the next draw call once draw.count is reached.
    //  2. Decrements the batch's reference count; when the result tested below is 0,
    //     also decrements the draw call's reference count.
    //  3. When that second result is 0 as well: accumulates profiling counters
    //     (PERF_PROFILE only) and query results, unlocks the render targets,
    //     depth/stencil buffers, textures, vertex streams, index buffer, uniform
    //     buffers and transform feedback buffers referenced by the draw call, unbinds
    //     its routines, unlocks 'sync', sets draw.references to -1 and signals
    //     resumeApp.
    //  4. Clears the cluster's 'executing' flag.
    //
    // NOTE(review): the '== 0' tests rely on what the counters' postfix operator--
    // returns (old or new value); verify against the counter type's declaration
    // before touching this logic.
    935 	void Renderer::finishRendering(Task &pixelTask)
    936 	{
    937 		int unit = pixelTask.primitiveUnit;
    938 		int cluster = pixelTask.pixelCluster;
    939 
    940 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
    941 		DrawData &data = *draw.data;
    942 		int primitive = primitiveProgress[unit].firstPrimitive;
    943 		int count = primitiveProgress[unit].primitiveCount;
        		// Index one past the last primitive of this batch within the draw call.
    944 		int processedPrimitives = primitive + count;
    945 
    946 		pixelProgress[cluster].processedPrimitives = processedPrimitives;
    947 
        		// This cluster has consumed the whole draw call: move it on to the next one.
    948 		if(pixelProgress[cluster].processedPrimitives >= draw.count)
    949 		{
    950 			++pixelProgress[cluster].drawCall; // Atomic
    951 			pixelProgress[cluster].processedPrimitives = 0;
    952 		}
    953 
    954 		int ref = primitiveProgress[unit].references--; // Atomic
    955 
    956 		if(ref == 0)
    957 		{
    958 			ref = draw.references--; // Atomic
    959 
    960 			if(ref == 0)
    961 			{
    962 				#if PERF_PROFILE
        					// The loop variable shadows the outer 'cluster'.
    963 					for(int cluster = 0; cluster < clusterCount; cluster++)
    964 					{
    965 						for(int i = 0; i < PERF_TIMERS; i++)
    966 						{
    967 							profiler.cycles[i] += data.cycles[i][cluster];
    968 						}
    969 					}
    970 				#endif
    971 
    972 				if(draw.queries)
    973 				{
    974 					for(auto &query : *(draw.queries))
    975 					{
    976 						switch(query->type)
    977 						{
    978 						case Query::FRAGMENTS_PASSED:
        							// Sum the per-cluster occlusion counters (loop variable shadows the outer 'cluster').
    979 							for(int cluster = 0; cluster < clusterCount; cluster++)
    980 							{
    981 								query->data += data.occlusion[cluster];
    982 							}
    983 							break;
    984 						case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
    985 							query->data += processedPrimitives;
    986 							break;
    987 						default:
    988 							break;
    989 						}
    990 
    991 						--query->reference; // Atomic
    992 					}
    993 
        					// The query list is owned by the draw call and released here.
    994 					delete draw.queries;
    995 					draw.queries = 0;
    996 				}
    997 
    998 				for(int i = 0; i < RENDERTARGETS; i++)
    999 				{
   1000 					if(draw.renderTarget[i])
   1001 					{
   1002 						draw.renderTarget[i]->unlockInternal();
   1003 					}
   1004 				}
   1005 
   1006 				if(draw.depthBuffer)
   1007 				{
   1008 					draw.depthBuffer->unlockInternal();
   1009 				}
   1010 
   1011 				if(draw.stencilBuffer)
   1012 				{
   1013 					draw.stencilBuffer->unlockStencil();
   1014 				}
   1015 
   1016 				for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
   1017 				{
   1018 					if(draw.texture[i])
   1019 					{
   1020 						draw.texture[i]->unlock();
   1021 					}
   1022 				}
   1023 
   1024 				for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
   1025 				{
   1026 					if(draw.vertexStream[i])
   1027 					{
   1028 						draw.vertexStream[i]->unlock();
   1029 					}
   1030 				}
   1031 
   1032 				if(draw.indexBuffer)
   1033 				{
   1034 					draw.indexBuffer->unlock();
   1035 				}
   1036 
        				// Pixel ('p') and vertex ('v') uniform buffer bindings are released together.
   1037 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
   1038 				{
   1039 					if(draw.pUniformBuffers[i])
   1040 					{
   1041 						draw.pUniformBuffers[i]->unlock();
   1042 					}
   1043 					if(draw.vUniformBuffers[i])
   1044 					{
   1045 						draw.vUniformBuffers[i]->unlock();
   1046 					}
   1047 				}
   1048 
   1049 				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
   1050 				{
   1051 					if(draw.transformFeedbackBuffers[i])
   1052 					{
   1053 						draw.transformFeedbackBuffers[i]->unlock();
   1054 					}
   1055 				}
   1056 
   1057 				draw.vertexRoutine->unbind();
   1058 				draw.setupRoutine->unbind();
   1059 				draw.pixelRoutine->unbind();
   1060 
   1061 				sync->unlock();
   1062 
        				// NOTE(review): -1 presumably marks this drawList slot as free for reuse - confirm in the draw submission code.
   1063 				draw.references = -1;
   1064 				resumeApp->signal();
   1065 			}
   1066 		}
   1067 
   1068 		pixelProgress[cluster].executing = false;
   1069 	}
   1070 
   1071 	void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
   1072 	{
   1073 		Triangle *triangle = triangleBatch[unit];
   1074 		int primitiveDrawCall = primitiveProgress[unit].drawCall;
   1075 		DrawCall *draw = drawList[primitiveDrawCall & DRAW_COUNT_BITS];
   1076 		DrawData *data = draw->data;
   1077 		VertexTask *task = vertexTask[thread];
   1078 
   1079 		const void *indices = data->indices;
   1080 		VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
   1081 
   1082 		if(task->vertexCache.drawCall != primitiveDrawCall)
   1083 		{
   1084 			task->vertexCache.clear();
   1085 			task->vertexCache.drawCall = primitiveDrawCall;
   1086 		}
   1087 
   1088 		unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
   1089 
   1090 		switch(draw->drawType)
   1091 		{
   1092 		case DRAW_POINTLIST:
   1093 			{
   1094 				unsigned int index = start;
   1095 
   1096 				for(unsigned int i = 0; i < triangleCount; i++)
   1097 				{
   1098 					batch[i][0] = index;
   1099 					batch[i][1] = index;
   1100 					batch[i][2] = index;
   1101 
   1102 					index += 1;
   1103 				}
   1104 			}
   1105 			break;
   1106 		case DRAW_LINELIST:
   1107 			{
   1108 				unsigned int index = 2 * start;
   1109 
   1110 				for(unsigned int i = 0; i < triangleCount; i++)
   1111 				{
   1112 					batch[i][0] = index + 0;
   1113 					batch[i][1] = index + 1;
   1114 					batch[i][2] = index + 1;
   1115 
   1116 					index += 2;
   1117 				}
   1118 			}
   1119 			break;
   1120 		case DRAW_LINESTRIP:
   1121 			{
   1122 				unsigned int index = start;
   1123 
   1124 				for(unsigned int i = 0; i < triangleCount; i++)
   1125 				{
   1126 					batch[i][0] = index + 0;
   1127 					batch[i][1] = index + 1;
   1128 					batch[i][2] = index + 1;
   1129 
   1130 					index += 1;
   1131 				}
   1132 			}
   1133 			break;
   1134 		case DRAW_LINELOOP:
   1135 			{
   1136 				unsigned int index = start;
   1137 
   1138 				for(unsigned int i = 0; i < triangleCount; i++)
   1139 				{
   1140 					batch[i][0] = (index + 0) % loop;
   1141 					batch[i][1] = (index + 1) % loop;
   1142 					batch[i][2] = (index + 1) % loop;
   1143 
   1144 					index += 1;
   1145 				}
   1146 			}
   1147 			break;
   1148 		case DRAW_TRIANGLELIST:
   1149 			{
   1150 				unsigned int index = 3 * start;
   1151 
   1152 				for(unsigned int i = 0; i < triangleCount; i++)
   1153 				{
   1154 					batch[i][0] = index + 0;
   1155 					batch[i][1] = index + 1;
   1156 					batch[i][2] = index + 2;
   1157 
   1158 					index += 3;
   1159 				}
   1160 			}
   1161 			break;
   1162 		case DRAW_TRIANGLESTRIP:
   1163 			{
   1164 				unsigned int index = start;
   1165 
   1166 				for(unsigned int i = 0; i < triangleCount; i++)
   1167 				{
   1168 					if(leadingVertexFirst)
   1169 					{
   1170 						batch[i][0] = index + 0;
   1171 						batch[i][1] = index + (index & 1) + 1;
   1172 						batch[i][2] = index + (~index & 1) + 1;
   1173 					}
   1174 					else
   1175 					{
   1176 						batch[i][0] = index + (index & 1);
   1177 						batch[i][1] = index + (~index & 1);
   1178 						batch[i][2] = index + 2;
   1179 					}
   1180 
   1181 					index += 1;
   1182 				}
   1183 			}
   1184 			break;
   1185 		case DRAW_TRIANGLEFAN:
   1186 			{
   1187 				unsigned int index = start;
   1188 
   1189 				for(unsigned int i = 0; i < triangleCount; i++)
   1190 				{
   1191 					if(leadingVertexFirst)
   1192 					{
   1193 						batch[i][0] = index + 1;
   1194 						batch[i][1] = index + 2;
   1195 						batch[i][2] = 0;
   1196 					}
   1197 					else
   1198 					{
   1199 						batch[i][0] = 0;
   1200 						batch[i][1] = index + 1;
   1201 						batch[i][2] = index + 2;
   1202 					}
   1203 
   1204 					index += 1;
   1205 				}
   1206 			}
   1207 			break;
   1208 		case DRAW_INDEXEDPOINTLIST8:
   1209 			{
   1210 				const unsigned char *index = (const unsigned char*)indices + start;
   1211 
   1212 				for(unsigned int i = 0; i < triangleCount; i++)
   1213 				{
   1214 					batch[i][0] = *index;
   1215 					batch[i][1] = *index;
   1216 					batch[i][2] = *index;
   1217 
   1218 					index += 1;
   1219 				}
   1220 			}
   1221 			break;
   1222 		case DRAW_INDEXEDPOINTLIST16:
   1223 			{
   1224 				const unsigned short *index = (const unsigned short*)indices + start;
   1225 
   1226 				for(unsigned int i = 0; i < triangleCount; i++)
   1227 				{
   1228 					batch[i][0] = *index;
   1229 					batch[i][1] = *index;
   1230 					batch[i][2] = *index;
   1231 
   1232 					index += 1;
   1233 				}
   1234 			}
   1235 			break;
   1236 		case DRAW_INDEXEDPOINTLIST32:
   1237 			{
   1238 				const unsigned int *index = (const unsigned int*)indices + start;
   1239 
   1240 				for(unsigned int i = 0; i < triangleCount; i++)
   1241 				{
   1242 					batch[i][0] = *index;
   1243 					batch[i][1] = *index;
   1244 					batch[i][2] = *index;
   1245 
   1246 					index += 1;
   1247 				}
   1248 			}
   1249 			break;
   1250 		case DRAW_INDEXEDLINELIST8:
   1251 			{
   1252 				const unsigned char *index = (const unsigned char*)indices + 2 * start;
   1253 
   1254 				for(unsigned int i = 0; i < triangleCount; i++)
   1255 				{
   1256 					batch[i][0] = index[0];
   1257 					batch[i][1] = index[1];
   1258 					batch[i][2] = index[1];
   1259 
   1260 					index += 2;
   1261 				}
   1262 			}
   1263 			break;
   1264 		case DRAW_INDEXEDLINELIST16:
   1265 			{
   1266 				const unsigned short *index = (const unsigned short*)indices + 2 * start;
   1267 
   1268 				for(unsigned int i = 0; i < triangleCount; i++)
   1269 				{
   1270 					batch[i][0] = index[0];
   1271 					batch[i][1] = index[1];
   1272 					batch[i][2] = index[1];
   1273 
   1274 					index += 2;
   1275 				}
   1276 			}
   1277 			break;
   1278 		case DRAW_INDEXEDLINELIST32:
   1279 			{
   1280 				const unsigned int *index = (const unsigned int*)indices + 2 * start;
   1281 
   1282 				for(unsigned int i = 0; i < triangleCount; i++)
   1283 				{
   1284 					batch[i][0] = index[0];
   1285 					batch[i][1] = index[1];
   1286 					batch[i][2] = index[1];
   1287 
   1288 					index += 2;
   1289 				}
   1290 			}
   1291 			break;
   1292 		case DRAW_INDEXEDLINESTRIP8:
   1293 			{
   1294 				const unsigned char *index = (const unsigned char*)indices + start;
   1295 
   1296 				for(unsigned int i = 0; i < triangleCount; i++)
   1297 				{
   1298 					batch[i][0] = index[0];
   1299 					batch[i][1] = index[1];
   1300 					batch[i][2] = index[1];
   1301 
   1302 					index += 1;
   1303 				}
   1304 			}
   1305 			break;
   1306 		case DRAW_INDEXEDLINESTRIP16:
   1307 			{
   1308 				const unsigned short *index = (const unsigned short*)indices + start;
   1309 
   1310 				for(unsigned int i = 0; i < triangleCount; i++)
   1311 				{
   1312 					batch[i][0] = index[0];
   1313 					batch[i][1] = index[1];
   1314 					batch[i][2] = index[1];
   1315 
   1316 					index += 1;
   1317 				}
   1318 			}
   1319 			break;
   1320 		case DRAW_INDEXEDLINESTRIP32:
   1321 			{
   1322 				const unsigned int *index = (const unsigned int*)indices + start;
   1323 
   1324 				for(unsigned int i = 0; i < triangleCount; i++)
   1325 				{
   1326 					batch[i][0] = index[0];
   1327 					batch[i][1] = index[1];
   1328 					batch[i][2] = index[1];
   1329 
   1330 					index += 1;
   1331 				}
   1332 			}
   1333 			break;
   1334 		case DRAW_INDEXEDLINELOOP8:
   1335 			{
   1336 				const unsigned char *index = (const unsigned char*)indices;
   1337 
   1338 				for(unsigned int i = 0; i < triangleCount; i++)
   1339 				{
   1340 					batch[i][0] = index[(start + i + 0) % loop];
   1341 					batch[i][1] = index[(start + i + 1) % loop];
   1342 					batch[i][2] = index[(start + i + 1) % loop];
   1343 				}
   1344 			}
   1345 			break;
   1346 		case DRAW_INDEXEDLINELOOP16:
   1347 			{
   1348 				const unsigned short *index = (const unsigned short*)indices;
   1349 
   1350 				for(unsigned int i = 0; i < triangleCount; i++)
   1351 				{
   1352 					batch[i][0] = index[(start + i + 0) % loop];
   1353 					batch[i][1] = index[(start + i + 1) % loop];
   1354 					batch[i][2] = index[(start + i + 1) % loop];
   1355 				}
   1356 			}
   1357 			break;
   1358 		case DRAW_INDEXEDLINELOOP32:
   1359 			{
   1360 				const unsigned int *index = (const unsigned int*)indices;
   1361 
   1362 				for(unsigned int i = 0; i < triangleCount; i++)
   1363 				{
   1364 					batch[i][0] = index[(start + i + 0) % loop];
   1365 					batch[i][1] = index[(start + i + 1) % loop];
   1366 					batch[i][2] = index[(start + i + 1) % loop];
   1367 				}
   1368 			}
   1369 			break;
   1370 		case DRAW_INDEXEDTRIANGLELIST8:
   1371 			{
   1372 				const unsigned char *index = (const unsigned char*)indices + 3 * start;
   1373 
   1374 				for(unsigned int i = 0; i < triangleCount; i++)
   1375 				{
   1376 					batch[i][0] = index[0];
   1377 					batch[i][1] = index[1];
   1378 					batch[i][2] = index[2];
   1379 
   1380 					index += 3;
   1381 				}
   1382 			}
   1383 			break;
   1384 		case DRAW_INDEXEDTRIANGLELIST16:
   1385 			{
   1386 				const unsigned short *index = (const unsigned short*)indices + 3 * start;
   1387 
   1388 				for(unsigned int i = 0; i < triangleCount; i++)
   1389 				{
   1390 					batch[i][0] = index[0];
   1391 					batch[i][1] = index[1];
   1392 					batch[i][2] = index[2];
   1393 
   1394 					index += 3;
   1395 				}
   1396 			}
   1397 			break;
   1398 		case DRAW_INDEXEDTRIANGLELIST32:
   1399 			{
   1400 				const unsigned int *index = (const unsigned int*)indices + 3 * start;
   1401 
   1402 				for(unsigned int i = 0; i < triangleCount; i++)
   1403 				{
   1404 					batch[i][0] = index[0];
   1405 					batch[i][1] = index[1];
   1406 					batch[i][2] = index[2];
   1407 
   1408 					index += 3;
   1409 				}
   1410 			}
   1411 			break;
   1412 		case DRAW_INDEXEDTRIANGLESTRIP8:
   1413 			{
   1414 				const unsigned char *index = (const unsigned char*)indices + start;
   1415 
   1416 				for(unsigned int i = 0; i < triangleCount; i++)
   1417 				{
   1418 					batch[i][0] = index[0];
   1419 					batch[i][1] = index[((start + i) & 1) + 1];
   1420 					batch[i][2] = index[(~(start + i) & 1) + 1];
   1421 
   1422 					index += 1;
   1423 				}
   1424 			}
   1425 			break;
   1426 		case DRAW_INDEXEDTRIANGLESTRIP16:
   1427 			{
   1428 				const unsigned short *index = (const unsigned short*)indices + start;
   1429 
   1430 				for(unsigned int i = 0; i < triangleCount; i++)
   1431 				{
   1432 					batch[i][0] = index[0];
   1433 					batch[i][1] = index[((start + i) & 1) + 1];
   1434 					batch[i][2] = index[(~(start + i) & 1) + 1];
   1435 
   1436 					index += 1;
   1437 				}
   1438 			}
   1439 			break;
   1440 		case DRAW_INDEXEDTRIANGLESTRIP32:
   1441 			{
   1442 				const unsigned int *index = (const unsigned int*)indices + start;
   1443 
   1444 				for(unsigned int i = 0; i < triangleCount; i++)
   1445 				{
   1446 					batch[i][0] = index[0];
   1447 					batch[i][1] = index[((start + i) & 1) + 1];
   1448 					batch[i][2] = index[(~(start + i) & 1) + 1];
   1449 
   1450 					index += 1;
   1451 				}
   1452 			}
   1453 			break;
   1454 		case DRAW_INDEXEDTRIANGLEFAN8:
   1455 			{
   1456 				const unsigned char *index = (const unsigned char*)indices;
   1457 
   1458 				for(unsigned int i = 0; i < triangleCount; i++)
   1459 				{
   1460 					batch[i][0] = index[start + i + 1];
   1461 					batch[i][1] = index[start + i + 2];
   1462 					batch[i][2] = index[0];
   1463 				}
   1464 			}
   1465 			break;
   1466 		case DRAW_INDEXEDTRIANGLEFAN16:
   1467 			{
   1468 				const unsigned short *index = (const unsigned short*)indices;
   1469 
   1470 				for(unsigned int i = 0; i < triangleCount; i++)
   1471 				{
   1472 					batch[i][0] = index[start + i + 1];
   1473 					batch[i][1] = index[start + i + 2];
   1474 					batch[i][2] = index[0];
   1475 				}
   1476 			}
   1477 			break;
   1478 		case DRAW_INDEXEDTRIANGLEFAN32:
   1479 			{
   1480 				const unsigned int *index = (const unsigned int*)indices;
   1481 
   1482 				for(unsigned int i = 0; i < triangleCount; i++)
   1483 				{
   1484 					batch[i][0] = index[start + i + 1];
   1485 					batch[i][1] = index[start + i + 2];
   1486 					batch[i][2] = index[0];
   1487 				}
   1488 			}
   1489 			break;
   1490 		case DRAW_QUADLIST:
   1491 			{
   1492 				unsigned int index = 4 * start / 2;
   1493 
   1494 				for(unsigned int i = 0; i < triangleCount; i += 2)
   1495 				{
   1496 					batch[i+0][0] = index + 0;
   1497 					batch[i+0][1] = index + 1;
   1498 					batch[i+0][2] = index + 2;
   1499 
   1500 					batch[i+1][0] = index + 0;
   1501 					batch[i+1][1] = index + 2;
   1502 					batch[i+1][2] = index + 3;
   1503 
   1504 					index += 4;
   1505 				}
   1506 			}
   1507 			break;
   1508 		default:
   1509 			ASSERT(false);
   1510 			return;
   1511 		}
   1512 
   1513 		task->primitiveStart = start;
   1514 		task->vertexCount = triangleCount * 3;
   1515 		vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
   1516 	}
   1517 
   1518 	int Renderer::setupSolidTriangles(int unit, int count)
   1519 	{
   1520 		Triangle *triangle = triangleBatch[unit];
   1521 		Primitive *primitive = primitiveBatch[unit];
   1522 
   1523 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
   1524 		SetupProcessor::State &state = draw.setupState;
   1525 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
   1526 
   1527 		int ms = state.multiSample;
   1528 		int pos = state.positionRegister;
   1529 		const DrawData *data = draw.data;
   1530 		int visible = 0;
   1531 
   1532 		for(int i = 0; i < count; i++, triangle++)
   1533 		{
   1534 			Vertex &v0 = triangle->v0;
   1535 			Vertex &v1 = triangle->v1;
   1536 			Vertex &v2 = triangle->v2;
   1537 
   1538 			if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
   1539 			{
   1540 				Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
   1541 
   1542 				int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
   1543 
   1544 				if(clipFlagsOr != Clipper::CLIP_FINITE)
   1545 				{
   1546 					if(!clipper->clip(polygon, clipFlagsOr, draw))
   1547 					{
   1548 						continue;
   1549 					}
   1550 				}
   1551 
   1552 				if(setupRoutine(primitive, triangle, &polygon, data))
   1553 				{
   1554 					primitive += ms;
   1555 					visible++;
   1556 				}
   1557 			}
   1558 		}
   1559 
   1560 		return visible;
   1561 	}
   1562 
   1563 	int Renderer::setupWireframeTriangle(int unit, int count)
   1564 	{
   1565 		Triangle *triangle = triangleBatch[unit];
   1566 		Primitive *primitive = primitiveBatch[unit];
   1567 		int visible = 0;
   1568 
   1569 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
   1570 		SetupProcessor::State &state = draw.setupState;
   1571 
   1572 		const Vertex &v0 = triangle[0].v0;
   1573 		const Vertex &v1 = triangle[0].v1;
   1574 		const Vertex &v2 = triangle[0].v2;
   1575 
   1576 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
   1577 
   1578 		if(state.cullMode == CULL_CLOCKWISE)
   1579 		{
   1580 			if(d >= 0) return 0;
   1581 		}
   1582 		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
   1583 		{
   1584 			if(d <= 0) return 0;
   1585 		}
   1586 
   1587 		// Copy attributes
   1588 		triangle[1].v0 = v1;
   1589 		triangle[1].v1 = v2;
   1590 		triangle[2].v0 = v2;
   1591 		triangle[2].v1 = v0;
   1592 
   1593 		if(state.color[0][0].flat)   // FIXME
   1594 		{
   1595 			for(int i = 0; i < 2; i++)
   1596 			{
   1597 				triangle[1].v0.C[i] = triangle[0].v0.C[i];
   1598 				triangle[1].v1.C[i] = triangle[0].v0.C[i];
   1599 				triangle[2].v0.C[i] = triangle[0].v0.C[i];
   1600 				triangle[2].v1.C[i] = triangle[0].v0.C[i];
   1601 			}
   1602 		}
   1603 
   1604 		for(int i = 0; i < 3; i++)
   1605 		{
   1606 			if(setupLine(*primitive, *triangle, draw))
   1607 			{
   1608 				primitive->area = 0.5f * d;
   1609 
   1610 				primitive++;
   1611 				visible++;
   1612 			}
   1613 
   1614 			triangle++;
   1615 		}
   1616 
   1617 		return visible;
   1618 	}
   1619 
   1620 	int Renderer::setupVertexTriangle(int unit, int count)
   1621 	{
   1622 		Triangle *triangle = triangleBatch[unit];
   1623 		Primitive *primitive = primitiveBatch[unit];
   1624 		int visible = 0;
   1625 
   1626 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
   1627 		SetupProcessor::State &state = draw.setupState;
   1628 
   1629 		const Vertex &v0 = triangle[0].v0;
   1630 		const Vertex &v1 = triangle[0].v1;
   1631 		const Vertex &v2 = triangle[0].v2;
   1632 
   1633 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
   1634 
   1635 		if(state.cullMode == CULL_CLOCKWISE)
   1636 		{
   1637 			if(d >= 0) return 0;
   1638 		}
   1639 		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
   1640 		{
   1641 			if(d <= 0) return 0;
   1642 		}
   1643 
   1644 		// Copy attributes
   1645 		triangle[1].v0 = v1;
   1646 		triangle[2].v0 = v2;
   1647 
   1648 		for(int i = 0; i < 3; i++)
   1649 		{
   1650 			if(setupPoint(*primitive, *triangle, draw))
   1651 			{
   1652 				primitive->area = 0.5f * d;
   1653 
   1654 				primitive++;
   1655 				visible++;
   1656 			}
   1657 
   1658 			triangle++;
   1659 		}
   1660 
   1661 		return visible;
   1662 	}
   1663 
   1664 	int Renderer::setupLines(int unit, int count)
   1665 	{
   1666 		Triangle *triangle = triangleBatch[unit];
   1667 		Primitive *primitive = primitiveBatch[unit];
   1668 		int visible = 0;
   1669 
   1670 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
   1671 		SetupProcessor::State &state = draw.setupState;
   1672 
   1673 		int ms = state.multiSample;
   1674 
   1675 		for(int i = 0; i < count; i++)
   1676 		{
   1677 			if(setupLine(*primitive, *triangle, draw))
   1678 			{
   1679 				primitive += ms;
   1680 				visible++;
   1681 			}
   1682 
   1683 			triangle++;
   1684 		}
   1685 
   1686 		return visible;
   1687 	}
   1688 
   1689 	int Renderer::setupPoints(int unit, int count)
   1690 	{
   1691 		Triangle *triangle = triangleBatch[unit];
   1692 		Primitive *primitive = primitiveBatch[unit];
   1693 		int visible = 0;
   1694 
   1695 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
   1696 		SetupProcessor::State &state = draw.setupState;
   1697 
   1698 		int ms = state.multiSample;
   1699 
   1700 		for(int i = 0; i < count; i++)
   1701 		{
   1702 			if(setupPoint(*primitive, *triangle, draw))
   1703 			{
   1704 				primitive += ms;
   1705 				visible++;
   1706 			}
   1707 
   1708 			triangle++;
   1709 		}
   1710 
   1711 		return visible;
   1712 	}
   1713 
   // Builds a polygon around the line segment from triangle.v0 to triangle.v1,
   // clips it if necessary and runs the draw call's setup routine on it.
   //
   // Returns false when both endpoints have w <= 0, when the segment has zero
   // extent after the perspective divide and W/H scaling, when the AND of the
   // polygon's clip flags differs from Clipper::CLIP_FINITE, or when the clipper
   // rejects the polygon. Otherwise returns the result of the setup routine.
   //
   // With multisampling (state.multiSample > 1) the polygon is a rectangle of
   // data.lineWidth; otherwise it is a hexagon selected from two diamonds centered
   // on the endpoints.
   1714 	bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
   1715 	{
   1716 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
   1717 		const SetupProcessor::State &state = draw.setupState;
   1718 		const DrawData &data = *draw.data;
   1719 
   1720 		float lineWidth = data.lineWidth;
   1721 
   1722 		Vertex &v0 = triangle.v0;
   1723 		Vertex &v1 = triangle.v1;
   1724 
   1725 		int pos = state.positionRegister;
   1726 
   1727 		const float4 &P0 = v0.v[pos];
   1728 		const float4 &P1 = v1.v[pos];
   1729 
   1730 		if(P0.w <= 0 && P1.w <= 0)
   1731 		{
   1732 			return false;
   1733 		}
   1734 
        		// NOTE(review): Wx16/Hx16 presumably hold 16x the viewport scale factors - confirm in DrawData.
   1735 		const float W = data.Wx16[0] * (1.0f / 16.0f);
   1736 		const float H = data.Hx16[0] * (1.0f / 16.0f);
   1737 
        		// Segment direction after the perspective divide, scaled by W and H.
   1738 		float dx = W * (P1.x / P1.w - P0.x / P0.w);
   1739 		float dy = H * (P1.y / P1.w - P0.y / P0.w);
   1740 
   1741 		if(dx == 0 && dy == 0)
   1742 		{
   1743 			return false;
   1744 		}
   1745 
   1746 		if(state.multiSample > 1)   // Rectangle
   1747 		{
   1748 			float4 P[4];
   1749 			int C[4];
   1750 
   1751 			P[0] = P0;
   1752 			P[1] = P1;
   1753 			P[2] = P1;
   1754 			P[3] = P0;
   1755 
        			// Rescale (dx, dy) to a length of half the line width.
   1756 			float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
   1757 
   1758 			dx *= scale;
   1759 			dy *= scale;
   1760 
        			// Convert the offsets back to each endpoint's pre-divide coordinates (multiply by w, undo the W/H scale).
   1761 			float dx0h = dx * P0.w / H;
   1762 			float dy0w = dy * P0.w / W;
   1763 
   1764 			float dx1h = dx * P1.w / H;
   1765 			float dy1w = dy * P1.w / W;
   1766 
        			// Displace the corners by (-dy, +dx) on one side of the segment and (+dy, -dx) on the other.
   1767 			P[0].x += -dy0w;
   1768 			P[0].y += +dx0h;
   1769 			C[0] = clipper->computeClipFlags(P[0]);
   1770 
   1771 			P[1].x += -dy1w;
   1772 			P[1].y += +dx1h;
   1773 			C[1] = clipper->computeClipFlags(P[1]);
   1774 
   1775 			P[2].x += +dy1w;
   1776 			P[2].y += -dx1h;
   1777 			C[2] = clipper->computeClipFlags(P[2]);
   1778 
   1779 			P[3].x += +dy0w;
   1780 			P[3].y += -dx0h;
   1781 			C[3] = clipper->computeClipFlags(P[3]);
   1782 
   1783 			if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
   1784 			{
   1785 				Polygon polygon(P, 4);
   1786 
   1787 				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
   1788 
   1789 				if(clipFlagsOr != Clipper::CLIP_FINITE)
   1790 				{
   1791 					if(!clipper->clip(polygon, clipFlagsOr, draw))
   1792 					{
   1793 						return false;
   1794 					}
   1795 				}
   1796 
   1797 				return setupRoutine(&primitive, &triangle, &polygon, &data);
   1798 			}
   1799 		}
   1800 		else   // Diamond test convention
   1801 		{
   1802 			float4 P[8];
   1803 			int C[8];
   1804 
        			// P[0..3] surround the first endpoint, P[4..7] the second.
   1805 			P[0] = P0;
   1806 			P[1] = P0;
   1807 			P[2] = P0;
   1808 			P[3] = P0;
   1809 			P[4] = P1;
   1810 			P[5] = P1;
   1811 			P[6] = P1;
   1812 			P[7] = P1;
   1813 
        			// Half the line width in each endpoint's pre-divide coordinates.
   1814 			float dx0 = lineWidth * 0.5f * P0.w / W;
   1815 			float dy0 = lineWidth * 0.5f * P0.w / H;
   1816 
   1817 			float dx1 = lineWidth * 0.5f * P1.w / W;
   1818 			float dy1 = lineWidth * 0.5f * P1.w / H;
   1819 
        			// Diamond corners in the order -x, +y, +x, -y for each endpoint.
   1820 			P[0].x += -dx0;
   1821 			C[0] = clipper->computeClipFlags(P[0]);
   1822 
   1823 			P[1].y += +dy0;
   1824 			C[1] = clipper->computeClipFlags(P[1]);
   1825 
   1826 			P[2].x += +dx0;
   1827 			C[2] = clipper->computeClipFlags(P[2]);
   1828 
   1829 			P[3].y += -dy0;
   1830 			C[3] = clipper->computeClipFlags(P[3]);
   1831 
   1832 			P[4].x += -dx1;
   1833 			C[4] = clipper->computeClipFlags(P[4]);
   1834 
   1835 			P[5].y += +dy1;
   1836 			C[5] = clipper->computeClipFlags(P[5]);
   1837 
   1838 			P[6].x += +dx1;
   1839 			C[6] = clipper->computeClipFlags(P[6]);
   1840 
   1841 			P[7].y += -dy1;
   1842 			C[7] = clipper->computeClipFlags(P[7]);
   1843 
   1844 			if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
   1845 			{
        				// Six of the eight diamond corners form the hexagon; which six depends on the
        				// direction of (dx, dy).
   1846 				float4 L[6];
   1847 
   1848 				if(dx > -dy)
   1849 				{
   1850 					if(dx > dy)   // Right
   1851 					{
   1852 						L[0] = P[0];
   1853 						L[1] = P[1];
   1854 						L[2] = P[5];
   1855 						L[3] = P[6];
   1856 						L[4] = P[7];
   1857 						L[5] = P[3];
   1858 					}
   1859 					else   // Down
   1860 					{
   1861 						L[0] = P[0];
   1862 						L[1] = P[4];
   1863 						L[2] = P[5];
   1864 						L[3] = P[6];
   1865 						L[4] = P[2];
   1866 						L[5] = P[3];
   1867 					}
   1868 				}
   1869 				else
   1870 				{
   1871 					if(dx > dy)   // Up
   1872 					{
   1873 						L[0] = P[0];
   1874 						L[1] = P[1];
   1875 						L[2] = P[2];
   1876 						L[3] = P[6];
   1877 						L[4] = P[7];
   1878 						L[5] = P[4];
   1879 					}
   1880 					else   // Left
   1881 					{
   1882 						L[0] = P[1];
   1883 						L[1] = P[2];
   1884 						L[2] = P[3];
   1885 						L[3] = P[7];
   1886 						L[4] = P[4];
   1887 						L[5] = P[5];
   1888 					}
   1889 				}
   1890 
   1891 				Polygon polygon(L, 6);
   1892 
        				// The clip flags of all eight diamond corners are used, not just the six selected ones.
   1893 				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
   1894 
   1895 				if(clipFlagsOr != Clipper::CLIP_FINITE)
   1896 				{
   1897 					if(!clipper->clip(polygon, clipFlagsOr, draw))
   1898 					{
   1899 						return false;
   1900 					}
   1901 				}
   1902 
   1903 				return setupRoutine(&primitive, &triangle, &polygon, &data);
   1904 			}
   1905 		}
   1906 
   1907 		return false;
   1908 	}
   1909 
   1910 	bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
   1911 	{
   1912 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
   1913 		const SetupProcessor::State &state = draw.setupState;
   1914 		const DrawData &data = *draw.data;
   1915 
   1916 		Vertex &v = triangle.v0;
   1917 
   1918 		float pSize;
   1919 
   1920 		int pts = state.pointSizeRegister;
   1921 
   1922 		if(state.pointSizeRegister != Unused)
   1923 		{
   1924 			pSize = v.v[pts].y;
   1925 		}
   1926 		else
   1927 		{
   1928 			pSize = data.point.pointSize[0];
   1929 		}
   1930 
   1931 		pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
   1932 
   1933 		float4 P[4];
   1934 		int C[4];
   1935 
   1936 		int pos = state.positionRegister;
   1937 
   1938 		P[0] = v.v[pos];
   1939 		P[1] = v.v[pos];
   1940 		P[2] = v.v[pos];
   1941 		P[3] = v.v[pos];
   1942 
   1943 		const float X = pSize * P[0].w * data.halfPixelX[0];
   1944 		const float Y = pSize * P[0].w * data.halfPixelY[0];
   1945 
   1946 		P[0].x -= X;
   1947 		P[0].y += Y;
   1948 		C[0] = clipper->computeClipFlags(P[0]);
   1949 
   1950 		P[1].x += X;
   1951 		P[1].y += Y;
   1952 		C[1] = clipper->computeClipFlags(P[1]);
   1953 
   1954 		P[2].x += X;
   1955 		P[2].y -= Y;
   1956 		C[2] = clipper->computeClipFlags(P[2]);
   1957 
   1958 		P[3].x -= X;
   1959 		P[3].y -= Y;
   1960 		C[3] = clipper->computeClipFlags(P[3]);
   1961 
   1962 		triangle.v1 = triangle.v0;
   1963 		triangle.v2 = triangle.v0;
   1964 
   1965 		triangle.v1.X += iround(16 * 0.5f * pSize);
   1966 		triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
   1967 
   1968 		Polygon polygon(P, 4);
   1969 
   1970 		if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
   1971 		{
   1972 			int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
   1973 
   1974 			if(clipFlagsOr != Clipper::CLIP_FINITE)
   1975 			{
   1976 				if(!clipper->clip(polygon, clipFlagsOr, draw))
   1977 				{
   1978 					return false;
   1979 				}
   1980 			}
   1981 
   1982 			return setupRoutine(&primitive, &triangle, &polygon, &data);
   1983 		}
   1984 
   1985 		return false;
   1986 	}
   1987 
   // Allocates the per-unit batches and starts the worker threads.
   //
   // unitCount and clusterCount are both set to ceilPow2(threadCount). Every unit
   // gets a Triangle batch and a Primitive batch of batchSize entries. Every thread
   // gets a VertexTask (with its vertex cache tagged as belonging to draw call -1),
   // an initial SUSPEND task, a resume and a suspend Event, and a Thread running
   // threadFunction.
   1988 	void Renderer::initializeThreads()
   1989 	{
   1990 		unitCount = ceilPow2(threadCount);
   1991 		clusterCount = ceilPow2(threadCount);
   1992 
   1993 		for(int i = 0; i < unitCount; i++)
   1994 		{
   1995 			triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
   1996 			primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
   1997 		}
   1998 
   1999 		for(int i = 0; i < threadCount; i++)
   2000 		{
   2001 			vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
   2002 			vertexTask[i]->vertexCache.drawCall = -1;
   2003 
   2004 			task[i].type = Task::SUSPEND;
   2005 
   2006 			resume[i] = new Event();
   2007 			suspend[i] = new Event();
   2008 
        			// 'parameters' lives on this stack frame and is handed to the new thread by address.
   2009 			Parameters parameters;
   2010 			parameters.threadIndex = i;
   2011 			parameters.renderer = this;
   2012 
   2013 			exitThreads = false;
   2014 			worker[i] = new Thread(threadFunction, &parameters);
   2015 
        			// Wait for suspend[i] to be signaled, then signal it again.
        			// NOTE(review): presumably the worker signals suspend[i] only after it has read
        			// 'parameters', which keeps the stack local valid long enough - confirm in threadFunction.
   2016 			suspend[i]->wait();
   2017 			suspend[i]->signal();
   2018 		}
   2019 	}
   2020 
   2021 	void Renderer::terminateThreads()
   2022 	{
   2023 		while(threadsAwake != 0)
   2024 		{
   2025 			Thread::sleep(1);
   2026 		}
   2027 
   2028 		for(int thread = 0; thread < threadCount; thread++)
   2029 		{
   2030 			if(worker[thread])
   2031 			{
   2032 				exitThreads = true;
   2033 				resume[thread]->signal();
   2034 				worker[thread]->join();
   2035 
   2036 				delete worker[thread];
   2037 				worker[thread] = 0;
   2038 				delete resume[thread];
   2039 				resume[thread] = 0;
   2040 				delete suspend[thread];
   2041 				suspend[thread] = 0;
   2042 			}
   2043 
   2044 			deallocate(vertexTask[thread]);
   2045 			vertexTask[thread] = 0;
   2046 		}
   2047 
   2048 		for(int i = 0; i < 16; i++)
   2049 		{
   2050 			deallocate(triangleBatch[i]);
   2051 			triangleBatch[i] = 0;
   2052 
   2053 			deallocate(primitiveBatch[i]);
   2054 			primitiveBatch[i] = 0;
   2055 		}
   2056 	}
   2057 
   2058 	void Renderer::loadConstants(const VertexShader *vertexShader)
   2059 	{
   2060 		if(!vertexShader) return;
   2061 
   2062 		size_t count = vertexShader->getLength();
   2063 
   2064 		for(size_t i = 0; i < count; i++)
   2065 		{
   2066 			const Shader::Instruction *instruction = vertexShader->getInstruction(i);
   2067 
   2068 			if(instruction->opcode == Shader::OPCODE_DEF)
   2069 			{
   2070 				int index = instruction->dst.index;
   2071 				float value[4];
   2072 
   2073 				value[0] = instruction->src[0].value[0];
   2074 				value[1] = instruction->src[0].value[1];
   2075 				value[2] = instruction->src[0].value[2];
   2076 				value[3] = instruction->src[0].value[3];
   2077 
   2078 				setVertexShaderConstantF(index, value);
   2079 			}
   2080 			else if(instruction->opcode == Shader::OPCODE_DEFI)
   2081 			{
   2082 				int index = instruction->dst.index;
   2083 				int integer[4];
   2084 
   2085 				integer[0] = instruction->src[0].integer[0];
   2086 				integer[1] = instruction->src[0].integer[1];
   2087 				integer[2] = instruction->src[0].integer[2];
   2088 				integer[3] = instruction->src[0].integer[3];
   2089 
   2090 				setVertexShaderConstantI(index, integer);
   2091 			}
   2092 			else if(instruction->opcode == Shader::OPCODE_DEFB)
   2093 			{
   2094 				int index = instruction->dst.index;
   2095 				int boolean = instruction->src[0].boolean[0];
   2096 
   2097 				setVertexShaderConstantB(index, &boolean);
   2098 			}
   2099 		}
   2100 	}
   2101 
   2102 	void Renderer::loadConstants(const PixelShader *pixelShader)
   2103 	{
   2104 		if(!pixelShader) return;
   2105 
   2106 		size_t count = pixelShader->getLength();
   2107 
   2108 		for(size_t i = 0; i < count; i++)
   2109 		{
   2110 			const Shader::Instruction *instruction = pixelShader->getInstruction(i);
   2111 
   2112 			if(instruction->opcode == Shader::OPCODE_DEF)
   2113 			{
   2114 				int index = instruction->dst.index;
   2115 				float value[4];
   2116 
   2117 				value[0] = instruction->src[0].value[0];
   2118 				value[1] = instruction->src[0].value[1];
   2119 				value[2] = instruction->src[0].value[2];
   2120 				value[3] = instruction->src[0].value[3];
   2121 
   2122 				setPixelShaderConstantF(index, value);
   2123 			}
   2124 			else if(instruction->opcode == Shader::OPCODE_DEFI)
   2125 			{
   2126 				int index = instruction->dst.index;
   2127 				int integer[4];
   2128 
   2129 				integer[0] = instruction->src[0].integer[0];
   2130 				integer[1] = instruction->src[0].integer[1];
   2131 				integer[2] = instruction->src[0].integer[2];
   2132 				integer[3] = instruction->src[0].integer[3];
   2133 
   2134 				setPixelShaderConstantI(index, integer);
   2135 			}
   2136 			else if(instruction->opcode == Shader::OPCODE_DEFB)
   2137 			{
   2138 				int index = instruction->dst.index;
   2139 				int boolean = instruction->src[0].boolean[0];
   2140 
   2141 				setPixelShaderConstantB(index, &boolean);
   2142 			}
   2143 		}
   2144 	}
   2145 
	// Stores the given resource as the context's index buffer. The pointer is
	// stored as-is; nothing is copied here.
	void Renderer::setIndexBuffer(Resource *indexBuffer)
	{
		context->indexBuffer = indexBuffer;
	}
   2150 
	// Stores the given mask as the context's sample mask.
	void Renderer::setMultiSampleMask(unsigned int mask)
	{
		context->sampleMask = mask;
	}
   2155 
	// Assigns the namespace-level sw::transparencyAntialiasing setting. The
	// explicit sw:: qualification is required because the parameter has the
	// same name.
	void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
	{
		sw::transparencyAntialiasing = transparencyAntialiasing;
	}
   2160 
   2161 	bool Renderer::isReadWriteTexture(int sampler)
   2162 	{
   2163 		for(int index = 0; index < RENDERTARGETS; index++)
   2164 		{
   2165 			if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
   2166 			{
   2167 				return true;
   2168 			}
   2169 		}
   2170 
   2171 		if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
   2172 		{
   2173 			return true;
   2174 		}
   2175 
   2176 		return false;
   2177 	}
   2178 
   2179 	void Renderer::updateClipper()
   2180 	{
   2181 		if(updateClipPlanes)
   2182 		{
   2183 			if(VertexProcessor::isFixedFunction())   // User plane in world space
   2184 			{
   2185 				const Matrix &scissorWorld = getViewTransform();
   2186 
   2187 				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
   2188 				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
   2189 				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
   2190 				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
   2191 				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
   2192 				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
   2193 			}
   2194 			else   // User plane in clip space
   2195 			{
   2196 				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
   2197 				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
   2198 				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
   2199 				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
   2200 				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
   2201 				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
   2202 			}
   2203 
   2204 			updateClipPlanes = false;
   2205 		}
   2206 	}
   2207 
	// Stores the given resource in the context's texture slot for `sampler`.
	// Asserts that the sampler index is below TOTAL_IMAGE_UNITS.
	void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
	{
		ASSERT(sampler < TOTAL_IMAGE_UNITS);

		context->texture[sampler] = resource;
	}
   2214 
	// Forwards the surface for one face and mipmap level, together with the
	// texture type, to the context's sampler at index `sampler`. Asserts that
	// sampler < TOTAL_IMAGE_UNITS, face < 6 and level < MIPMAP_LEVELS.
	void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
	{
		ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);

		context->sampler[sampler].setTextureLevel(face, level, surface, type);
	}
   2221 
   2222 	void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
   2223 	{
   2224 		if(type == SAMPLER_PIXEL)
   2225 		{
   2226 			PixelProcessor::setTextureFilter(sampler, textureFilter);
   2227 		}
   2228 		else
   2229 		{
   2230 			VertexProcessor::setTextureFilter(sampler, textureFilter);
   2231 		}
   2232 	}
   2233 
   2234 	void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
   2235 	{
   2236 		if(type == SAMPLER_PIXEL)
   2237 		{
   2238 			PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
   2239 		}
   2240 		else
   2241 		{
   2242 			VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
   2243 		}
   2244 	}
   2245 
   2246 	void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
   2247 	{
   2248 		if(type == SAMPLER_PIXEL)
   2249 		{
   2250 			PixelProcessor::setGatherEnable(sampler, enable);
   2251 		}
   2252 		else
   2253 		{
   2254 			VertexProcessor::setGatherEnable(sampler, enable);
   2255 		}
   2256 	}
   2257 
   2258 	void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
   2259 	{
   2260 		if(type == SAMPLER_PIXEL)
   2261 		{
   2262 			PixelProcessor::setAddressingModeU(sampler, addressMode);
   2263 		}
   2264 		else
   2265 		{
   2266 			VertexProcessor::setAddressingModeU(sampler, addressMode);
   2267 		}
   2268 	}
   2269 
   2270 	void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
   2271 	{
   2272 		if(type == SAMPLER_PIXEL)
   2273 		{
   2274 			PixelProcessor::setAddressingModeV(sampler, addressMode);
   2275 		}
   2276 		else
   2277 		{
   2278 			VertexProcessor::setAddressingModeV(sampler, addressMode);
   2279 		}
   2280 	}
   2281 
   2282 	void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
   2283 	{
   2284 		if(type == SAMPLER_PIXEL)
   2285 		{
   2286 			PixelProcessor::setAddressingModeW(sampler, addressMode);
   2287 		}
   2288 		else
   2289 		{
   2290 			VertexProcessor::setAddressingModeW(sampler, addressMode);
   2291 		}
   2292 	}
   2293 
   2294 	void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
   2295 	{
   2296 		if(type == SAMPLER_PIXEL)
   2297 		{
   2298 			PixelProcessor::setReadSRGB(sampler, sRGB);
   2299 		}
   2300 		else
   2301 		{
   2302 			VertexProcessor::setReadSRGB(sampler, sRGB);
   2303 		}
   2304 	}
   2305 
   2306 	void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
   2307 	{
   2308 		if(type == SAMPLER_PIXEL)
   2309 		{
   2310 			PixelProcessor::setMipmapLOD(sampler, bias);
   2311 		}
   2312 		else
   2313 		{
   2314 			VertexProcessor::setMipmapLOD(sampler, bias);
   2315 		}
   2316 	}
   2317 
   2318 	void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
   2319 	{
   2320 		if(type == SAMPLER_PIXEL)
   2321 		{
   2322 			PixelProcessor::setBorderColor(sampler, borderColor);
   2323 		}
   2324 		else
   2325 		{
   2326 			VertexProcessor::setBorderColor(sampler, borderColor);
   2327 		}
   2328 	}
   2329 
   2330 	void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
   2331 	{
   2332 		if(type == SAMPLER_PIXEL)
   2333 		{
   2334 			PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
   2335 		}
   2336 		else
   2337 		{
   2338 			VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
   2339 		}
   2340 	}
   2341 
   2342 	void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering)
   2343 	{
   2344 		if(type == SAMPLER_PIXEL)
   2345 		{
   2346 			PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
   2347 		}
   2348 		else
   2349 		{
   2350 			VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
   2351 		}
   2352 	}
   2353 
   2354 	void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
   2355 	{
   2356 		if(type == SAMPLER_PIXEL)
   2357 		{
   2358 			PixelProcessor::setSwizzleR(sampler, swizzleR);
   2359 		}
   2360 		else
   2361 		{
   2362 			VertexProcessor::setSwizzleR(sampler, swizzleR);
   2363 		}
   2364 	}
   2365 
   2366 	void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
   2367 	{
   2368 		if(type == SAMPLER_PIXEL)
   2369 		{
   2370 			PixelProcessor::setSwizzleG(sampler, swizzleG);
   2371 		}
   2372 		else
   2373 		{
   2374 			VertexProcessor::setSwizzleG(sampler, swizzleG);
   2375 		}
   2376 	}
   2377 
   2378 	void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
   2379 	{
   2380 		if(type == SAMPLER_PIXEL)
   2381 		{
   2382 			PixelProcessor::setSwizzleB(sampler, swizzleB);
   2383 		}
   2384 		else
   2385 		{
   2386 			VertexProcessor::setSwizzleB(sampler, swizzleB);
   2387 		}
   2388 	}
   2389 
   2390 	void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
   2391 	{
   2392 		if(type == SAMPLER_PIXEL)
   2393 		{
   2394 			PixelProcessor::setSwizzleA(sampler, swizzleA);
   2395 		}
   2396 		else
   2397 		{
   2398 			VertexProcessor::setSwizzleA(sampler, swizzleA);
   2399 		}
   2400 	}
   2401 
   2402 	void Renderer::setCompareFunc(SamplerType type, int sampler, CompareFunc compFunc)
   2403 	{
   2404 		if(type == SAMPLER_PIXEL)
   2405 		{
   2406 			PixelProcessor::setCompareFunc(sampler, compFunc);
   2407 		}
   2408 		else
   2409 		{
   2410 			VertexProcessor::setCompareFunc(sampler, compFunc);
   2411 		}
   2412 	}
   2413 
   2414 	void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
   2415 	{
   2416 		if(type == SAMPLER_PIXEL)
   2417 		{
   2418 			PixelProcessor::setBaseLevel(sampler, baseLevel);
   2419 		}
   2420 		else
   2421 		{
   2422 			VertexProcessor::setBaseLevel(sampler, baseLevel);
   2423 		}
   2424 	}
   2425 
   2426 	void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
   2427 	{
   2428 		if(type == SAMPLER_PIXEL)
   2429 		{
   2430 			PixelProcessor::setMaxLevel(sampler, maxLevel);
   2431 		}
   2432 		else
   2433 		{
   2434 			VertexProcessor::setMaxLevel(sampler, maxLevel);
   2435 		}
   2436 	}
   2437 
   2438 	void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
   2439 	{
   2440 		if(type == SAMPLER_PIXEL)
   2441 		{
   2442 			PixelProcessor::setMinLod(sampler, minLod);
   2443 		}
   2444 		else
   2445 		{
   2446 			VertexProcessor::setMinLod(sampler, minLod);
   2447 		}
   2448 	}
   2449 
   2450 	void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
   2451 	{
   2452 		if(type == SAMPLER_PIXEL)
   2453 		{
   2454 			PixelProcessor::setMaxLod(sampler, maxLod);
   2455 		}
   2456 		else
   2457 		{
   2458 			VertexProcessor::setMaxLod(sampler, maxLod);
   2459 		}
   2460 	}
   2461 
	// Forwards the point sprite enable flag to the context.
	void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
	{
		context->setPointSpriteEnable(pointSpriteEnable);
	}
   2466 
	// Forwards the point scale enable flag to the context.
	void Renderer::setPointScaleEnable(bool pointScaleEnable)
	{
		context->setPointScaleEnable(pointScaleEnable);
	}
   2471 
	// Stores the line width in the context. The value is not validated here.
	void Renderer::setLineWidth(float width)
	{
		context->lineWidth = width;
	}
   2476 
	// Stores the constant depth bias in the context.
	void Renderer::setDepthBias(float bias)
	{
		context->depthBias = bias;
	}
   2481 
	// Stores the slope depth bias in the context.
	void Renderer::setSlopeDepthBias(float slopeBias)
	{
		context->slopeDepthBias = slopeBias;
	}
   2486 
	// Stores the rasterizer discard flag in the context.
	void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
	{
		context->rasterizerDiscard = rasterizerDiscard;
	}
   2491 
	// Makes `shader` the context's pixel shader and uploads the constants that
	// the shader itself defines. A null shader is stored as-is; loadConstants
	// returns immediately for it.
	void Renderer::setPixelShader(const PixelShader *shader)
	{
		context->pixelShader = shader;

		loadConstants(shader);
	}
   2498 
	// Makes `shader` the context's vertex shader and uploads the constants that
	// the shader itself defines. A null shader is stored as-is; loadConstants
	// returns immediately for it.
	void Renderer::setVertexShader(const VertexShader *shader)
	{
		context->vertexShader = shader;

		loadConstants(shader);
	}
   2505 
   2506 	void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count)
   2507 	{
   2508 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2509 		{
   2510 			if(drawCall[i]->psDirtyConstF < index + count)
   2511 			{
   2512 				drawCall[i]->psDirtyConstF = index + count;
   2513 			}
   2514 		}
   2515 
   2516 		for(unsigned int i = 0; i < count; i++)
   2517 		{
   2518 			PixelProcessor::setFloatConstant(index + i, value);
   2519 			value += 4;
   2520 		}
   2521 	}
   2522 
   2523 	void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count)
   2524 	{
   2525 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2526 		{
   2527 			if(drawCall[i]->psDirtyConstI < index + count)
   2528 			{
   2529 				drawCall[i]->psDirtyConstI = index + count;
   2530 			}
   2531 		}
   2532 
   2533 		for(unsigned int i = 0; i < count; i++)
   2534 		{
   2535 			PixelProcessor::setIntegerConstant(index + i, value);
   2536 			value += 4;
   2537 		}
   2538 	}
   2539 
   2540 	void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
   2541 	{
   2542 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2543 		{
   2544 			if(drawCall[i]->psDirtyConstB < index + count)
   2545 			{
   2546 				drawCall[i]->psDirtyConstB = index + count;
   2547 			}
   2548 		}
   2549 
   2550 		for(unsigned int i = 0; i < count; i++)
   2551 		{
   2552 			PixelProcessor::setBooleanConstant(index + i, *boolean);
   2553 			boolean++;
   2554 		}
   2555 	}
   2556 
   2557 	void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count)
   2558 	{
   2559 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2560 		{
   2561 			if(drawCall[i]->vsDirtyConstF < index + count)
   2562 			{
   2563 				drawCall[i]->vsDirtyConstF = index + count;
   2564 			}
   2565 		}
   2566 
   2567 		for(unsigned int i = 0; i < count; i++)
   2568 		{
   2569 			VertexProcessor::setFloatConstant(index + i, value);
   2570 			value += 4;
   2571 		}
   2572 	}
   2573 
   2574 	void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count)
   2575 	{
   2576 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2577 		{
   2578 			if(drawCall[i]->vsDirtyConstI < index + count)
   2579 			{
   2580 				drawCall[i]->vsDirtyConstI = index + count;
   2581 			}
   2582 		}
   2583 
   2584 		for(unsigned int i = 0; i < count; i++)
   2585 		{
   2586 			VertexProcessor::setIntegerConstant(index + i, value);
   2587 			value += 4;
   2588 		}
   2589 	}
   2590 
   2591 	void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
   2592 	{
   2593 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
   2594 		{
   2595 			if(drawCall[i]->vsDirtyConstB < index + count)
   2596 			{
   2597 				drawCall[i]->vsDirtyConstB = index + count;
   2598 			}
   2599 		}
   2600 
   2601 		for(unsigned int i = 0; i < count; i++)
   2602 		{
   2603 			VertexProcessor::setBooleanConstant(index + i, *boolean);
   2604 			boolean++;
   2605 		}
   2606 	}
   2607 
	// Forwards model matrix M for matrix slot i to the vertex processor.
	// Unlike the view, base and projection setters, this one does not flag the
	// clip planes for an update.
	void Renderer::setModelMatrix(const Matrix &M, int i)
	{
		VertexProcessor::setModelMatrix(M, i);
	}
   2612 
	// Forwards the view matrix to the vertex processor and flags the clip
	// planes as needing to be recomputed.
	void Renderer::setViewMatrix(const Matrix &V)
	{
		VertexProcessor::setViewMatrix(V);
		updateClipPlanes = true;
	}
   2618 
	// Forwards the base matrix to the vertex processor and flags the clip
	// planes as needing to be recomputed.
	void Renderer::setBaseMatrix(const Matrix &B)
	{
		VertexProcessor::setBaseMatrix(B);
		updateClipPlanes = true;
	}
   2624 
	// Forwards the projection matrix to the vertex processor and flags the clip
	// planes as needing to be recomputed.
	void Renderer::setProjectionMatrix(const Matrix &P)
	{
		VertexProcessor::setProjectionMatrix(P);
		updateClipPlanes = true;
	}
   2630 
	// Appends the query to the end of the renderer's query collection.
	void Renderer::addQuery(Query *query)
	{
		queries.push_back(query);
	}
   2635 
	// Removes the query from the renderer's query collection.
	// NOTE(review): presumably `queries` is a std::list, whose remove() erases
	// every matching element - confirm against the declaration.
	void Renderer::removeQuery(Query *query)
	{
		queries.remove(query);
	}
   2640 
   2641 	#if PERF_HUD
		// Returns the renderer's current threadCount. Only compiled when PERF_HUD is enabled.
		int Renderer::getThreadCount()
		{
			return threadCount;
		}
   2646 
		// Returns the vertexTime entry of the given thread index.
		// The index is not range-checked here.
		int64_t Renderer::getVertexTime(int thread)
		{
			return vertexTime[thread];
		}
   2651 
		// Returns the setupTime entry of the given thread index.
		// The index is not range-checked here.
		int64_t Renderer::getSetupTime(int thread)
		{
			return setupTime[thread];
		}
   2656 
		// Returns the pixelTime entry of the given thread index.
		// The index is not range-checked here.
		int64_t Renderer::getPixelTime(int thread)
		{
			return pixelTime[thread];
		}
   2661 
   2662 		void Renderer::resetTimers()
   2663 		{
   2664 			for(int thread = 0; thread < threadCount; thread++)
   2665 			{
   2666 				vertexTime[thread] = 0;
   2667 				setupTime[thread] = 0;
   2668 				pixelTime[thread] = 0;
   2669 			}
   2670 		}
   2671 	#endif
   2672 
	// Copies the given viewport into the renderer's viewport member.
	void Renderer::setViewport(const Viewport &viewport)
	{
		this->viewport = viewport;
	}
   2677 
	// Copies the given rectangle into the renderer's scissor member.
	void Renderer::setScissor(const Rect &scissor)
	{
		this->scissor = scissor;
	}
   2682 
	// Stores the given flags shifted left by 8 bits, leaving the low 8 bits of
	// clipFlags clear. This does not set updateClipPlanes.
	void Renderer::setClipFlags(int flags)
	{
		clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
	}
   2687 
   2688 	void Renderer::setClipPlane(unsigned int index, const float plane[4])
   2689 	{
   2690 		if(index < MAX_CLIP_PLANES)
   2691 		{
   2692 			userPlane[index] = plane;
   2693 		}
   2694 		else ASSERT(false);
   2695 
   2696 		updateClipPlanes = true;
   2697 	}
   2698 
   2699 	void Renderer::updateConfiguration(bool initialUpdate)
   2700 	{
   2701 		bool newConfiguration = swiftConfig->hasNewConfiguration();
   2702 
   2703 		if(newConfiguration || initialUpdate)
   2704 		{
   2705 			terminateThreads();
   2706 
   2707 			SwiftConfig::Configuration configuration = {};
   2708 			swiftConfig->getConfiguration(configuration);
   2709 
   2710 			precacheVertex = !newConfiguration && configuration.precache;
   2711 			precacheSetup = !newConfiguration && configuration.precache;
   2712 			precachePixel = !newConfiguration && configuration.precache;
   2713 
   2714 			VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
   2715 			PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
   2716 			SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
   2717 
   2718 			switch(configuration.textureSampleQuality)
   2719 			{
   2720 			case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
   2721 			case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
   2722 			case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
   2723 			default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
   2724 			}
   2725 
   2726 			switch(configuration.mipmapQuality)
   2727 			{
   2728 			case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
   2729 			case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
   2730 			default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
   2731 			}
   2732 
   2733 			setPerspectiveCorrection(configuration.perspectiveCorrection);
   2734 
   2735 			switch(configuration.transcendentalPrecision)
   2736 			{
   2737 			case 0:
   2738 				logPrecision = APPROXIMATE;
   2739 				expPrecision = APPROXIMATE;
   2740 				rcpPrecision = APPROXIMATE;
   2741 				rsqPrecision = APPROXIMATE;
   2742 				break;
   2743 			case 1:
   2744 				logPrecision = PARTIAL;
   2745 				expPrecision = PARTIAL;
   2746 				rcpPrecision = PARTIAL;
   2747 				rsqPrecision = PARTIAL;
   2748 				break;
   2749 			case 2:
   2750 				logPrecision = ACCURATE;
   2751 				expPrecision = ACCURATE;
   2752 				rcpPrecision = ACCURATE;
   2753 				rsqPrecision = ACCURATE;
   2754 				break;
   2755 			case 3:
   2756 				logPrecision = WHQL;
   2757 				expPrecision = WHQL;
   2758 				rcpPrecision = WHQL;
   2759 				rsqPrecision = WHQL;
   2760 				break;
   2761 			case 4:
   2762 				logPrecision = IEEE;
   2763 				expPrecision = IEEE;
   2764 				rcpPrecision = IEEE;
   2765 				rsqPrecision = IEEE;
   2766 				break;
   2767 			default:
   2768 				logPrecision = ACCURATE;
   2769 				expPrecision = ACCURATE;
   2770 				rcpPrecision = ACCURATE;
   2771 				rsqPrecision = ACCURATE;
   2772 				break;
   2773 			}
   2774 
   2775 			switch(configuration.transparencyAntialiasing)
   2776 			{
   2777 			case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
   2778 			case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
   2779 			default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
   2780 			}
   2781 
   2782 			switch(configuration.threadCount)
   2783 			{
   2784 			case -1: threadCount = CPUID::coreCount();        break;
   2785 			case 0:  threadCount = CPUID::processAffinity();  break;
   2786 			default: threadCount = configuration.threadCount; break;
   2787 			}
   2788 
   2789 			CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
   2790 			CPUID::setEnableSSSE3(configuration.enableSSSE3);
   2791 			CPUID::setEnableSSE3(configuration.enableSSE3);
   2792 			CPUID::setEnableSSE2(configuration.enableSSE2);
   2793 			CPUID::setEnableSSE(configuration.enableSSE);
   2794 
   2795 			for(int pass = 0; pass < 10; pass++)
   2796 			{
   2797 				optimization[pass] = configuration.optimization[pass];
   2798 			}
   2799 
   2800 			forceWindowed = configuration.forceWindowed;
   2801 			complementaryDepthBuffer = configuration.complementaryDepthBuffer;
   2802 			postBlendSRGB = configuration.postBlendSRGB;
   2803 			exactColorRounding = configuration.exactColorRounding;
   2804 			forceClearRegisters = configuration.forceClearRegisters;
   2805 
   2806 		#ifndef NDEBUG
   2807 			minPrimitives = configuration.minPrimitives;
   2808 			maxPrimitives = configuration.maxPrimitives;
   2809 		#endif
   2810 		}
   2811 
   2812 		if(!initialUpdate && !worker[0])
   2813 		{
   2814 			initializeThreads();
   2815 		}
   2816 	}
   2817 }
   2818