// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "Renderer.hpp"

#include "Clipper.hpp"
#include "Surface.hpp"
#include "Primitive.hpp"
#include "Polygon.hpp"
#include "Main/FrameBuffer.hpp"
#include "Main/SwiftConfig.hpp"
#include "Reactor/Reactor.hpp"
#include "Shader/Constants.hpp"
#include "Common/MutexLock.hpp"
#include "Common/CPUID.hpp"
#include "Common/Memory.hpp"
#include "Common/Resource.hpp"
#include "Common/Half.hpp"
#include "Common/Math.hpp"
#include "Common/Timer.hpp"
#include "Common/Debug.hpp"

#undef max

// Forwarded to the SwiftConfig constructor in Renderer::Renderer().
bool disableServer = true;

#ifndef NDEBUG
// Debug builds only: Renderer::draw() returns immediately when its primitive
// count lies outside [minPrimitives, maxPrimitives].
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1 << 21;
#endif

namespace sw
{
// Settings declared here and defined elsewhere. The first seven are assigned
// from the Conventions argument in Renderer::Renderer(); exactColorRounding is
// assigned from that constructor's parameter of the same name.
extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
extern bool booleanFaceRegister;
extern bool fullPixelPositionRegister;
extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
extern bool secondaryColor;             // Specular lighting is applied after texturing
extern bool colorsDefaultToZero;

extern bool forceWindowed;
extern bool complementaryDepthBuffer;
extern bool postBlendSRGB;
extern bool exactColorRounding;
extern TransparencyAntialiasing transparencyAntialiasing;
extern bool forceClearRegisters;

extern bool precacheVertex;
extern bool precacheSetup;
extern bool precachePixel;

// Default number of primitives per batch. Renderer::draw() divides this by the
// multisample count to obtain the batch size stored in each DrawCall.
static const int batchSize = 128;
// Compared against threadsAwake by the scheduler (see Renderer::scheduleTask()).
AtomicInt threadCount(1);
// Upper bound of the loops over primitiveProgress[] in findAvailableTasks().
AtomicInt Renderer::unitCount(1);
// Upper bound of the loops over pixelProgress[] and the per-cluster draw data.
AtomicInt Renderer::clusterCount(1);

TranscendentalPrecision logPrecision = ACCURATE;
TranscendentalPrecision expPrecision = ACCURATE;
TranscendentalPrecision rcpPrecision = ACCURATE;
TranscendentalPrecision rsqPrecision = ACCURATE;
bool perspectiveCorrection = true;

// Argument bundle unpacked by Renderer::threadFunction().
struct Parameters
{
	Renderer *renderer;
	int threadIndex;
};

// Creates an unused draw call slot and allocates its DrawData.
DrawCall::DrawCall()
{
	queries = 0;

	// Non-zero dirty counts make Renderer::draw() copy that many shader
	// constants into 'data' the first time this slot is used with a shader.
	vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
	vsDirtyConstI = 16;
	vsDirtyConstB = 16;

	psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
	psDirtyConstI = 16;
	psDirtyConstB = 16;

	// -1 marks the slot as free; Renderer::draw() searches for this value.
	references = -1;

	data = (DrawData*)allocate(sizeof(DrawData));
	data->constants = &constants;
}

// Releases the query list (if any) and the DrawData allocated by the constructor.
DrawCall::~DrawCall()
{
	delete queries;

	deallocate(data);
}

// Publishes the API conventions to the sw:: globals, creates the clipper,
// blitter, draw call pool and synchronization objects, and applies the initial
// configuration.
Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
{
	sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
	sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
	sw::booleanFaceRegister = conventions.booleanFaceRegister;
	sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
	sw::leadingVertexFirst = conventions.leadingVertexFirst;
	sw::secondaryColor = conventions.secondaryColor;
	sw::colorsDefaultToZero = conventions.colorsDefaultToZero;
	sw::exactColorRounding = exactColorRounding;

	setRenderTarget(0, 0);
	clipper = new Clipper(symmetricNormalizedDepth);
	blitter = new Blitter;

	updateViewMatrix = true;
	updateBaseMatrix = true;
	updateProjectionMatrix = true;
	updateClipPlanes = true;

	#if PERF_HUD
		resetTimers();
	#endif

	// Clear the per-thread slots (16 entries each).
	for(int i = 0; i < 16; i++)
	{
		vertexTask[i] = 0;

		worker[i] = 0;
		resume[i] = 0;
		suspend[i] = 0;
	}

	threadsAwake = 0;
	resumeApp = new Event();   // Waited on by draw(), signaled by finishRendering()

	currentDraw = 0;
	nextDraw = 0;

	// Empty task queue
	qHead = 0;
	qSize = 0;

	for(int i = 0; i < 16; i++)
	{
		triangleBatch[i] = 0;
		primitiveBatch[i] = 0;
	}

	// Pool of reusable draw calls; drawList initially mirrors the pool order.
	for(int draw = 0; draw < DRAW_COUNT; draw++)
	{
		drawCall[draw] = new DrawCall();
		drawList[draw] = drawCall[draw];
	}

	for(int unit = 0; unit < 16; unit++)
	{
		primitiveProgress[unit].init();
	}

	for(int cluster = 0; cluster < 16; cluster++)
	{
		pixelProgress[cluster].init();
	}

	clipFlags = 0;

	swiftConfig = new SwiftConfig(disableServer);
	updateConfiguration(true);

	// Locked per draw in draw(), released in finishRendering(), and used by
	// synchronize().
	sync = new Resource(0);
}

// Tears down everything the constructor created.
Renderer::~Renderer()
{
	sync->destruct();

	delete clipper;
	clipper = nullptr;

	delete blitter;
	blitter = nullptr;

	terminateThreads();
	delete resumeApp;

	for(int draw = 0; draw < DRAW_COUNT; draw++)
	{
		delete drawCall[draw];
	}

	delete swiftConfig;
}

// This object has to be mem aligned
// NOTE(review): the second argument of sw::allocate() is presumably the
// alignment in bytes - confirm against Common/Memory.hpp.
void* Renderer::operator new(size_t size)
{
	ASSERT(size == sizeof(Renderer));   // This operator can't be called from a derived class
	return sw::allocate(sizeof(Renderer), 16);
}

// Counterpart of operator new above; releases memory obtained from sw::allocate().
void Renderer::operator delete(void * mem)
{
	sw::deallocate(mem);
}

// Queues one draw call per enabled supersample pass.
//   drawType    - primitive topology; stored in the context and the draw call
//   indexOffset - byte offset added to the locked index buffer pointer
//   count       - number of primitives
//   update      - when true, processor states and routines are always refreshed
void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
{
	#ifndef NDEBUG
		// Debug aid: skip draws outside the configured primitive count range.
		if(count < minPrimitives || count > maxPrimitives)
		{
			return;
		}
	#endif

	context->drawType = drawType;

	updateConfiguration();
	updateClipper();

	int ss = context->getSuperSampleCount();
	int ms = context->getMultiSampleCount();

	// One pass (and one DrawCall) per supersample.
	for(int q = 0; q < ss; q++)
	{
unsigned int oldMultiSampleMask = context->multiSampleMask; 231 context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms)); 232 233 if(!context->multiSampleMask) 234 { 235 continue; 236 } 237 238 sync->lock(sw::PRIVATE); 239 240 if(update || oldMultiSampleMask != context->multiSampleMask) 241 { 242 vertexState = VertexProcessor::update(drawType); 243 setupState = SetupProcessor::update(); 244 pixelState = PixelProcessor::update(); 245 246 vertexRoutine = VertexProcessor::routine(vertexState); 247 setupRoutine = SetupProcessor::routine(setupState); 248 pixelRoutine = PixelProcessor::routine(pixelState); 249 } 250 251 int batch = batchSize / ms; 252 253 int (Renderer::*setupPrimitives)(int batch, int count); 254 255 if(context->isDrawTriangle()) 256 { 257 switch(context->fillMode) 258 { 259 case FILL_SOLID: 260 setupPrimitives = &Renderer::setupSolidTriangles; 261 break; 262 case FILL_WIREFRAME: 263 setupPrimitives = &Renderer::setupWireframeTriangle; 264 batch = 1; 265 break; 266 case FILL_VERTEX: 267 setupPrimitives = &Renderer::setupVertexTriangle; 268 batch = 1; 269 break; 270 default: 271 ASSERT(false); 272 return; 273 } 274 } 275 else if(context->isDrawLine()) 276 { 277 setupPrimitives = &Renderer::setupLines; 278 } 279 else // Point draw 280 { 281 setupPrimitives = &Renderer::setupPoints; 282 } 283 284 DrawCall *draw = nullptr; 285 286 do 287 { 288 for(int i = 0; i < DRAW_COUNT; i++) 289 { 290 if(drawCall[i]->references == -1) 291 { 292 draw = drawCall[i]; 293 drawList[nextDraw & DRAW_COUNT_BITS] = draw; 294 295 break; 296 } 297 } 298 299 if(!draw) 300 { 301 resumeApp->wait(); 302 } 303 } 304 while(!draw); 305 306 DrawData *data = draw->data; 307 308 if(queries.size() != 0) 309 { 310 draw->queries = new std::list<Query*>(); 311 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled; 312 for(auto &query : queries) 313 { 314 
if(includePrimitivesWrittenQueries || (query->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN)) 315 { 316 ++query->reference; // Atomic 317 draw->queries->push_back(query); 318 } 319 } 320 } 321 322 draw->drawType = drawType; 323 draw->batchSize = batch; 324 325 vertexRoutine->bind(); 326 setupRoutine->bind(); 327 pixelRoutine->bind(); 328 329 draw->vertexRoutine = vertexRoutine; 330 draw->setupRoutine = setupRoutine; 331 draw->pixelRoutine = pixelRoutine; 332 draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry(); 333 draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry(); 334 draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry(); 335 draw->setupPrimitives = setupPrimitives; 336 draw->setupState = setupState; 337 338 for(int i = 0; i < MAX_VERTEX_INPUTS; i++) 339 { 340 draw->vertexStream[i] = context->input[i].resource; 341 data->input[i] = context->input[i].buffer; 342 data->stride[i] = context->input[i].stride; 343 344 if(draw->vertexStream[i]) 345 { 346 draw->vertexStream[i]->lock(PUBLIC, PRIVATE); 347 } 348 } 349 350 if(context->indexBuffer) 351 { 352 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset; 353 } 354 355 draw->indexBuffer = context->indexBuffer; 356 357 for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++) 358 { 359 draw->texture[sampler] = 0; 360 } 361 362 for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++) 363 { 364 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL) 365 { 366 draw->texture[sampler] = context->texture[sampler]; 367 draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? 
MANAGED : PRIVATE); // If the texure is both read and written, use the same read/write lock as render targets 368 369 data->mipmap[sampler] = context->sampler[sampler].getTextureData(); 370 } 371 } 372 373 if(context->pixelShader) 374 { 375 if(draw->psDirtyConstF) 376 { 377 memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8)); 378 memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF); 379 draw->psDirtyConstF = 0; 380 } 381 382 if(draw->psDirtyConstI) 383 { 384 memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI); 385 draw->psDirtyConstI = 0; 386 } 387 388 if(draw->psDirtyConstB) 389 { 390 memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB); 391 draw->psDirtyConstB = 0; 392 } 393 394 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers); 395 } 396 else 397 { 398 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 399 { 400 draw->pUniformBuffers[i] = nullptr; 401 } 402 } 403 404 if(context->pixelShaderModel() <= 0x0104) 405 { 406 for(int stage = 0; stage < 8; stage++) 407 { 408 if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader) 409 { 410 data->textureStage[stage] = context->textureStage[stage].uniforms; 411 } 412 else break; 413 } 414 } 415 416 if(context->vertexShader) 417 { 418 if(context->vertexShader->getShaderModel() >= 0x0300) 419 { 420 for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++) 421 { 422 if(vertexState.sampler[sampler].textureType != TEXTURE_NULL) 423 { 424 draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler]; 425 draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE); 426 427 data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData(); 428 } 429 } 430 } 431 432 if(draw->vsDirtyConstF) 433 { 434 memcpy(&data->vs.c, 
VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF); 435 draw->vsDirtyConstF = 0; 436 } 437 438 if(draw->vsDirtyConstI) 439 { 440 memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI); 441 draw->vsDirtyConstI = 0; 442 } 443 444 if(draw->vsDirtyConstB) 445 { 446 memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB); 447 draw->vsDirtyConstB = 0; 448 } 449 450 if(context->vertexShader->isInstanceIdDeclared()) 451 { 452 data->instanceID = context->instanceID; 453 } 454 455 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers); 456 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers); 457 } 458 else 459 { 460 data->ff = ff; 461 462 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; 463 draw->vsDirtyConstI = 16; 464 draw->vsDirtyConstB = 16; 465 466 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 467 { 468 draw->vUniformBuffers[i] = nullptr; 469 } 470 471 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) 472 { 473 draw->transformFeedbackBuffers[i] = nullptr; 474 } 475 } 476 477 if(pixelState.stencilActive) 478 { 479 data->stencil[0] = stencil; 480 data->stencil[1] = stencilCCW; 481 } 482 483 if(pixelState.fogActive) 484 { 485 data->fog = fog; 486 } 487 488 if(setupState.isDrawPoint) 489 { 490 data->point = point; 491 } 492 493 data->lineWidth = context->lineWidth; 494 495 data->factor = factor; 496 497 if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) 498 { 499 float ref = context->alphaReference * (1.0f / 255.0f); 500 float margin = sw::min(ref, 1.0f - ref); 501 502 if(ms == 4) 503 { 504 data->a2c0 = replicate(ref - margin * 0.6f); 505 data->a2c1 = replicate(ref - margin * 0.2f); 506 data->a2c2 = replicate(ref + margin * 0.2f); 507 data->a2c3 = replicate(ref + margin * 0.6f); 508 } 509 else if(ms == 2) 510 { 511 data->a2c0 = replicate(ref - margin * 0.3f); 512 
data->a2c1 = replicate(ref + margin * 0.3f); 513 } 514 else ASSERT(false); 515 } 516 517 if(pixelState.occlusionEnabled) 518 { 519 for(int cluster = 0; cluster < clusterCount; cluster++) 520 { 521 data->occlusion[cluster] = 0; 522 } 523 } 524 525 #if PERF_PROFILE 526 for(int cluster = 0; cluster < clusterCount; cluster++) 527 { 528 for(int i = 0; i < PERF_TIMERS; i++) 529 { 530 data->cycles[i][cluster] = 0; 531 } 532 } 533 #endif 534 535 // Viewport 536 { 537 float W = 0.5f * viewport.width; 538 float H = 0.5f * viewport.height; 539 float X0 = viewport.x0 + W; 540 float Y0 = viewport.y0 + H; 541 float N = viewport.minZ; 542 float F = viewport.maxZ; 543 float Z = F - N; 544 545 if(context->isDrawTriangle(false)) 546 { 547 N += context->depthBias; 548 } 549 550 if(complementaryDepthBuffer) 551 { 552 Z = -Z; 553 N = 1 - N; 554 } 555 556 static const float X[5][16] = // Fragment offsets 557 { 558 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample 559 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples 560 {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples 561 {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples 562 {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples 563 }; 564 565 static const float Y[5][16] = // Fragment offsets 566 { 567 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, 
+0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample 568 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples 569 {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples 570 {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples 571 {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples 572 }; 573 574 int s = sw::log2(ss); 575 576 data->Wx16 = replicate(W * 16); 577 data->Hx16 = replicate(H * 16); 578 data->X0x16 = replicate(X0 * 16 - 8); 579 data->Y0x16 = replicate(Y0 * 16 - 8); 580 data->XXXX = replicate(X[s][q] / W); 581 data->YYYY = replicate(Y[s][q] / H); 582 data->halfPixelX = replicate(0.5f / W); 583 data->halfPixelY = replicate(0.5f / H); 584 data->viewportHeight = abs(viewport.height); 585 data->slopeDepthBias = context->slopeDepthBias; 586 data->depthRange = Z; 587 data->depthNear = N; 588 draw->clipFlags = clipFlags; 589 590 if(clipFlags) 591 { 592 if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0]; 593 if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1]; 594 if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2]; 595 if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3]; 596 if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4]; 597 if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5]; 598 } 599 } 600 601 // Target 602 { 603 for(int index = 0; index < RENDERTARGETS; index++) 604 { 605 draw->renderTarget[index] = context->renderTarget[index]; 606 607 
if(draw->renderTarget[index]) 608 { 609 unsigned int layer = context->renderTargetLayer[index]; 610 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED); 611 data->colorBuffer[index] += q * ms * context->renderTarget[index]->getSliceB(true); 612 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB(); 613 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB(); 614 } 615 } 616 617 draw->depthBuffer = context->depthBuffer; 618 draw->stencilBuffer = context->stencilBuffer; 619 620 if(draw->depthBuffer) 621 { 622 unsigned int layer = context->depthBufferLayer; 623 data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED); 624 data->depthBuffer += q * ms * context->depthBuffer->getSliceB(true); 625 data->depthPitchB = context->depthBuffer->getInternalPitchB(); 626 data->depthSliceB = context->depthBuffer->getInternalSliceB(); 627 } 628 629 if(draw->stencilBuffer) 630 { 631 unsigned int layer = context->stencilBufferLayer; 632 data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, layer, MANAGED); 633 data->stencilBuffer += q * ms * context->stencilBuffer->getSliceB(true); 634 data->stencilPitchB = context->stencilBuffer->getStencilPitchB(); 635 data->stencilSliceB = context->stencilBuffer->getStencilSliceB(); 636 } 637 } 638 639 // Scissor 640 { 641 data->scissorX0 = scissor.x0; 642 data->scissorX1 = scissor.x1; 643 data->scissorY0 = scissor.y0; 644 data->scissorY1 = scissor.y1; 645 } 646 647 draw->primitive = 0; 648 draw->count = count; 649 650 draw->references = (count + batch - 1) / batch; 651 652 schedulerMutex.lock(); 653 ++nextDraw; // Atomic 654 schedulerMutex.unlock(); 655 656 #ifndef NDEBUG 657 if(threadCount == 1) // Use main thread for draw execution 658 { 659 threadsAwake = 1; 660 task[0].type = Task::RESUME; 661 662 taskLoop(0); 663 } 664 else 665 #endif 666 { 667 
// ---- Renderer::draw(), continued: wake worker 0 if every thread is suspended ----
			if(!threadsAwake)
			{
				// Wait until thread 0 has reported that it is suspended, then
				// hand it a RESUME task and release it.
				suspend[0]->wait();

				threadsAwake = 1;
				task[0].type = Task::RESUME;

				resume[0]->signal();
			}
		}
	}
}

// Forwards to Blitter::clear().
void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask)
{
	blitter->clear(value, format, dest, clearRect, rgbaMask);
}

// Forwards to Blitter::blit(); the three flags are passed as one braced
// initializer in the order {filter, isStencil, sRGBconversion}.
void Renderer::blit(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil, bool sRGBconversion)
{
	blitter->blit(source, sRect, dest, dRect, {filter, isStencil, sRGBconversion});
}

// Forwards to Blitter::blit3D().
void Renderer::blit3D(Surface *source, Surface *dest)
{
	blitter->blit3D(source, dest);
}

// Thread entry point. 'parameters' must point to a Parameters struct naming
// the renderer and the index of this thread.
void Renderer::threadFunction(void *parameters)
{
	Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
	int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;

	// Below IEEE precision, enable flush-to-zero and denormals-are-zero for
	// this thread.
	if(logPrecision < IEEE)
	{
		CPUID::setFlushToZero(true);
		CPUID::setDenormalsAreZero(true);
	}

	renderer->threadLoop(threadIndex);
}

// Worker loop: run tasks until told to suspend, report the suspension, then
// sleep until resumed. Repeats until exitThreads is set.
void Renderer::threadLoop(int threadIndex)
{
	while(!exitThreads)
	{
		taskLoop(threadIndex);

		suspend[threadIndex]->signal();
		resume[threadIndex]->wait();
	}
}

// Schedules and executes tasks for this thread until it receives a SUSPEND task.
void Renderer::taskLoop(int threadIndex)
{
	while(task[threadIndex].type != Task::SUSPEND)
	{
		scheduleTask(threadIndex);
		executeTask(threadIndex);
	}
}

// Appends runnable tasks to the task queue: first pixel tasks for idle
// clusters, then primitive tasks for idle units. Called from scheduleTask()
// while schedulerMutex is held.
void Renderer::findAvailableTasks()
{
	// Find pixel tasks
	for(int cluster = 0; cluster < clusterCount; cluster++)
	{
		if(!pixelProgress[cluster].executing)
		{
			for(int unit = 0; unit < unitCount; unit++)
			{
				if(primitiveProgress[unit].references > 0)   // Contains processed primitives
				{
					if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
					{
						if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
						{
							Task &task = taskQueue[qHead];
							task.type = Task::PIXELS;
							task.primitiveUnit = unit;
							task.pixelCluster = cluster;

							pixelProgress[cluster].executing = true;

							// Commit to the task queue
							qHead = (qHead + 1) & TASK_COUNT_BITS;
							qSize++;

							break;   // At most one pixel task per cluster
						}
					}
				}
			}
		}
	}

	// Find primitive tasks
	if(currentDraw == nextDraw)
	{
		return;   // No more primitives to process
	}

	for(int unit = 0; unit < unitCount; unit++)
	{
		DrawCall *draw = drawList[currentDraw & DRAW_COUNT_BITS];

		int primitive = draw->primitive;
		int count = draw->count;

		// All batches of the current draw call have been handed out; move on
		// to the next queued draw call, if any.
		if(primitive >= count)
		{
			++currentDraw;   // Atomic

			if(currentDraw == nextDraw)
			{
				return;   // No more primitives to process
			}

			draw = drawList[currentDraw & DRAW_COUNT_BITS];
		}

		if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
		{
			primitive = draw->primitive;
			count = draw->count;
			int batch = draw->batchSize;

			// Assign the next batch to this unit; the final batch may be short.
			primitiveProgress[unit].drawCall = currentDraw;
			primitiveProgress[unit].firstPrimitive = primitive;
			primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;

			draw->primitive += batch;

			Task &task = taskQueue[qHead];
			task.type = Task::PRIMITIVES;
			task.primitiveUnit = unit;

			// -1 marks the unit as busy until executeTask() sets the count to
			// clusterCount once vertex processing and setup are done.
			primitiveProgress[unit].references = -1;

			// Commit to the task queue
			qHead = (qHead + 1) & TASK_COUNT_BITS;
			qSize++;
		}
	}
}

// Picks the next task for thread 'threadIndex', or marks the thread as
// suspended when the queue is empty. Also wakes suspended threads when tasks
// remain. Runs entirely under schedulerMutex.
void Renderer::scheduleTask(int threadIndex)
{
	schedulerMutex.lock();

	int curThreadsAwake = threadsAwake;

	// Look for more work when the queue holds fewer tasks than there are
	// sleeping threads plus this one.
	if((int)qSize < threadCount - curThreadsAwake + 1)
	{
		findAvailableTasks();
	}

	if(qSize != 0)
	{
		// Take the oldest queued task (the queue is a ring buffer whose newest
		// entry sits just before qHead).
		task[threadIndex] = taskQueue[(qHead - qSize) & TASK_COUNT_BITS];
		qSize--;

		if(curThreadsAwake != threadCount)
		{
			int wakeup = qSize - curThreadsAwake + 1;

			for(int i = 0; i < threadCount && wakeup > 0; i++)
			{
				if(task[i].type == Task::SUSPEND)
				{
					// Wait for the thread to report its suspension before
					// resuming it.
					suspend[i]->wait();
					task[i].type = Task::RESUME;
					resume[i]->signal();

					++threadsAwake;   // Atomic
					wakeup--;
				}
			}
		}
	}
	else
	{
		// Nothing to do: taskLoop() exits when it sees this task type.
		task[threadIndex].type = Task::SUSPEND;

		--threadsAwake;   // Atomic
	}

	schedulerMutex.unlock();
}

// Runs the task previously assigned to thread 'threadIndex' by scheduleTask().
void Renderer::executeTask(int threadIndex)
{
	#if PERF_HUD
		int64_t startTick = Timer::ticks();
	#endif

	switch(task[threadIndex].type)
	{
	case Task::PRIMITIVES:
		{
			// Vertex processing followed by primitive setup for one batch.
			int unit = task[threadIndex].primitiveUnit;

			int input = primitiveProgress[unit].firstPrimitive;
			int count = primitiveProgress[unit].primitiveCount;
			DrawCall *draw = drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
			int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;

			processPrimitiveVertices(unit, input, count, draw->count, threadIndex);

			#if PERF_HUD
				int64_t time = Timer::ticks();
				vertexTime[threadIndex] += time - startTick;
				startTick = time;
			#endif

			int visible = 0;

			// With rasterizer discard enabled, setup is skipped and no
			// primitives are reported as visible.
			if(!draw->setupState.rasterizerDiscard)
			{
// ---- Renderer::executeTask(), continued: inside the rasterizerDiscard check ----
				visible = (this->*setupPrimitives)(unit, count);
			}

			primitiveProgress[unit].visible = visible;
			// A positive count lets findAvailableTasks() hand this batch to the
			// pixel clusters; finishRendering() decrements it once per cluster.
			primitiveProgress[unit].references = clusterCount;

			#if PERF_HUD
				setupTime[threadIndex] += Timer::ticks() - startTick;
			#endif
		}
		break;
	case Task::PIXELS:
		{
			int unit = task[threadIndex].primitiveUnit;
			int visible = primitiveProgress[unit].visible;

			// Run the pixel routine only when setup produced visible
			// primitives; the bookkeeping below happens either way.
			if(visible > 0)
			{
				int cluster = task[threadIndex].pixelCluster;
				Primitive *primitive = primitiveBatch[unit];
				DrawCall *draw = drawList[pixelProgress[cluster].drawCall & DRAW_COUNT_BITS];
				DrawData *data = draw->data;
				PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;

				pixelRoutine(primitive, visible, cluster, data);
			}

			finishRendering(task[threadIndex]);

			#if PERF_HUD
				pixelTime[threadIndex] += Timer::ticks() - startTick;
			#endif
		}
		break;
	case Task::RESUME:
		break;
	case Task::SUSPEND:
		break;
	default:
		ASSERT(false);
	}
}

// Blocks until a PUBLIC lock on 'sync' can be taken, then releases it at once.
// draw() holds 'sync' with PRIVATE access per queued draw call.
// NOTE(review): this presumably waits for all outstanding draw calls - confirm
// against Resource::lock().
void Renderer::synchronize()
{
	sync->lock(sw::PUBLIC);
	sync->unlock();
}

// Bookkeeping after a pixel task: advances the cluster's progress, drops the
// cluster's reference on the primitive batch and, when the whole draw call is
// done, resolves queries and releases every resource that draw() locked.
void Renderer::finishRendering(Task &pixelTask)
{
	int unit = pixelTask.primitiveUnit;
	int cluster = pixelTask.pixelCluster;

	DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
	DrawData &data = *draw.data;
	int primitive = primitiveProgress[unit].firstPrimitive;
	int count = primitiveProgress[unit].primitiveCount;
	int processedPrimitives = primitive + count;

	pixelProgress[cluster].processedPrimitives = processedPrimitives;

	// This cluster has rendered every primitive of the draw call; advance it
	// to the next one.
	if(pixelProgress[cluster].processedPrimitives >= draw.count)
	{
		++pixelProgress[cluster].drawCall;   // Atomic
		pixelProgress[cluster].processedPrimitives = 0;
	}

	// NOTE(review): the 'ref == 0' tests rely on the postfix decrement of the
	// counter type, which is defined outside this file - presumably it yields
	// the decremented value; confirm.
	int ref = primitiveProgress[unit].references--;   // Atomic

	if(ref == 0)
	{
		// Last cluster to finish this batch: drop the batch's reference on
		// the draw call.
		ref = draw.references--;   // Atomic

		if(ref == 0)
		{
			// Last batch of the draw call: finalize it.
			#if PERF_PROFILE
				for(int cluster = 0; cluster < clusterCount; cluster++)
				{
					for(int i = 0; i < PERF_TIMERS; i++)
					{
						profiler.cycles[i] += data.cycles[i][cluster];
					}
				}
			#endif

			// Accumulate query results and release the references taken in draw().
			if(draw.queries)
			{
				for(auto &query : *(draw.queries))
				{
					switch(query->type)
					{
					case Query::FRAGMENTS_PASSED:
						for(int cluster = 0; cluster < clusterCount; cluster++)
						{
							query->data += data.occlusion[cluster];
						}
						break;
					case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
						query->data += processedPrimitives;
						break;
					default:
						break;
					}

					--query->reference;   // Atomic
				}

				delete draw.queries;
				draw.queries = 0;
			}

			// Release everything locked by draw(), skipping unused slots.
			for(int i = 0; i < RENDERTARGETS; i++)
			{
				if(draw.renderTarget[i])
				{
					draw.renderTarget[i]->unlockInternal();
				}
			}

			if(draw.depthBuffer)
			{
				draw.depthBuffer->unlockInternal();
			}

			if(draw.stencilBuffer)
			{
				draw.stencilBuffer->unlockStencil();
			}

			for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
			{
				if(draw.texture[i])
				{
					draw.texture[i]->unlock();
				}
			}

			for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
			{
				if(draw.vertexStream[i])
				{
					draw.vertexStream[i]->unlock();
				}
			}

			if(draw.indexBuffer)
			{
				draw.indexBuffer->unlock();
			}

			for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
			{
				if(draw.pUniformBuffers[i])
				{
					draw.pUniformBuffers[i]->unlock();
				}
				if(draw.vUniformBuffers[i])
				{
					draw.vUniformBuffers[i]->unlock();
				}
			}

			for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
			{
				if(draw.transformFeedbackBuffers[i])
				{
					draw.transformFeedbackBuffers[i]->unlock();
				}
			}

			draw.vertexRoutine->unbind();
			draw.setupRoutine->unbind();
			draw.pixelRoutine->unbind();

			// Matches the sync->lock(sw::PRIVATE) taken in draw().
			sync->unlock();

			// Mark the slot as free so draw() can reuse it.
			draw.references = -1;
1064 resumeApp->signal(); 1065 } 1066 } 1067 1068 pixelProgress[cluster].executing = false; 1069 } 1070 1071 void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread) 1072 { 1073 Triangle *triangle = triangleBatch[unit]; 1074 int primitiveDrawCall = primitiveProgress[unit].drawCall; 1075 DrawCall *draw = drawList[primitiveDrawCall & DRAW_COUNT_BITS]; 1076 DrawData *data = draw->data; 1077 VertexTask *task = vertexTask[thread]; 1078 1079 const void *indices = data->indices; 1080 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer; 1081 1082 if(task->vertexCache.drawCall != primitiveDrawCall) 1083 { 1084 task->vertexCache.clear(); 1085 task->vertexCache.drawCall = primitiveDrawCall; 1086 } 1087 1088 unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size 1089 1090 switch(draw->drawType) 1091 { 1092 case DRAW_POINTLIST: 1093 { 1094 unsigned int index = start; 1095 1096 for(unsigned int i = 0; i < triangleCount; i++) 1097 { 1098 batch[i][0] = index; 1099 batch[i][1] = index; 1100 batch[i][2] = index; 1101 1102 index += 1; 1103 } 1104 } 1105 break; 1106 case DRAW_LINELIST: 1107 { 1108 unsigned int index = 2 * start; 1109 1110 for(unsigned int i = 0; i < triangleCount; i++) 1111 { 1112 batch[i][0] = index + 0; 1113 batch[i][1] = index + 1; 1114 batch[i][2] = index + 1; 1115 1116 index += 2; 1117 } 1118 } 1119 break; 1120 case DRAW_LINESTRIP: 1121 { 1122 unsigned int index = start; 1123 1124 for(unsigned int i = 0; i < triangleCount; i++) 1125 { 1126 batch[i][0] = index + 0; 1127 batch[i][1] = index + 1; 1128 batch[i][2] = index + 1; 1129 1130 index += 1; 1131 } 1132 } 1133 break; 1134 case DRAW_LINELOOP: 1135 { 1136 unsigned int index = start; 1137 1138 for(unsigned int i = 0; i < triangleCount; i++) 1139 { 1140 batch[i][0] = (index + 0) % loop; 1141 batch[i][1] = (index + 1) % loop; 1142 batch[i][2] = (index + 1) % loop; 1143 1144 index += 1; 1145 } 1146 } 1147 break; 1148 
case DRAW_TRIANGLELIST: 1149 { 1150 unsigned int index = 3 * start; 1151 1152 for(unsigned int i = 0; i < triangleCount; i++) 1153 { 1154 batch[i][0] = index + 0; 1155 batch[i][1] = index + 1; 1156 batch[i][2] = index + 2; 1157 1158 index += 3; 1159 } 1160 } 1161 break; 1162 case DRAW_TRIANGLESTRIP: 1163 { 1164 unsigned int index = start; 1165 1166 for(unsigned int i = 0; i < triangleCount; i++) 1167 { 1168 if(leadingVertexFirst) 1169 { 1170 batch[i][0] = index + 0; 1171 batch[i][1] = index + (index & 1) + 1; 1172 batch[i][2] = index + (~index & 1) + 1; 1173 } 1174 else 1175 { 1176 batch[i][0] = index + (index & 1); 1177 batch[i][1] = index + (~index & 1); 1178 batch[i][2] = index + 2; 1179 } 1180 1181 index += 1; 1182 } 1183 } 1184 break; 1185 case DRAW_TRIANGLEFAN: 1186 { 1187 unsigned int index = start; 1188 1189 for(unsigned int i = 0; i < triangleCount; i++) 1190 { 1191 if(leadingVertexFirst) 1192 { 1193 batch[i][0] = index + 1; 1194 batch[i][1] = index + 2; 1195 batch[i][2] = 0; 1196 } 1197 else 1198 { 1199 batch[i][0] = 0; 1200 batch[i][1] = index + 1; 1201 batch[i][2] = index + 2; 1202 } 1203 1204 index += 1; 1205 } 1206 } 1207 break; 1208 case DRAW_INDEXEDPOINTLIST8: 1209 { 1210 const unsigned char *index = (const unsigned char*)indices + start; 1211 1212 for(unsigned int i = 0; i < triangleCount; i++) 1213 { 1214 batch[i][0] = *index; 1215 batch[i][1] = *index; 1216 batch[i][2] = *index; 1217 1218 index += 1; 1219 } 1220 } 1221 break; 1222 case DRAW_INDEXEDPOINTLIST16: 1223 { 1224 const unsigned short *index = (const unsigned short*)indices + start; 1225 1226 for(unsigned int i = 0; i < triangleCount; i++) 1227 { 1228 batch[i][0] = *index; 1229 batch[i][1] = *index; 1230 batch[i][2] = *index; 1231 1232 index += 1; 1233 } 1234 } 1235 break; 1236 case DRAW_INDEXEDPOINTLIST32: 1237 { 1238 const unsigned int *index = (const unsigned int*)indices + start; 1239 1240 for(unsigned int i = 0; i < triangleCount; i++) 1241 { 1242 batch[i][0] = *index; 1243 
batch[i][1] = *index; 1244 batch[i][2] = *index; 1245 1246 index += 1; 1247 } 1248 } 1249 break; 1250 case DRAW_INDEXEDLINELIST8: 1251 { 1252 const unsigned char *index = (const unsigned char*)indices + 2 * start; 1253 1254 for(unsigned int i = 0; i < triangleCount; i++) 1255 { 1256 batch[i][0] = index[0]; 1257 batch[i][1] = index[1]; 1258 batch[i][2] = index[1]; 1259 1260 index += 2; 1261 } 1262 } 1263 break; 1264 case DRAW_INDEXEDLINELIST16: 1265 { 1266 const unsigned short *index = (const unsigned short*)indices + 2 * start; 1267 1268 for(unsigned int i = 0; i < triangleCount; i++) 1269 { 1270 batch[i][0] = index[0]; 1271 batch[i][1] = index[1]; 1272 batch[i][2] = index[1]; 1273 1274 index += 2; 1275 } 1276 } 1277 break; 1278 case DRAW_INDEXEDLINELIST32: 1279 { 1280 const unsigned int *index = (const unsigned int*)indices + 2 * start; 1281 1282 for(unsigned int i = 0; i < triangleCount; i++) 1283 { 1284 batch[i][0] = index[0]; 1285 batch[i][1] = index[1]; 1286 batch[i][2] = index[1]; 1287 1288 index += 2; 1289 } 1290 } 1291 break; 1292 case DRAW_INDEXEDLINESTRIP8: 1293 { 1294 const unsigned char *index = (const unsigned char*)indices + start; 1295 1296 for(unsigned int i = 0; i < triangleCount; i++) 1297 { 1298 batch[i][0] = index[0]; 1299 batch[i][1] = index[1]; 1300 batch[i][2] = index[1]; 1301 1302 index += 1; 1303 } 1304 } 1305 break; 1306 case DRAW_INDEXEDLINESTRIP16: 1307 { 1308 const unsigned short *index = (const unsigned short*)indices + start; 1309 1310 for(unsigned int i = 0; i < triangleCount; i++) 1311 { 1312 batch[i][0] = index[0]; 1313 batch[i][1] = index[1]; 1314 batch[i][2] = index[1]; 1315 1316 index += 1; 1317 } 1318 } 1319 break; 1320 case DRAW_INDEXEDLINESTRIP32: 1321 { 1322 const unsigned int *index = (const unsigned int*)indices + start; 1323 1324 for(unsigned int i = 0; i < triangleCount; i++) 1325 { 1326 batch[i][0] = index[0]; 1327 batch[i][1] = index[1]; 1328 batch[i][2] = index[1]; 1329 1330 index += 1; 1331 } 1332 } 1333 break; 1334 
case DRAW_INDEXEDLINELOOP8: 1335 { 1336 const unsigned char *index = (const unsigned char*)indices; 1337 1338 for(unsigned int i = 0; i < triangleCount; i++) 1339 { 1340 batch[i][0] = index[(start + i + 0) % loop]; 1341 batch[i][1] = index[(start + i + 1) % loop]; 1342 batch[i][2] = index[(start + i + 1) % loop]; 1343 } 1344 } 1345 break; 1346 case DRAW_INDEXEDLINELOOP16: 1347 { 1348 const unsigned short *index = (const unsigned short*)indices; 1349 1350 for(unsigned int i = 0; i < triangleCount; i++) 1351 { 1352 batch[i][0] = index[(start + i + 0) % loop]; 1353 batch[i][1] = index[(start + i + 1) % loop]; 1354 batch[i][2] = index[(start + i + 1) % loop]; 1355 } 1356 } 1357 break; 1358 case DRAW_INDEXEDLINELOOP32: 1359 { 1360 const unsigned int *index = (const unsigned int*)indices; 1361 1362 for(unsigned int i = 0; i < triangleCount; i++) 1363 { 1364 batch[i][0] = index[(start + i + 0) % loop]; 1365 batch[i][1] = index[(start + i + 1) % loop]; 1366 batch[i][2] = index[(start + i + 1) % loop]; 1367 } 1368 } 1369 break; 1370 case DRAW_INDEXEDTRIANGLELIST8: 1371 { 1372 const unsigned char *index = (const unsigned char*)indices + 3 * start; 1373 1374 for(unsigned int i = 0; i < triangleCount; i++) 1375 { 1376 batch[i][0] = index[0]; 1377 batch[i][1] = index[1]; 1378 batch[i][2] = index[2]; 1379 1380 index += 3; 1381 } 1382 } 1383 break; 1384 case DRAW_INDEXEDTRIANGLELIST16: 1385 { 1386 const unsigned short *index = (const unsigned short*)indices + 3 * start; 1387 1388 for(unsigned int i = 0; i < triangleCount; i++) 1389 { 1390 batch[i][0] = index[0]; 1391 batch[i][1] = index[1]; 1392 batch[i][2] = index[2]; 1393 1394 index += 3; 1395 } 1396 } 1397 break; 1398 case DRAW_INDEXEDTRIANGLELIST32: 1399 { 1400 const unsigned int *index = (const unsigned int*)indices + 3 * start; 1401 1402 for(unsigned int i = 0; i < triangleCount; i++) 1403 { 1404 batch[i][0] = index[0]; 1405 batch[i][1] = index[1]; 1406 batch[i][2] = index[2]; 1407 1408 index += 3; 1409 } 1410 } 1411 break; 
1412 case DRAW_INDEXEDTRIANGLESTRIP8: 1413 { 1414 const unsigned char *index = (const unsigned char*)indices + start; 1415 1416 for(unsigned int i = 0; i < triangleCount; i++) 1417 { 1418 batch[i][0] = index[0]; 1419 batch[i][1] = index[((start + i) & 1) + 1]; 1420 batch[i][2] = index[(~(start + i) & 1) + 1]; 1421 1422 index += 1; 1423 } 1424 } 1425 break; 1426 case DRAW_INDEXEDTRIANGLESTRIP16: 1427 { 1428 const unsigned short *index = (const unsigned short*)indices + start; 1429 1430 for(unsigned int i = 0; i < triangleCount; i++) 1431 { 1432 batch[i][0] = index[0]; 1433 batch[i][1] = index[((start + i) & 1) + 1]; 1434 batch[i][2] = index[(~(start + i) & 1) + 1]; 1435 1436 index += 1; 1437 } 1438 } 1439 break; 1440 case DRAW_INDEXEDTRIANGLESTRIP32: 1441 { 1442 const unsigned int *index = (const unsigned int*)indices + start; 1443 1444 for(unsigned int i = 0; i < triangleCount; i++) 1445 { 1446 batch[i][0] = index[0]; 1447 batch[i][1] = index[((start + i) & 1) + 1]; 1448 batch[i][2] = index[(~(start + i) & 1) + 1]; 1449 1450 index += 1; 1451 } 1452 } 1453 break; 1454 case DRAW_INDEXEDTRIANGLEFAN8: 1455 { 1456 const unsigned char *index = (const unsigned char*)indices; 1457 1458 for(unsigned int i = 0; i < triangleCount; i++) 1459 { 1460 batch[i][0] = index[start + i + 1]; 1461 batch[i][1] = index[start + i + 2]; 1462 batch[i][2] = index[0]; 1463 } 1464 } 1465 break; 1466 case DRAW_INDEXEDTRIANGLEFAN16: 1467 { 1468 const unsigned short *index = (const unsigned short*)indices; 1469 1470 for(unsigned int i = 0; i < triangleCount; i++) 1471 { 1472 batch[i][0] = index[start + i + 1]; 1473 batch[i][1] = index[start + i + 2]; 1474 batch[i][2] = index[0]; 1475 } 1476 } 1477 break; 1478 case DRAW_INDEXEDTRIANGLEFAN32: 1479 { 1480 const unsigned int *index = (const unsigned int*)indices; 1481 1482 for(unsigned int i = 0; i < triangleCount; i++) 1483 { 1484 batch[i][0] = index[start + i + 1]; 1485 batch[i][1] = index[start + i + 2]; 1486 batch[i][2] = index[0]; 1487 } 1488 } 
1489 break; 1490 case DRAW_QUADLIST: 1491 { 1492 unsigned int index = 4 * start / 2; 1493 1494 for(unsigned int i = 0; i < triangleCount; i += 2) 1495 { 1496 batch[i+0][0] = index + 0; 1497 batch[i+0][1] = index + 1; 1498 batch[i+0][2] = index + 2; 1499 1500 batch[i+1][0] = index + 0; 1501 batch[i+1][1] = index + 2; 1502 batch[i+1][2] = index + 3; 1503 1504 index += 4; 1505 } 1506 } 1507 break; 1508 default: 1509 ASSERT(false); 1510 return; 1511 } 1512 1513 task->primitiveStart = start; 1514 task->vertexCount = triangleCount * 3; 1515 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data); 1516 } 1517 1518 int Renderer::setupSolidTriangles(int unit, int count) 1519 { 1520 Triangle *triangle = triangleBatch[unit]; 1521 Primitive *primitive = primitiveBatch[unit]; 1522 1523 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 1524 SetupProcessor::State &state = draw.setupState; 1525 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 1526 1527 int ms = state.multiSample; 1528 int pos = state.positionRegister; 1529 const DrawData *data = draw.data; 1530 int visible = 0; 1531 1532 for(int i = 0; i < count; i++, triangle++) 1533 { 1534 Vertex &v0 = triangle->v0; 1535 Vertex &v1 = triangle->v1; 1536 Vertex &v2 = triangle->v2; 1537 1538 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE) 1539 { 1540 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]); 1541 1542 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags; 1543 1544 if(clipFlagsOr != Clipper::CLIP_FINITE) 1545 { 1546 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1547 { 1548 continue; 1549 } 1550 } 1551 1552 if(setupRoutine(primitive, triangle, &polygon, data)) 1553 { 1554 primitive += ms; 1555 visible++; 1556 } 1557 } 1558 } 1559 1560 return visible; 1561 } 1562 1563 int Renderer::setupWireframeTriangle(int unit, int count) 1564 { 1565 Triangle *triangle = triangleBatch[unit]; 1566 Primitive *primitive = 
primitiveBatch[unit];
		int visible = 0;

		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
		SetupProcessor::State &state = draw.setupState;

		const Vertex &v0 = triangle[0].v0;
		const Vertex &v1 = triangle[0].v1;
		const Vertex &v2 = triangle[0].v2;

		// Signed quantity whose sign is tested against the cull mode below;
		// half of it is stored as the area of every emitted edge primitive.
		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;

		if(state.cullMode == CULL_CLOCKWISE)
		{
			if(d >= 0) return 0;
		}
		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
		{
			if(d <= 0) return 0;
		}

		// Copy attributes
		// triangle[0] keeps edge v0-v1; triangle[1] and triangle[2] receive edges v1-v2 and v2-v0
		triangle[1].v0 = v1;
		triangle[1].v1 = v2;
		triangle[2].v0 = v2;
		triangle[2].v1 = v0;

		if(state.color[0][0].flat)   // FIXME
		{
			// Flat shading: all edges take both colors from the first vertex
			for(int i = 0; i < 2; i++)
			{
				triangle[1].v0.C[i] = triangle[0].v0.C[i];
				triangle[1].v1.C[i] = triangle[0].v0.C[i];
				triangle[2].v0.C[i] = triangle[0].v0.C[i];
				triangle[2].v1.C[i] = triangle[0].v0.C[i];
			}
		}

		// Set up each of the three edges as a line
		for(int i = 0; i < 3; i++)
		{
			if(setupLine(*primitive, *triangle, draw))
			{
				primitive->area = 0.5f * d;

				primitive++;
				visible++;
			}

			triangle++;
		}

		return visible;
	}

	// Sets up the three corners of the first triangle of the unit's batch as
	// points, after culling the triangle according to the setup state's cull mode.
	// Returns the number of points that passed setup (0 when culled).
	// The 'count' parameter is not used.
	int Renderer::setupVertexTriangle(int unit, int count)
	{
		Triangle *triangle = triangleBatch[unit];
		Primitive *primitive = primitiveBatch[unit];
		int visible = 0;

		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
		SetupProcessor::State &state = draw.setupState;

		const Vertex &v0 = triangle[0].v0;
		const Vertex &v1 = triangle[0].v1;
		const Vertex &v2 = triangle[0].v2;

		// Signed quantity whose sign is tested against the cull mode below
		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;

		if(state.cullMode == CULL_CLOCKWISE)
		{
			if(d >= 0) return 0;
		}
		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
		{
			if(d <= 0) return 0;
		}

		// Copy attributes
		// setupPoint() reads v0 of each triangle, so v1 and v2 are moved into slots 1 and 2
		triangle[1].v0 = v1;
		triangle[2].v0 = v2;

		for(int i = 0; i < 3; i++)
		{
			if(setupPoint(*primitive, *triangle, draw))
			{
				primitive->area = 0.5f * d;

				primitive++;
				visible++;
			}

			triangle++;
		}

		return visible;
	}

	// Sets up 'count' lines from the unit's triangle batch.
	// Returns the number of lines accepted by setupLine().
	int Renderer::setupLines(int unit, int count)
	{
		Triangle *triangle = triangleBatch[unit];
		Primitive *primitive = primitiveBatch[unit];
		int visible = 0;

		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
		SetupProcessor::State &state = draw.setupState;

		int ms = state.multiSample;

		for(int i = 0; i < count; i++)
		{
			if(setupLine(*primitive, *triangle, draw))
			{
				// Each accepted line consumes 'ms' primitive slots
				primitive += ms;
				visible++;
			}

			triangle++;
		}

		return visible;
	}

	// Sets up 'count' points from the unit's triangle batch.
	// Returns the number of points accepted by setupPoint().
	int Renderer::setupPoints(int unit, int count)
	{
		Triangle *triangle = triangleBatch[unit];
		Primitive *primitive = primitiveBatch[unit];
		int visible = 0;

		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
		SetupProcessor::State &state = draw.setupState;

		int ms = state.multiSample;

		for(int i = 0; i < count; i++)
		{
			if(setupPoint(*primitive, *triangle, draw))
			{
				// Each accepted point consumes 'ms' primitive slots
				primitive += ms;
				visible++;
			}

			triangle++;
		}

		return visible;
	}

	// Expands the line between triangle.v0 and triangle.v1 into a polygon,
	// clips it if needed and hands it to the draw call's setup routine.
	//
	// With multisampling (multiSample > 1) the line becomes a rectangle of
	// width data.lineWidth; otherwise a six-sided polygon is built from
	// diamonds placed around both end points.
	//
	// Returns false when both end points have w <= 0, when the line has zero
	// extent (dx == dy == 0), when the polygon is rejected by clipping, or
	// when the setup routine itself returns false.
	bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
	{
		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
		const SetupProcessor::State &state = draw.setupState;
		const DrawData &data = *draw.data;

		float lineWidth = data.lineWidth;

		Vertex &v0 = triangle.v0;
		Vertex &v1 = triangle.v1;

		int pos = state.positionRegister;

		const float4 &P0 = v0.v[pos];
		const float4 &P1 = v1.v[pos];

		if(P0.w <= 0 && P1.w <= 0)
		{
			return false;
		}

		// Wx16 / Hx16 hold 16 times the scale factors used here
		const float W = data.Wx16[0] * (1.0f / 16.0f);
		const float H = data.Hx16[0] * (1.0f / 16.0f);

		// Extent of the line after the perspective divide, scaled by W and H
		float dx = W * (P1.x / P1.w - P0.x / P0.w);
		float dy = H * (P1.y / P1.w - P0.y / P0.w);

		if(dx == 0 && dy == 0)
		{
			return false;
		}

		if(state.multiSample > 1)   // Rectangle
		{
			float4 P[4];
			int C[4];

			P[0] = P0;
			P[1] = P1;
			P[2] = P1;
			P[3] = P0;

			// Normalize (dx, dy) to half the line width
			float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);

			dx *= scale;
			dy *= scale;

			// Offsets scaled by each end point's w and divided by H / W
			float dx0h = dx * P0.w / H;
			float dy0w = dy * P0.w / W;

			float dx1h = dx * P1.w / H;
			float dy1w = dy * P1.w / W;

			// Displace the corners perpendicular to the line direction
			P[0].x += -dy0w;
			P[0].y += +dx0h;
			C[0] = clipper->computeClipFlags(P[0]);

			P[1].x += -dy1w;
			P[1].y += +dx1h;
			C[1] = clipper->computeClipFlags(P[1]);

			P[2].x += +dy1w;
			P[2].y += -dx1h;
			C[2] = clipper->computeClipFlags(P[2]);

			P[3].x += +dy0w;
			P[3].y += -dx0h;
			C[3] = clipper->computeClipFlags(P[3]);

			if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
			{
				Polygon polygon(P, 4);

				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;

				if(clipFlagsOr != Clipper::CLIP_FINITE)
				{
					if(!clipper->clip(polygon, clipFlagsOr, draw))
					{
						return false;
					}
				}

				return setupRoutine(&primitive, &triangle, &polygon, &data);
			}
		}
		else   // Diamond test convention
		{
			float4 P[8];
			int C[8];

			// P[0..3] surround the first end point, P[4..7] the second
			P[0] = P0;
			P[1] = P0;
			P[2] = P0;
			P[3] = P0;
			P[4] = P1;
			P[5] = P1;
			P[6] = P1;
			P[7] = P1;

			float dx0 = lineWidth * 0.5f * P0.w / W;
			float dy0 = lineWidth * 0.5f * P0.w / H;

			float dx1 = lineWidth * 0.5f * P1.w / W;
			float dy1 = lineWidth * 0.5f * P1.w / H;

			// Diamond corners: -x, +y, +x, -y around each end point
			P[0].x += -dx0;
			C[0] = clipper->computeClipFlags(P[0]);

			P[1].y += +dy0;
			C[1] = clipper->computeClipFlags(P[1]);

			P[2].x += +dx0;
			C[2] = clipper->computeClipFlags(P[2]);

			P[3].y += -dy0;
			C[3] = clipper->computeClipFlags(P[3]);

			P[4].x += -dx1;
			C[4] = clipper->computeClipFlags(P[4]);

			P[5].y += +dy1;
			C[5] = clipper->computeClipFlags(P[5]);

			P[6].x += +dx1;
			C[6] = clipper->computeClipFlags(P[6]);

			P[7].y += -dy1;
			C[7] = clipper->computeClipFlags(P[7]);

			if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
			{
				float4 L[6];

				// Choose six of the eight diamond corners based on the dominant line direction
				if(dx > -dy)
				{
					if(dx > dy)   // Right
					{
						L[0] = P[0];
						L[1] = P[1];
						L[2] = P[5];
						L[3] = P[6];
						L[4] = P[7];
						L[5] = P[3];
					}
					else   // Down
					{
						L[0] = P[0];
						L[1] = P[4];
						L[2] = P[5];
						L[3] = P[6];
						L[4] = P[2];
						L[5] = P[3];
					}
				}
				else
				{
					if(dx > dy)   // Up
					{
						L[0] = P[0];
						L[1] = P[1];
						L[2] = P[2];
						L[3] = P[6];
						L[4] = P[7];
						L[5] = P[4];
					}
					else   // Left
					{
						L[0] = P[1];
						L[1] = P[2];
						L[2] = P[3];
						L[3] = P[7];
						L[4] = P[4];
						L[5] = P[5];
					}
				}

				Polygon polygon(L, 6);

				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;

				if(clipFlagsOr != Clipper::CLIP_FINITE)
				{
					if(!clipper->clip(polygon, clipFlagsOr, draw))
					{
						return false;
					}
				}

				return setupRoutine(&primitive, &triangle, &polygon, &data);
			}
		}

		return false;
	}

	bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
	{
		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
		const SetupProcessor::State &state = draw.setupState;
		const DrawData &data = *draw.data;

1916 Vertex &v = triangle.v0; 1917 1918 float pSize; 1919 1920 int pts = state.pointSizeRegister; 1921 1922 if(state.pointSizeRegister != Unused) 1923 { 1924 pSize = v.v[pts].y; 1925 } 1926 else 1927 { 1928 pSize = data.point.pointSize[0]; 1929 } 1930 1931 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax); 1932 1933 float4 P[4]; 1934 int C[4]; 1935 1936 int pos = state.positionRegister; 1937 1938 P[0] = v.v[pos]; 1939 P[1] = v.v[pos]; 1940 P[2] = v.v[pos]; 1941 P[3] = v.v[pos]; 1942 1943 const float X = pSize * P[0].w * data.halfPixelX[0]; 1944 const float Y = pSize * P[0].w * data.halfPixelY[0]; 1945 1946 P[0].x -= X; 1947 P[0].y += Y; 1948 C[0] = clipper->computeClipFlags(P[0]); 1949 1950 P[1].x += X; 1951 P[1].y += Y; 1952 C[1] = clipper->computeClipFlags(P[1]); 1953 1954 P[2].x += X; 1955 P[2].y -= Y; 1956 C[2] = clipper->computeClipFlags(P[2]); 1957 1958 P[3].x -= X; 1959 P[3].y -= Y; 1960 C[3] = clipper->computeClipFlags(P[3]); 1961 1962 triangle.v1 = triangle.v0; 1963 triangle.v2 = triangle.v0; 1964 1965 triangle.v1.X += iround(16 * 0.5f * pSize); 1966 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 
1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner 1967 1968 Polygon polygon(P, 4); 1969 1970 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) 1971 { 1972 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags; 1973 1974 if(clipFlagsOr != Clipper::CLIP_FINITE) 1975 { 1976 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1977 { 1978 return false; 1979 } 1980 } 1981 1982 return setupRoutine(&primitive, &triangle, &polygon, &data); 1983 } 1984 1985 return false; 1986 } 1987 1988 void Renderer::initializeThreads() 1989 { 1990 unitCount = ceilPow2(threadCount); 1991 clusterCount = ceilPow2(threadCount); 1992 1993 for(int i = 0; i < unitCount; i++) 1994 { 1995 triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle)); 1996 primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive)); 1997 } 1998 1999 for(int i = 0; i < threadCount; i++) 2000 { 2001 vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask)); 2002 vertexTask[i]->vertexCache.drawCall = -1; 2003 2004 task[i].type = Task::SUSPEND; 2005 2006 resume[i] = new Event(); 2007 suspend[i] = new Event(); 2008 2009 Parameters parameters; 2010 parameters.threadIndex = i; 2011 parameters.renderer = this; 2012 2013 exitThreads = false; 2014 worker[i] = new Thread(threadFunction, ¶meters); 2015 2016 suspend[i]->wait(); 2017 suspend[i]->signal(); 2018 } 2019 } 2020 2021 void Renderer::terminateThreads() 2022 { 2023 while(threadsAwake != 0) 2024 { 2025 Thread::sleep(1); 2026 } 2027 2028 for(int thread = 0; thread < threadCount; thread++) 2029 { 2030 if(worker[thread]) 2031 { 2032 exitThreads = true; 2033 resume[thread]->signal(); 2034 worker[thread]->join(); 2035 2036 delete worker[thread]; 2037 worker[thread] = 0; 2038 delete resume[thread]; 2039 resume[thread] = 0; 2040 delete suspend[thread]; 2041 suspend[thread] = 0; 2042 } 2043 2044 deallocate(vertexTask[thread]); 2045 vertexTask[thread] = 0; 2046 } 2047 2048 for(int i = 0; i < 16; i++) 2049 { 2050 
deallocate(triangleBatch[i]); 2051 triangleBatch[i] = 0; 2052 2053 deallocate(primitiveBatch[i]); 2054 primitiveBatch[i] = 0; 2055 } 2056 } 2057 2058 void Renderer::loadConstants(const VertexShader *vertexShader) 2059 { 2060 if(!vertexShader) return; 2061 2062 size_t count = vertexShader->getLength(); 2063 2064 for(size_t i = 0; i < count; i++) 2065 { 2066 const Shader::Instruction *instruction = vertexShader->getInstruction(i); 2067 2068 if(instruction->opcode == Shader::OPCODE_DEF) 2069 { 2070 int index = instruction->dst.index; 2071 float value[4]; 2072 2073 value[0] = instruction->src[0].value[0]; 2074 value[1] = instruction->src[0].value[1]; 2075 value[2] = instruction->src[0].value[2]; 2076 value[3] = instruction->src[0].value[3]; 2077 2078 setVertexShaderConstantF(index, value); 2079 } 2080 else if(instruction->opcode == Shader::OPCODE_DEFI) 2081 { 2082 int index = instruction->dst.index; 2083 int integer[4]; 2084 2085 integer[0] = instruction->src[0].integer[0]; 2086 integer[1] = instruction->src[0].integer[1]; 2087 integer[2] = instruction->src[0].integer[2]; 2088 integer[3] = instruction->src[0].integer[3]; 2089 2090 setVertexShaderConstantI(index, integer); 2091 } 2092 else if(instruction->opcode == Shader::OPCODE_DEFB) 2093 { 2094 int index = instruction->dst.index; 2095 int boolean = instruction->src[0].boolean[0]; 2096 2097 setVertexShaderConstantB(index, &boolean); 2098 } 2099 } 2100 } 2101 2102 void Renderer::loadConstants(const PixelShader *pixelShader) 2103 { 2104 if(!pixelShader) return; 2105 2106 size_t count = pixelShader->getLength(); 2107 2108 for(size_t i = 0; i < count; i++) 2109 { 2110 const Shader::Instruction *instruction = pixelShader->getInstruction(i); 2111 2112 if(instruction->opcode == Shader::OPCODE_DEF) 2113 { 2114 int index = instruction->dst.index; 2115 float value[4]; 2116 2117 value[0] = instruction->src[0].value[0]; 2118 value[1] = instruction->src[0].value[1]; 2119 value[2] = instruction->src[0].value[2]; 2120 value[3] = 
instruction->src[0].value[3]; 2121 2122 setPixelShaderConstantF(index, value); 2123 } 2124 else if(instruction->opcode == Shader::OPCODE_DEFI) 2125 { 2126 int index = instruction->dst.index; 2127 int integer[4]; 2128 2129 integer[0] = instruction->src[0].integer[0]; 2130 integer[1] = instruction->src[0].integer[1]; 2131 integer[2] = instruction->src[0].integer[2]; 2132 integer[3] = instruction->src[0].integer[3]; 2133 2134 setPixelShaderConstantI(index, integer); 2135 } 2136 else if(instruction->opcode == Shader::OPCODE_DEFB) 2137 { 2138 int index = instruction->dst.index; 2139 int boolean = instruction->src[0].boolean[0]; 2140 2141 setPixelShaderConstantB(index, &boolean); 2142 } 2143 } 2144 } 2145 2146 void Renderer::setIndexBuffer(Resource *indexBuffer) 2147 { 2148 context->indexBuffer = indexBuffer; 2149 } 2150 2151 void Renderer::setMultiSampleMask(unsigned int mask) 2152 { 2153 context->sampleMask = mask; 2154 } 2155 2156 void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing) 2157 { 2158 sw::transparencyAntialiasing = transparencyAntialiasing; 2159 } 2160 2161 bool Renderer::isReadWriteTexture(int sampler) 2162 { 2163 for(int index = 0; index < RENDERTARGETS; index++) 2164 { 2165 if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource()) 2166 { 2167 return true; 2168 } 2169 } 2170 2171 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource()) 2172 { 2173 return true; 2174 } 2175 2176 return false; 2177 } 2178 2179 void Renderer::updateClipper() 2180 { 2181 if(updateClipPlanes) 2182 { 2183 if(VertexProcessor::isFixedFunction()) // User plane in world space 2184 { 2185 const Matrix &scissorWorld = getViewTransform(); 2186 2187 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0]; 2188 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1]; 2189 if(clipFlags & Clipper::CLIP_PLANE2) 
clipPlane[2] = scissorWorld * userPlane[2]; 2190 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3]; 2191 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4]; 2192 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5]; 2193 } 2194 else // User plane in clip space 2195 { 2196 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0]; 2197 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1]; 2198 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2]; 2199 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3]; 2200 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4]; 2201 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5]; 2202 } 2203 2204 updateClipPlanes = false; 2205 } 2206 } 2207 2208 void Renderer::setTextureResource(unsigned int sampler, Resource *resource) 2209 { 2210 ASSERT(sampler < TOTAL_IMAGE_UNITS); 2211 2212 context->texture[sampler] = resource; 2213 } 2214 2215 void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type) 2216 { 2217 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS); 2218 2219 context->sampler[sampler].setTextureLevel(face, level, surface, type); 2220 } 2221 2222 void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter) 2223 { 2224 if(type == SAMPLER_PIXEL) 2225 { 2226 PixelProcessor::setTextureFilter(sampler, textureFilter); 2227 } 2228 else 2229 { 2230 VertexProcessor::setTextureFilter(sampler, textureFilter); 2231 } 2232 } 2233 2234 void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter) 2235 { 2236 if(type == SAMPLER_PIXEL) 2237 { 2238 PixelProcessor::setMipmapFilter(sampler, mipmapFilter); 2239 } 2240 else 2241 { 2242 VertexProcessor::setMipmapFilter(sampler, mipmapFilter); 2243 } 2244 } 2245 2246 void 
Renderer::setGatherEnable(SamplerType type, int sampler, bool enable) 2247 { 2248 if(type == SAMPLER_PIXEL) 2249 { 2250 PixelProcessor::setGatherEnable(sampler, enable); 2251 } 2252 else 2253 { 2254 VertexProcessor::setGatherEnable(sampler, enable); 2255 } 2256 } 2257 2258 void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode) 2259 { 2260 if(type == SAMPLER_PIXEL) 2261 { 2262 PixelProcessor::setAddressingModeU(sampler, addressMode); 2263 } 2264 else 2265 { 2266 VertexProcessor::setAddressingModeU(sampler, addressMode); 2267 } 2268 } 2269 2270 void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode) 2271 { 2272 if(type == SAMPLER_PIXEL) 2273 { 2274 PixelProcessor::setAddressingModeV(sampler, addressMode); 2275 } 2276 else 2277 { 2278 VertexProcessor::setAddressingModeV(sampler, addressMode); 2279 } 2280 } 2281 2282 void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode) 2283 { 2284 if(type == SAMPLER_PIXEL) 2285 { 2286 PixelProcessor::setAddressingModeW(sampler, addressMode); 2287 } 2288 else 2289 { 2290 VertexProcessor::setAddressingModeW(sampler, addressMode); 2291 } 2292 } 2293 2294 void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB) 2295 { 2296 if(type == SAMPLER_PIXEL) 2297 { 2298 PixelProcessor::setReadSRGB(sampler, sRGB); 2299 } 2300 else 2301 { 2302 VertexProcessor::setReadSRGB(sampler, sRGB); 2303 } 2304 } 2305 2306 void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias) 2307 { 2308 if(type == SAMPLER_PIXEL) 2309 { 2310 PixelProcessor::setMipmapLOD(sampler, bias); 2311 } 2312 else 2313 { 2314 VertexProcessor::setMipmapLOD(sampler, bias); 2315 } 2316 } 2317 2318 void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor) 2319 { 2320 if(type == SAMPLER_PIXEL) 2321 { 2322 PixelProcessor::setBorderColor(sampler, borderColor); 2323 } 2324 else 2325 { 2326 
VertexProcessor::setBorderColor(sampler, borderColor); 2327 } 2328 } 2329 2330 void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy) 2331 { 2332 if(type == SAMPLER_PIXEL) 2333 { 2334 PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy); 2335 } 2336 else 2337 { 2338 VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy); 2339 } 2340 } 2341 2342 void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering) 2343 { 2344 if(type == SAMPLER_PIXEL) 2345 { 2346 PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); 2347 } 2348 else 2349 { 2350 VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); 2351 } 2352 } 2353 2354 void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR) 2355 { 2356 if(type == SAMPLER_PIXEL) 2357 { 2358 PixelProcessor::setSwizzleR(sampler, swizzleR); 2359 } 2360 else 2361 { 2362 VertexProcessor::setSwizzleR(sampler, swizzleR); 2363 } 2364 } 2365 2366 void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG) 2367 { 2368 if(type == SAMPLER_PIXEL) 2369 { 2370 PixelProcessor::setSwizzleG(sampler, swizzleG); 2371 } 2372 else 2373 { 2374 VertexProcessor::setSwizzleG(sampler, swizzleG); 2375 } 2376 } 2377 2378 void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB) 2379 { 2380 if(type == SAMPLER_PIXEL) 2381 { 2382 PixelProcessor::setSwizzleB(sampler, swizzleB); 2383 } 2384 else 2385 { 2386 VertexProcessor::setSwizzleB(sampler, swizzleB); 2387 } 2388 } 2389 2390 void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA) 2391 { 2392 if(type == SAMPLER_PIXEL) 2393 { 2394 PixelProcessor::setSwizzleA(sampler, swizzleA); 2395 } 2396 else 2397 { 2398 VertexProcessor::setSwizzleA(sampler, swizzleA); 2399 } 2400 } 2401 2402 void Renderer::setCompareFunc(SamplerType type, int sampler, CompareFunc compFunc) 2403 { 2404 if(type == SAMPLER_PIXEL) 2405 { 2406 
PixelProcessor::setCompareFunc(sampler, compFunc); 2407 } 2408 else 2409 { 2410 VertexProcessor::setCompareFunc(sampler, compFunc); 2411 } 2412 } 2413 2414 void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel) 2415 { 2416 if(type == SAMPLER_PIXEL) 2417 { 2418 PixelProcessor::setBaseLevel(sampler, baseLevel); 2419 } 2420 else 2421 { 2422 VertexProcessor::setBaseLevel(sampler, baseLevel); 2423 } 2424 } 2425 2426 void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel) 2427 { 2428 if(type == SAMPLER_PIXEL) 2429 { 2430 PixelProcessor::setMaxLevel(sampler, maxLevel); 2431 } 2432 else 2433 { 2434 VertexProcessor::setMaxLevel(sampler, maxLevel); 2435 } 2436 } 2437 2438 void Renderer::setMinLod(SamplerType type, int sampler, float minLod) 2439 { 2440 if(type == SAMPLER_PIXEL) 2441 { 2442 PixelProcessor::setMinLod(sampler, minLod); 2443 } 2444 else 2445 { 2446 VertexProcessor::setMinLod(sampler, minLod); 2447 } 2448 } 2449 2450 void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod) 2451 { 2452 if(type == SAMPLER_PIXEL) 2453 { 2454 PixelProcessor::setMaxLod(sampler, maxLod); 2455 } 2456 else 2457 { 2458 VertexProcessor::setMaxLod(sampler, maxLod); 2459 } 2460 } 2461 2462 void Renderer::setPointSpriteEnable(bool pointSpriteEnable) 2463 { 2464 context->setPointSpriteEnable(pointSpriteEnable); 2465 } 2466 2467 void Renderer::setPointScaleEnable(bool pointScaleEnable) 2468 { 2469 context->setPointScaleEnable(pointScaleEnable); 2470 } 2471 2472 void Renderer::setLineWidth(float width) 2473 { 2474 context->lineWidth = width; 2475 } 2476 2477 void Renderer::setDepthBias(float bias) 2478 { 2479 context->depthBias = bias; 2480 } 2481 2482 void Renderer::setSlopeDepthBias(float slopeBias) 2483 { 2484 context->slopeDepthBias = slopeBias; 2485 } 2486 2487 void Renderer::setRasterizerDiscard(bool rasterizerDiscard) 2488 { 2489 context->rasterizerDiscard = rasterizerDiscard; 2490 } 2491 2492 void Renderer::setPixelShader(const PixelShader 
*shader) 2493 { 2494 context->pixelShader = shader; 2495 2496 loadConstants(shader); 2497 } 2498 2499 void Renderer::setVertexShader(const VertexShader *shader) 2500 { 2501 context->vertexShader = shader; 2502 2503 loadConstants(shader); 2504 } 2505 2506 void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count) 2507 { 2508 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2509 { 2510 if(drawCall[i]->psDirtyConstF < index + count) 2511 { 2512 drawCall[i]->psDirtyConstF = index + count; 2513 } 2514 } 2515 2516 for(unsigned int i = 0; i < count; i++) 2517 { 2518 PixelProcessor::setFloatConstant(index + i, value); 2519 value += 4; 2520 } 2521 } 2522 2523 void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count) 2524 { 2525 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2526 { 2527 if(drawCall[i]->psDirtyConstI < index + count) 2528 { 2529 drawCall[i]->psDirtyConstI = index + count; 2530 } 2531 } 2532 2533 for(unsigned int i = 0; i < count; i++) 2534 { 2535 PixelProcessor::setIntegerConstant(index + i, value); 2536 value += 4; 2537 } 2538 } 2539 2540 void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count) 2541 { 2542 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2543 { 2544 if(drawCall[i]->psDirtyConstB < index + count) 2545 { 2546 drawCall[i]->psDirtyConstB = index + count; 2547 } 2548 } 2549 2550 for(unsigned int i = 0; i < count; i++) 2551 { 2552 PixelProcessor::setBooleanConstant(index + i, *boolean); 2553 boolean++; 2554 } 2555 } 2556 2557 void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count) 2558 { 2559 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2560 { 2561 if(drawCall[i]->vsDirtyConstF < index + count) 2562 { 2563 drawCall[i]->vsDirtyConstF = index + count; 2564 } 2565 } 2566 2567 for(unsigned int i = 0; i < count; i++) 2568 { 2569 VertexProcessor::setFloatConstant(index + i, value); 2570 value += 
4; 2571 } 2572 } 2573 2574 void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count) 2575 { 2576 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2577 { 2578 if(drawCall[i]->vsDirtyConstI < index + count) 2579 { 2580 drawCall[i]->vsDirtyConstI = index + count; 2581 } 2582 } 2583 2584 for(unsigned int i = 0; i < count; i++) 2585 { 2586 VertexProcessor::setIntegerConstant(index + i, value); 2587 value += 4; 2588 } 2589 } 2590 2591 void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count) 2592 { 2593 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2594 { 2595 if(drawCall[i]->vsDirtyConstB < index + count) 2596 { 2597 drawCall[i]->vsDirtyConstB = index + count; 2598 } 2599 } 2600 2601 for(unsigned int i = 0; i < count; i++) 2602 { 2603 VertexProcessor::setBooleanConstant(index + i, *boolean); 2604 boolean++; 2605 } 2606 } 2607 2608 void Renderer::setModelMatrix(const Matrix &M, int i) 2609 { 2610 VertexProcessor::setModelMatrix(M, i); 2611 } 2612 2613 void Renderer::setViewMatrix(const Matrix &V) 2614 { 2615 VertexProcessor::setViewMatrix(V); 2616 updateClipPlanes = true; 2617 } 2618 2619 void Renderer::setBaseMatrix(const Matrix &B) 2620 { 2621 VertexProcessor::setBaseMatrix(B); 2622 updateClipPlanes = true; 2623 } 2624 2625 void Renderer::setProjectionMatrix(const Matrix &P) 2626 { 2627 VertexProcessor::setProjectionMatrix(P); 2628 updateClipPlanes = true; 2629 } 2630 2631 void Renderer::addQuery(Query *query) 2632 { 2633 queries.push_back(query); 2634 } 2635 2636 void Renderer::removeQuery(Query *query) 2637 { 2638 queries.remove(query); 2639 } 2640 2641 #if PERF_HUD 2642 int Renderer::getThreadCount() 2643 { 2644 return threadCount; 2645 } 2646 2647 int64_t Renderer::getVertexTime(int thread) 2648 { 2649 return vertexTime[thread]; 2650 } 2651 2652 int64_t Renderer::getSetupTime(int thread) 2653 { 2654 return setupTime[thread]; 2655 } 2656 2657 int64_t Renderer::getPixelTime(int thread) 
2658 { 2659 return pixelTime[thread]; 2660 } 2661 2662 void Renderer::resetTimers() 2663 { 2664 for(int thread = 0; thread < threadCount; thread++) 2665 { 2666 vertexTime[thread] = 0; 2667 setupTime[thread] = 0; 2668 pixelTime[thread] = 0; 2669 } 2670 } 2671 #endif 2672 2673 void Renderer::setViewport(const Viewport &viewport) 2674 { 2675 this->viewport = viewport; 2676 } 2677 2678 void Renderer::setScissor(const Rect &scissor) 2679 { 2680 this->scissor = scissor; 2681 } 2682 2683 void Renderer::setClipFlags(int flags) 2684 { 2685 clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum 2686 } 2687 2688 void Renderer::setClipPlane(unsigned int index, const float plane[4]) 2689 { 2690 if(index < MAX_CLIP_PLANES) 2691 { 2692 userPlane[index] = plane; 2693 } 2694 else ASSERT(false); 2695 2696 updateClipPlanes = true; 2697 } 2698 2699 void Renderer::updateConfiguration(bool initialUpdate) 2700 { 2701 bool newConfiguration = swiftConfig->hasNewConfiguration(); 2702 2703 if(newConfiguration || initialUpdate) 2704 { 2705 terminateThreads(); 2706 2707 SwiftConfig::Configuration configuration = {}; 2708 swiftConfig->getConfiguration(configuration); 2709 2710 precacheVertex = !newConfiguration && configuration.precache; 2711 precacheSetup = !newConfiguration && configuration.precache; 2712 precachePixel = !newConfiguration && configuration.precache; 2713 2714 VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize); 2715 PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize); 2716 SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize); 2717 2718 switch(configuration.textureSampleQuality) 2719 { 2720 case 0: Sampler::setFilterQuality(FILTER_POINT); break; 2721 case 1: Sampler::setFilterQuality(FILTER_LINEAR); break; 2722 case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; 2723 default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; 2724 } 2725 2726 switch(configuration.mipmapQuality) 2727 { 2728 
case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break; 2729 case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; 2730 default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; 2731 } 2732 2733 setPerspectiveCorrection(configuration.perspectiveCorrection); 2734 2735 switch(configuration.transcendentalPrecision) 2736 { 2737 case 0: 2738 logPrecision = APPROXIMATE; 2739 expPrecision = APPROXIMATE; 2740 rcpPrecision = APPROXIMATE; 2741 rsqPrecision = APPROXIMATE; 2742 break; 2743 case 1: 2744 logPrecision = PARTIAL; 2745 expPrecision = PARTIAL; 2746 rcpPrecision = PARTIAL; 2747 rsqPrecision = PARTIAL; 2748 break; 2749 case 2: 2750 logPrecision = ACCURATE; 2751 expPrecision = ACCURATE; 2752 rcpPrecision = ACCURATE; 2753 rsqPrecision = ACCURATE; 2754 break; 2755 case 3: 2756 logPrecision = WHQL; 2757 expPrecision = WHQL; 2758 rcpPrecision = WHQL; 2759 rsqPrecision = WHQL; 2760 break; 2761 case 4: 2762 logPrecision = IEEE; 2763 expPrecision = IEEE; 2764 rcpPrecision = IEEE; 2765 rsqPrecision = IEEE; 2766 break; 2767 default: 2768 logPrecision = ACCURATE; 2769 expPrecision = ACCURATE; 2770 rcpPrecision = ACCURATE; 2771 rsqPrecision = ACCURATE; 2772 break; 2773 } 2774 2775 switch(configuration.transparencyAntialiasing) 2776 { 2777 case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break; 2778 case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break; 2779 default: transparencyAntialiasing = TRANSPARENCY_NONE; break; 2780 } 2781 2782 switch(configuration.threadCount) 2783 { 2784 case -1: threadCount = CPUID::coreCount(); break; 2785 case 0: threadCount = CPUID::processAffinity(); break; 2786 default: threadCount = configuration.threadCount; break; 2787 } 2788 2789 CPUID::setEnableSSE4_1(configuration.enableSSE4_1); 2790 CPUID::setEnableSSSE3(configuration.enableSSSE3); 2791 CPUID::setEnableSSE3(configuration.enableSSE3); 2792 CPUID::setEnableSSE2(configuration.enableSSE2); 2793 CPUID::setEnableSSE(configuration.enableSSE); 2794 2795 for(int pass = 0; 
pass < 10; pass++) 2796 { 2797 optimization[pass] = configuration.optimization[pass]; 2798 } 2799 2800 forceWindowed = configuration.forceWindowed; 2801 complementaryDepthBuffer = configuration.complementaryDepthBuffer; 2802 postBlendSRGB = configuration.postBlendSRGB; 2803 exactColorRounding = configuration.exactColorRounding; 2804 forceClearRegisters = configuration.forceClearRegisters; 2805 2806 #ifndef NDEBUG 2807 minPrimitives = configuration.minPrimitives; 2808 maxPrimitives = configuration.maxPrimitives; 2809 #endif 2810 } 2811 2812 if(!initialUpdate && !worker[0]) 2813 { 2814 initializeThreads(); 2815 } 2816 } 2817 } 2818