// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "Renderer.hpp"

#include "Clipper.hpp"
#include "Surface.hpp"
#include "Primitive.hpp"
#include "Polygon.hpp"
#include "Main/FrameBuffer.hpp"
#include "Main/SwiftConfig.hpp"
#include "Reactor/Reactor.hpp"
#include "Shader/Constants.hpp"
#include "Common/MutexLock.hpp"
#include "Common/CPUID.hpp"
#include "Common/Memory.hpp"
#include "Common/Resource.hpp"
#include "Common/Half.hpp"
#include "Common/Math.hpp"
#include "Common/Timer.hpp"
#include "Common/Debug.hpp"

#undef max

// Forwarded to the SwiftConfig constructor when a Renderer is created.
bool disableServer = true;

#ifndef NDEBUG
// Debug-only filter: Renderer::draw() returns immediately for draws whose
// primitive count lies outside [minPrimitives, maxPrimitives].
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1 << 21;
#endif

namespace sw
{
// Global rendering conventions. They are declared here and assigned once by
// setGlobalRenderingSettings() from the Conventions passed to the first Renderer.
extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
extern bool booleanFaceRegister;
extern bool fullPixelPositionRegister;
extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
extern bool secondaryColor;             // Specular lighting is applied after texturing
extern bool colorsDefaultToZero;

extern bool forceWindowed;
extern bool complementaryDepthBuffer;
extern bool postBlendSRGB;
extern bool exactColorRounding;
extern TransparencyAntialiasing transparencyAntialiasing;
extern bool forceClearRegisters; 59 60 extern bool precacheVertex; 61 extern bool precacheSetup; 62 extern bool precachePixel; 63 64 static const int batchSize = 128; 65 AtomicInt threadCount(1); 66 AtomicInt Renderer::unitCount(1); 67 AtomicInt Renderer::clusterCount(1); 68 69 TranscendentalPrecision logPrecision = ACCURATE; 70 TranscendentalPrecision expPrecision = ACCURATE; 71 TranscendentalPrecision rcpPrecision = ACCURATE; 72 TranscendentalPrecision rsqPrecision = ACCURATE; 73 bool perspectiveCorrection = true; 74 75 static void setGlobalRenderingSettings(Conventions conventions, bool exactColorRounding) 76 { 77 static bool initialized = false; 78 79 if(!initialized) 80 { 81 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates; 82 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth; 83 sw::booleanFaceRegister = conventions.booleanFaceRegister; 84 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister; 85 sw::leadingVertexFirst = conventions.leadingVertexFirst; 86 sw::secondaryColor = conventions.secondaryColor; 87 sw::colorsDefaultToZero = conventions.colorsDefaultToZero; 88 sw::exactColorRounding = exactColorRounding; 89 initialized = true; 90 } 91 } 92 93 struct Parameters 94 { 95 Renderer *renderer; 96 int threadIndex; 97 }; 98 99 Query::Query(Type type) : building(false), data(0), type(type), reference(1) 100 { 101 } 102 103 void Query::addRef() 104 { 105 ++reference; // Atomic 106 } 107 108 void Query::release() 109 { 110 int ref = reference--; // Atomic 111 112 ASSERT(ref >= 0); 113 114 if(ref == 0) 115 { 116 delete this; 117 } 118 } 119 120 DrawCall::DrawCall() 121 { 122 queries = 0; 123 124 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; 125 vsDirtyConstI = 16; 126 vsDirtyConstB = 16; 127 128 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS; 129 psDirtyConstI = 16; 130 psDirtyConstB = 16; 131 132 references = -1; 133 134 data = (DrawData*)allocate(sizeof(DrawData)); 135 data->constants = &constants; 136 } 137 138 
DrawCall::~DrawCall() 139 { 140 delete queries; 141 142 deallocate(data); 143 } 144 145 Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport() 146 { 147 setGlobalRenderingSettings(conventions, exactColorRounding); 148 149 setRenderTarget(0, 0); 150 clipper = new Clipper(symmetricNormalizedDepth); 151 blitter = new Blitter; 152 153 updateViewMatrix = true; 154 updateBaseMatrix = true; 155 updateProjectionMatrix = true; 156 updateClipPlanes = true; 157 158 #if PERF_HUD 159 resetTimers(); 160 #endif 161 162 for(int i = 0; i < 16; i++) 163 { 164 vertexTask[i] = 0; 165 166 worker[i] = 0; 167 resume[i] = 0; 168 suspend[i] = 0; 169 } 170 171 threadsAwake = 0; 172 resumeApp = new Event(); 173 174 currentDraw = 0; 175 nextDraw = 0; 176 177 qHead = 0; 178 qSize = 0; 179 180 for(int i = 0; i < 16; i++) 181 { 182 triangleBatch[i] = 0; 183 primitiveBatch[i] = 0; 184 } 185 186 for(int draw = 0; draw < DRAW_COUNT; draw++) 187 { 188 drawCall[draw] = new DrawCall(); 189 drawList[draw] = drawCall[draw]; 190 } 191 192 for(int unit = 0; unit < 16; unit++) 193 { 194 primitiveProgress[unit].init(); 195 } 196 197 for(int cluster = 0; cluster < 16; cluster++) 198 { 199 pixelProgress[cluster].init(); 200 } 201 202 clipFlags = 0; 203 204 swiftConfig = new SwiftConfig(disableServer); 205 updateConfiguration(true); 206 207 sync = new Resource(0); 208 } 209 210 Renderer::~Renderer() 211 { 212 sync->destruct(); 213 214 delete clipper; 215 clipper = nullptr; 216 217 delete blitter; 218 blitter = nullptr; 219 220 terminateThreads(); 221 delete resumeApp; 222 223 for(int draw = 0; draw < DRAW_COUNT; draw++) 224 { 225 delete drawCall[draw]; 226 } 227 228 delete swiftConfig; 229 } 230 231 // This object has to be mem aligned 232 void* Renderer::operator new(size_t size) 233 { 234 ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived 
class 235 return sw::allocate(sizeof(Renderer), 16); 236 } 237 238 void Renderer::operator delete(void * mem) 239 { 240 sw::deallocate(mem); 241 } 242 243 void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update) 244 { 245 #ifndef NDEBUG 246 if(count < minPrimitives || count > maxPrimitives) 247 { 248 return; 249 } 250 #endif 251 252 context->drawType = drawType; 253 254 updateConfiguration(); 255 updateClipper(); 256 257 int ss = context->getSuperSampleCount(); 258 int ms = context->getMultiSampleCount(); 259 bool requiresSync = false; 260 261 for(int q = 0; q < ss; q++) 262 { 263 unsigned int oldMultiSampleMask = context->multiSampleMask; 264 context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms)); 265 266 if(!context->multiSampleMask) 267 { 268 continue; 269 } 270 271 sync->lock(sw::PRIVATE); 272 273 if(update || oldMultiSampleMask != context->multiSampleMask) 274 { 275 vertexState = VertexProcessor::update(drawType); 276 setupState = SetupProcessor::update(); 277 pixelState = PixelProcessor::update(); 278 279 vertexRoutine = VertexProcessor::routine(vertexState); 280 setupRoutine = SetupProcessor::routine(setupState); 281 pixelRoutine = PixelProcessor::routine(pixelState); 282 } 283 284 int batch = batchSize / ms; 285 286 int (Renderer::*setupPrimitives)(int batch, int count); 287 288 if(context->isDrawTriangle()) 289 { 290 switch(context->fillMode) 291 { 292 case FILL_SOLID: 293 setupPrimitives = &Renderer::setupSolidTriangles; 294 break; 295 case FILL_WIREFRAME: 296 setupPrimitives = &Renderer::setupWireframeTriangle; 297 batch = 1; 298 break; 299 case FILL_VERTEX: 300 setupPrimitives = &Renderer::setupVertexTriangle; 301 batch = 1; 302 break; 303 default: 304 ASSERT(false); 305 return; 306 } 307 } 308 else if(context->isDrawLine()) 309 { 310 setupPrimitives = &Renderer::setupLines; 311 } 312 else // Point draw 313 { 314 setupPrimitives = &Renderer::setupPoints; 315 } 316 317 
DrawCall *draw = nullptr; 318 319 do 320 { 321 for(int i = 0; i < DRAW_COUNT; i++) 322 { 323 if(drawCall[i]->references == -1) 324 { 325 draw = drawCall[i]; 326 drawList[nextDraw & DRAW_COUNT_BITS] = draw; 327 328 break; 329 } 330 } 331 332 if(!draw) 333 { 334 resumeApp->wait(); 335 } 336 } 337 while(!draw); 338 339 DrawData *data = draw->data; 340 341 if(queries.size() != 0) 342 { 343 draw->queries = new std::list<Query*>(); 344 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled; 345 for(auto &query : queries) 346 { 347 if(includePrimitivesWrittenQueries || (query->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN)) 348 { 349 query->addRef(); 350 draw->queries->push_back(query); 351 } 352 } 353 } 354 355 draw->drawType = drawType; 356 draw->batchSize = batch; 357 358 vertexRoutine->bind(); 359 setupRoutine->bind(); 360 pixelRoutine->bind(); 361 362 draw->vertexRoutine = vertexRoutine; 363 draw->setupRoutine = setupRoutine; 364 draw->pixelRoutine = pixelRoutine; 365 draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry(); 366 draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry(); 367 draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry(); 368 draw->setupPrimitives = setupPrimitives; 369 draw->setupState = setupState; 370 371 for(int i = 0; i < MAX_VERTEX_INPUTS; i++) 372 { 373 draw->vertexStream[i] = context->input[i].resource; 374 data->input[i] = context->input[i].buffer; 375 data->stride[i] = context->input[i].stride; 376 377 if(draw->vertexStream[i]) 378 { 379 draw->vertexStream[i]->lock(PUBLIC, PRIVATE); 380 } 381 } 382 383 if(context->indexBuffer) 384 { 385 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset; 386 } 387 388 draw->indexBuffer = context->indexBuffer; 389 390 for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++) 391 { 392 draw->texture[sampler] = 0; 393 } 394 
395 for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++) 396 { 397 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL) 398 { 399 draw->texture[sampler] = context->texture[sampler]; 400 draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texure is both read and written, use the same read/write lock as render targets 401 402 data->mipmap[sampler] = context->sampler[sampler].getTextureData(); 403 404 requiresSync |= context->sampler[sampler].requiresSync(); 405 } 406 } 407 408 if(context->pixelShader) 409 { 410 if(draw->psDirtyConstF) 411 { 412 memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8)); 413 memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF); 414 draw->psDirtyConstF = 0; 415 } 416 417 if(draw->psDirtyConstI) 418 { 419 memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI); 420 draw->psDirtyConstI = 0; 421 } 422 423 if(draw->psDirtyConstB) 424 { 425 memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB); 426 draw->psDirtyConstB = 0; 427 } 428 429 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers); 430 } 431 else 432 { 433 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 434 { 435 draw->pUniformBuffers[i] = nullptr; 436 } 437 } 438 439 if(context->pixelShaderModel() <= 0x0104) 440 { 441 for(int stage = 0; stage < 8; stage++) 442 { 443 if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader) 444 { 445 data->textureStage[stage] = context->textureStage[stage].uniforms; 446 } 447 else break; 448 } 449 } 450 451 if(context->vertexShader) 452 { 453 if(context->vertexShader->getShaderModel() >= 0x0300) 454 { 455 for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++) 456 { 457 if(vertexState.sampler[sampler].textureType != TEXTURE_NULL) 458 { 459 draw->texture[TEXTURE_IMAGE_UNITS + sampler] = 
context->texture[TEXTURE_IMAGE_UNITS + sampler]; 460 draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE); 461 462 data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData(); 463 464 requiresSync |= context->sampler[TEXTURE_IMAGE_UNITS + sampler].requiresSync(); 465 } 466 } 467 } 468 469 if(draw->vsDirtyConstF) 470 { 471 memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF); 472 draw->vsDirtyConstF = 0; 473 } 474 475 if(draw->vsDirtyConstI) 476 { 477 memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI); 478 draw->vsDirtyConstI = 0; 479 } 480 481 if(draw->vsDirtyConstB) 482 { 483 memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB); 484 draw->vsDirtyConstB = 0; 485 } 486 487 if(context->vertexShader->isInstanceIdDeclared()) 488 { 489 data->instanceID = context->instanceID; 490 } 491 492 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers); 493 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers); 494 } 495 else 496 { 497 data->ff = ff; 498 499 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; 500 draw->vsDirtyConstI = 16; 501 draw->vsDirtyConstB = 16; 502 503 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 504 { 505 draw->vUniformBuffers[i] = nullptr; 506 } 507 508 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) 509 { 510 draw->transformFeedbackBuffers[i] = nullptr; 511 } 512 } 513 514 if(pixelState.stencilActive) 515 { 516 data->stencil[0] = stencil; 517 data->stencil[1] = stencilCCW; 518 } 519 520 if(pixelState.fogActive) 521 { 522 data->fog = fog; 523 } 524 525 if(setupState.isDrawPoint) 526 { 527 data->point = point; 528 } 529 530 data->lineWidth = context->lineWidth; 531 532 data->factor = factor; 533 534 if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) 535 { 536 
float ref = context->alphaReference * (1.0f / 255.0f); 537 float margin = sw::min(ref, 1.0f - ref); 538 539 if(ms == 4) 540 { 541 data->a2c0 = replicate(ref - margin * 0.6f); 542 data->a2c1 = replicate(ref - margin * 0.2f); 543 data->a2c2 = replicate(ref + margin * 0.2f); 544 data->a2c3 = replicate(ref + margin * 0.6f); 545 } 546 else if(ms == 2) 547 { 548 data->a2c0 = replicate(ref - margin * 0.3f); 549 data->a2c1 = replicate(ref + margin * 0.3f); 550 } 551 else ASSERT(false); 552 } 553 554 if(pixelState.occlusionEnabled) 555 { 556 for(int cluster = 0; cluster < clusterCount; cluster++) 557 { 558 data->occlusion[cluster] = 0; 559 } 560 } 561 562 #if PERF_PROFILE 563 for(int cluster = 0; cluster < clusterCount; cluster++) 564 { 565 for(int i = 0; i < PERF_TIMERS; i++) 566 { 567 data->cycles[i][cluster] = 0; 568 } 569 } 570 #endif 571 572 // Viewport 573 { 574 float W = 0.5f * viewport.width; 575 float H = 0.5f * viewport.height; 576 float X0 = viewport.x0 + W; 577 float Y0 = viewport.y0 + H; 578 float N = viewport.minZ; 579 float F = viewport.maxZ; 580 float Z = F - N; 581 582 if(context->isDrawTriangle(false)) 583 { 584 N += context->depthBias; 585 } 586 587 if(complementaryDepthBuffer) 588 { 589 Z = -Z; 590 N = 1 - N; 591 } 592 593 static const float X[5][16] = // Fragment offsets 594 { 595 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample 596 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples 597 {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples 598 {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, 
+0.0000f, +0.0000f}, // 8 samples 599 {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples 600 }; 601 602 static const float Y[5][16] = // Fragment offsets 603 { 604 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample 605 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples 606 {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples 607 {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples 608 {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples 609 }; 610 611 int s = sw::log2(ss); 612 613 data->Wx16 = replicate(W * 16); 614 data->Hx16 = replicate(H * 16); 615 data->X0x16 = replicate(X0 * 16 - 8); 616 data->Y0x16 = replicate(Y0 * 16 - 8); 617 data->XXXX = replicate(X[s][q] / W); 618 data->YYYY = replicate(Y[s][q] / H); 619 data->halfPixelX = replicate(0.5f / W); 620 data->halfPixelY = replicate(0.5f / H); 621 data->viewportHeight = abs(viewport.height); 622 data->slopeDepthBias = context->slopeDepthBias; 623 data->depthRange = Z; 624 data->depthNear = N; 625 draw->clipFlags = clipFlags; 626 627 if(clipFlags) 628 { 629 if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0]; 630 if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1]; 631 if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2]; 632 
if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3]; 633 if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4]; 634 if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5]; 635 } 636 } 637 638 // Target 639 { 640 for(int index = 0; index < RENDERTARGETS; index++) 641 { 642 draw->renderTarget[index] = context->renderTarget[index]; 643 644 if(draw->renderTarget[index]) 645 { 646 unsigned int layer = context->renderTargetLayer[index]; 647 requiresSync |= context->renderTarget[index]->requiresSync(); 648 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED); 649 data->colorBuffer[index] += q * ms * context->renderTarget[index]->getSliceB(true); 650 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB(); 651 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB(); 652 } 653 } 654 655 draw->depthBuffer = context->depthBuffer; 656 draw->stencilBuffer = context->stencilBuffer; 657 658 if(draw->depthBuffer) 659 { 660 unsigned int layer = context->depthBufferLayer; 661 requiresSync |= context->depthBuffer->requiresSync(); 662 data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED); 663 data->depthBuffer += q * ms * context->depthBuffer->getSliceB(true); 664 data->depthPitchB = context->depthBuffer->getInternalPitchB(); 665 data->depthSliceB = context->depthBuffer->getInternalSliceB(); 666 } 667 668 if(draw->stencilBuffer) 669 { 670 unsigned int layer = context->stencilBufferLayer; 671 requiresSync |= context->stencilBuffer->requiresSync(); 672 data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, layer, MANAGED); 673 data->stencilBuffer += q * ms * context->stencilBuffer->getSliceB(true); 674 data->stencilPitchB = context->stencilBuffer->getStencilPitchB(); 675 data->stencilSliceB = context->stencilBuffer->getStencilSliceB(); 676 } 677 } 678 679 
// Scissor 680 { 681 data->scissorX0 = scissor.x0; 682 data->scissorX1 = scissor.x1; 683 data->scissorY0 = scissor.y0; 684 data->scissorY1 = scissor.y1; 685 } 686 687 draw->primitive = 0; 688 draw->count = count; 689 690 draw->references = (count + batch - 1) / batch; 691 692 schedulerMutex.lock(); 693 ++nextDraw; // Atomic 694 schedulerMutex.unlock(); 695 696 #ifndef NDEBUG 697 if(threadCount == 1) // Use main thread for draw execution 698 { 699 threadsAwake = 1; 700 task[0].type = Task::RESUME; 701 702 taskLoop(0); 703 } 704 else 705 #endif 706 { 707 if(!threadsAwake) 708 { 709 suspend[0]->wait(); 710 711 threadsAwake = 1; 712 task[0].type = Task::RESUME; 713 714 resume[0]->signal(); 715 } 716 } 717 } 718 719 // TODO(sugoi): This is a temporary brute-force workaround to ensure IOSurface synchronization. 720 if(requiresSync) 721 { 722 synchronize(); 723 } 724 } 725 726 void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask) 727 { 728 blitter->clear(value, format, dest, clearRect, rgbaMask); 729 } 730 731 void Renderer::blit(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil, bool sRGBconversion) 732 { 733 blitter->blit(source, sRect, dest, dRect, {filter, isStencil, sRGBconversion}); 734 } 735 736 void Renderer::blit3D(Surface *source, Surface *dest) 737 { 738 blitter->blit3D(source, dest); 739 } 740 741 void Renderer::threadFunction(void *parameters) 742 { 743 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer; 744 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex; 745 746 if(logPrecision < IEEE) 747 { 748 CPUID::setFlushToZero(true); 749 CPUID::setDenormalsAreZero(true); 750 } 751 752 renderer->threadLoop(threadIndex); 753 } 754 755 void Renderer::threadLoop(int threadIndex) 756 { 757 while(!exitThreads) 758 { 759 taskLoop(threadIndex); 760 761 suspend[threadIndex]->signal(); 762 resume[threadIndex]->wait(); 763 } 764 
} 765 766 void Renderer::taskLoop(int threadIndex) 767 { 768 while(task[threadIndex].type != Task::SUSPEND) 769 { 770 scheduleTask(threadIndex); 771 executeTask(threadIndex); 772 } 773 } 774 775 void Renderer::findAvailableTasks() 776 { 777 // Find pixel tasks 778 for(int cluster = 0; cluster < clusterCount; cluster++) 779 { 780 if(!pixelProgress[cluster].executing) 781 { 782 for(int unit = 0; unit < unitCount; unit++) 783 { 784 if(primitiveProgress[unit].references > 0) // Contains processed primitives 785 { 786 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall) 787 { 788 if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered 789 { 790 Task &task = taskQueue[qHead]; 791 task.type = Task::PIXELS; 792 task.primitiveUnit = unit; 793 task.pixelCluster = cluster; 794 795 pixelProgress[cluster].executing = true; 796 797 // Commit to the task queue 798 qHead = (qHead + 1) & TASK_COUNT_BITS; 799 qSize++; 800 801 break; 802 } 803 } 804 } 805 } 806 } 807 } 808 809 // Find primitive tasks 810 if(currentDraw == nextDraw) 811 { 812 return; // No more primitives to process 813 } 814 815 for(int unit = 0; unit < unitCount; unit++) 816 { 817 DrawCall *draw = drawList[currentDraw & DRAW_COUNT_BITS]; 818 819 int primitive = draw->primitive; 820 int count = draw->count; 821 822 if(primitive >= count) 823 { 824 ++currentDraw; // Atomic 825 826 if(currentDraw == nextDraw) 827 { 828 return; // No more primitives to process 829 } 830 831 draw = drawList[currentDraw & DRAW_COUNT_BITS]; 832 } 833 834 if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit 835 { 836 primitive = draw->primitive; 837 count = draw->count; 838 int batch = draw->batchSize; 839 840 primitiveProgress[unit].drawCall = currentDraw; 841 primitiveProgress[unit].firstPrimitive = primitive; 842 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? 
batch : count - primitive; 843 844 draw->primitive += batch; 845 846 Task &task = taskQueue[qHead]; 847 task.type = Task::PRIMITIVES; 848 task.primitiveUnit = unit; 849 850 primitiveProgress[unit].references = -1; 851 852 // Commit to the task queue 853 qHead = (qHead + 1) & TASK_COUNT_BITS; 854 qSize++; 855 } 856 } 857 } 858 859 void Renderer::scheduleTask(int threadIndex) 860 { 861 schedulerMutex.lock(); 862 863 int curThreadsAwake = threadsAwake; 864 865 if((int)qSize < threadCount - curThreadsAwake + 1) 866 { 867 findAvailableTasks(); 868 } 869 870 if(qSize != 0) 871 { 872 task[threadIndex] = taskQueue[(qHead - qSize) & TASK_COUNT_BITS]; 873 qSize--; 874 875 if(curThreadsAwake != threadCount) 876 { 877 int wakeup = qSize - curThreadsAwake + 1; 878 879 for(int i = 0; i < threadCount && wakeup > 0; i++) 880 { 881 if(task[i].type == Task::SUSPEND) 882 { 883 suspend[i]->wait(); 884 task[i].type = Task::RESUME; 885 resume[i]->signal(); 886 887 ++threadsAwake; // Atomic 888 wakeup--; 889 } 890 } 891 } 892 } 893 else 894 { 895 task[threadIndex].type = Task::SUSPEND; 896 897 --threadsAwake; // Atomic 898 } 899 900 schedulerMutex.unlock(); 901 } 902 903 void Renderer::executeTask(int threadIndex) 904 { 905 #if PERF_HUD 906 int64_t startTick = Timer::ticks(); 907 #endif 908 909 switch(task[threadIndex].type) 910 { 911 case Task::PRIMITIVES: 912 { 913 int unit = task[threadIndex].primitiveUnit; 914 915 int input = primitiveProgress[unit].firstPrimitive; 916 int count = primitiveProgress[unit].primitiveCount; 917 DrawCall *draw = drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 918 int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives; 919 920 processPrimitiveVertices(unit, input, count, draw->count, threadIndex); 921 922 #if PERF_HUD 923 int64_t time = Timer::ticks(); 924 vertexTime[threadIndex] += time - startTick; 925 startTick = time; 926 #endif 927 928 int visible = 0; 929 930 if(!draw->setupState.rasterizerDiscard) 931 { 932 
visible = (this->*setupPrimitives)(unit, count); 933 } 934 935 primitiveProgress[unit].visible = visible; 936 primitiveProgress[unit].references = clusterCount; 937 938 #if PERF_HUD 939 setupTime[threadIndex] += Timer::ticks() - startTick; 940 #endif 941 } 942 break; 943 case Task::PIXELS: 944 { 945 int unit = task[threadIndex].primitiveUnit; 946 int visible = primitiveProgress[unit].visible; 947 948 if(visible > 0) 949 { 950 int cluster = task[threadIndex].pixelCluster; 951 Primitive *primitive = primitiveBatch[unit]; 952 DrawCall *draw = drawList[pixelProgress[cluster].drawCall & DRAW_COUNT_BITS]; 953 DrawData *data = draw->data; 954 PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer; 955 956 pixelRoutine(primitive, visible, cluster, data); 957 } 958 959 finishRendering(task[threadIndex]); 960 961 #if PERF_HUD 962 pixelTime[threadIndex] += Timer::ticks() - startTick; 963 #endif 964 } 965 break; 966 case Task::RESUME: 967 break; 968 case Task::SUSPEND: 969 break; 970 default: 971 ASSERT(false); 972 } 973 } 974 975 void Renderer::synchronize() 976 { 977 sync->lock(sw::PUBLIC); 978 sync->unlock(); 979 } 980 981 void Renderer::finishRendering(Task &pixelTask) 982 { 983 int unit = pixelTask.primitiveUnit; 984 int cluster = pixelTask.pixelCluster; 985 986 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 987 DrawData &data = *draw.data; 988 int primitive = primitiveProgress[unit].firstPrimitive; 989 int count = primitiveProgress[unit].primitiveCount; 990 int processedPrimitives = primitive + count; 991 992 pixelProgress[cluster].processedPrimitives = processedPrimitives; 993 994 if(pixelProgress[cluster].processedPrimitives >= draw.count) 995 { 996 ++pixelProgress[cluster].drawCall; // Atomic 997 pixelProgress[cluster].processedPrimitives = 0; 998 } 999 1000 int ref = primitiveProgress[unit].references--; // Atomic 1001 1002 if(ref == 0) 1003 { 1004 ref = draw.references--; // Atomic 1005 1006 if(ref == 0) 1007 { 1008 #if 
PERF_PROFILE 1009 for(int cluster = 0; cluster < clusterCount; cluster++) 1010 { 1011 for(int i = 0; i < PERF_TIMERS; i++) 1012 { 1013 profiler.cycles[i] += data.cycles[i][cluster]; 1014 } 1015 } 1016 #endif 1017 1018 if(draw.queries) 1019 { 1020 for(auto &query : *(draw.queries)) 1021 { 1022 switch(query->type) 1023 { 1024 case Query::FRAGMENTS_PASSED: 1025 for(int cluster = 0; cluster < clusterCount; cluster++) 1026 { 1027 query->data += data.occlusion[cluster]; 1028 } 1029 break; 1030 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: 1031 query->data += processedPrimitives; 1032 break; 1033 default: 1034 break; 1035 } 1036 1037 query->release(); 1038 } 1039 1040 delete draw.queries; 1041 draw.queries = 0; 1042 } 1043 1044 for(int i = 0; i < RENDERTARGETS; i++) 1045 { 1046 if(draw.renderTarget[i]) 1047 { 1048 draw.renderTarget[i]->unlockInternal(); 1049 } 1050 } 1051 1052 if(draw.depthBuffer) 1053 { 1054 draw.depthBuffer->unlockInternal(); 1055 } 1056 1057 if(draw.stencilBuffer) 1058 { 1059 draw.stencilBuffer->unlockStencil(); 1060 } 1061 1062 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++) 1063 { 1064 if(draw.texture[i]) 1065 { 1066 draw.texture[i]->unlock(); 1067 } 1068 } 1069 1070 for(int i = 0; i < MAX_VERTEX_INPUTS; i++) 1071 { 1072 if(draw.vertexStream[i]) 1073 { 1074 draw.vertexStream[i]->unlock(); 1075 } 1076 } 1077 1078 if(draw.indexBuffer) 1079 { 1080 draw.indexBuffer->unlock(); 1081 } 1082 1083 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 1084 { 1085 if(draw.pUniformBuffers[i]) 1086 { 1087 draw.pUniformBuffers[i]->unlock(); 1088 } 1089 if(draw.vUniformBuffers[i]) 1090 { 1091 draw.vUniformBuffers[i]->unlock(); 1092 } 1093 } 1094 1095 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) 1096 { 1097 if(draw.transformFeedbackBuffers[i]) 1098 { 1099 draw.transformFeedbackBuffers[i]->unlock(); 1100 } 1101 } 1102 1103 draw.vertexRoutine->unbind(); 1104 draw.setupRoutine->unbind(); 1105 draw.pixelRoutine->unbind(); 1106 1107 
sync->unlock(); 1108 1109 draw.references = -1; 1110 resumeApp->signal(); 1111 } 1112 } 1113 1114 pixelProgress[cluster].executing = false; 1115 } 1116 1117 void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread) 1118 { 1119 Triangle *triangle = triangleBatch[unit]; 1120 int primitiveDrawCall = primitiveProgress[unit].drawCall; 1121 DrawCall *draw = drawList[primitiveDrawCall & DRAW_COUNT_BITS]; 1122 DrawData *data = draw->data; 1123 VertexTask *task = vertexTask[thread]; 1124 1125 const void *indices = data->indices; 1126 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer; 1127 1128 if(task->vertexCache.drawCall != primitiveDrawCall) 1129 { 1130 task->vertexCache.clear(); 1131 task->vertexCache.drawCall = primitiveDrawCall; 1132 } 1133 1134 unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size 1135 1136 switch(draw->drawType) 1137 { 1138 case DRAW_POINTLIST: 1139 { 1140 unsigned int index = start; 1141 1142 for(unsigned int i = 0; i < triangleCount; i++) 1143 { 1144 batch[i][0] = index; 1145 batch[i][1] = index; 1146 batch[i][2] = index; 1147 1148 index += 1; 1149 } 1150 } 1151 break; 1152 case DRAW_LINELIST: 1153 { 1154 unsigned int index = 2 * start; 1155 1156 for(unsigned int i = 0; i < triangleCount; i++) 1157 { 1158 batch[i][0] = index + 0; 1159 batch[i][1] = index + 1; 1160 batch[i][2] = index + 1; 1161 1162 index += 2; 1163 } 1164 } 1165 break; 1166 case DRAW_LINESTRIP: 1167 { 1168 unsigned int index = start; 1169 1170 for(unsigned int i = 0; i < triangleCount; i++) 1171 { 1172 batch[i][0] = index + 0; 1173 batch[i][1] = index + 1; 1174 batch[i][2] = index + 1; 1175 1176 index += 1; 1177 } 1178 } 1179 break; 1180 case DRAW_LINELOOP: 1181 { 1182 unsigned int index = start; 1183 1184 for(unsigned int i = 0; i < triangleCount; i++) 1185 { 1186 batch[i][0] = (index + 0) % loop; 1187 batch[i][1] = (index + 1) % loop; 1188 batch[i][2] = (index + 1) % loop; 1189 
1190 index += 1; 1191 } 1192 } 1193 break; 1194 case DRAW_TRIANGLELIST: 1195 { 1196 unsigned int index = 3 * start; 1197 1198 for(unsigned int i = 0; i < triangleCount; i++) 1199 { 1200 batch[i][0] = index + 0; 1201 batch[i][1] = index + 1; 1202 batch[i][2] = index + 2; 1203 1204 index += 3; 1205 } 1206 } 1207 break; 1208 case DRAW_TRIANGLESTRIP: 1209 { 1210 unsigned int index = start; 1211 1212 for(unsigned int i = 0; i < triangleCount; i++) 1213 { 1214 if(leadingVertexFirst) 1215 { 1216 batch[i][0] = index + 0; 1217 batch[i][1] = index + (index & 1) + 1; 1218 batch[i][2] = index + (~index & 1) + 1; 1219 } 1220 else 1221 { 1222 batch[i][0] = index + (index & 1); 1223 batch[i][1] = index + (~index & 1); 1224 batch[i][2] = index + 2; 1225 } 1226 1227 index += 1; 1228 } 1229 } 1230 break; 1231 case DRAW_TRIANGLEFAN: 1232 { 1233 unsigned int index = start; 1234 1235 for(unsigned int i = 0; i < triangleCount; i++) 1236 { 1237 if(leadingVertexFirst) 1238 { 1239 batch[i][0] = index + 1; 1240 batch[i][1] = index + 2; 1241 batch[i][2] = 0; 1242 } 1243 else 1244 { 1245 batch[i][0] = 0; 1246 batch[i][1] = index + 1; 1247 batch[i][2] = index + 2; 1248 } 1249 1250 index += 1; 1251 } 1252 } 1253 break; 1254 case DRAW_INDEXEDPOINTLIST8: 1255 { 1256 const unsigned char *index = (const unsigned char*)indices + start; 1257 1258 for(unsigned int i = 0; i < triangleCount; i++) 1259 { 1260 batch[i][0] = *index; 1261 batch[i][1] = *index; 1262 batch[i][2] = *index; 1263 1264 index += 1; 1265 } 1266 } 1267 break; 1268 case DRAW_INDEXEDPOINTLIST16: 1269 { 1270 const unsigned short *index = (const unsigned short*)indices + start; 1271 1272 for(unsigned int i = 0; i < triangleCount; i++) 1273 { 1274 batch[i][0] = *index; 1275 batch[i][1] = *index; 1276 batch[i][2] = *index; 1277 1278 index += 1; 1279 } 1280 } 1281 break; 1282 case DRAW_INDEXEDPOINTLIST32: 1283 { 1284 const unsigned int *index = (const unsigned int*)indices + start; 1285 1286 for(unsigned int i = 0; i < triangleCount; i++) 
1287 { 1288 batch[i][0] = *index; 1289 batch[i][1] = *index; 1290 batch[i][2] = *index; 1291 1292 index += 1; 1293 } 1294 } 1295 break; 1296 case DRAW_INDEXEDLINELIST8: 1297 { 1298 const unsigned char *index = (const unsigned char*)indices + 2 * start; 1299 1300 for(unsigned int i = 0; i < triangleCount; i++) 1301 { 1302 batch[i][0] = index[0]; 1303 batch[i][1] = index[1]; 1304 batch[i][2] = index[1]; 1305 1306 index += 2; 1307 } 1308 } 1309 break; 1310 case DRAW_INDEXEDLINELIST16: 1311 { 1312 const unsigned short *index = (const unsigned short*)indices + 2 * start; 1313 1314 for(unsigned int i = 0; i < triangleCount; i++) 1315 { 1316 batch[i][0] = index[0]; 1317 batch[i][1] = index[1]; 1318 batch[i][2] = index[1]; 1319 1320 index += 2; 1321 } 1322 } 1323 break; 1324 case DRAW_INDEXEDLINELIST32: 1325 { 1326 const unsigned int *index = (const unsigned int*)indices + 2 * start; 1327 1328 for(unsigned int i = 0; i < triangleCount; i++) 1329 { 1330 batch[i][0] = index[0]; 1331 batch[i][1] = index[1]; 1332 batch[i][2] = index[1]; 1333 1334 index += 2; 1335 } 1336 } 1337 break; 1338 case DRAW_INDEXEDLINESTRIP8: 1339 { 1340 const unsigned char *index = (const unsigned char*)indices + start; 1341 1342 for(unsigned int i = 0; i < triangleCount; i++) 1343 { 1344 batch[i][0] = index[0]; 1345 batch[i][1] = index[1]; 1346 batch[i][2] = index[1]; 1347 1348 index += 1; 1349 } 1350 } 1351 break; 1352 case DRAW_INDEXEDLINESTRIP16: 1353 { 1354 const unsigned short *index = (const unsigned short*)indices + start; 1355 1356 for(unsigned int i = 0; i < triangleCount; i++) 1357 { 1358 batch[i][0] = index[0]; 1359 batch[i][1] = index[1]; 1360 batch[i][2] = index[1]; 1361 1362 index += 1; 1363 } 1364 } 1365 break; 1366 case DRAW_INDEXEDLINESTRIP32: 1367 { 1368 const unsigned int *index = (const unsigned int*)indices + start; 1369 1370 for(unsigned int i = 0; i < triangleCount; i++) 1371 { 1372 batch[i][0] = index[0]; 1373 batch[i][1] = index[1]; 1374 batch[i][2] = index[1]; 1375 1376 
index += 1; 1377 } 1378 } 1379 break; 1380 case DRAW_INDEXEDLINELOOP8: 1381 { 1382 const unsigned char *index = (const unsigned char*)indices; 1383 1384 for(unsigned int i = 0; i < triangleCount; i++) 1385 { 1386 batch[i][0] = index[(start + i + 0) % loop]; 1387 batch[i][1] = index[(start + i + 1) % loop]; 1388 batch[i][2] = index[(start + i + 1) % loop]; 1389 } 1390 } 1391 break; 1392 case DRAW_INDEXEDLINELOOP16: 1393 { 1394 const unsigned short *index = (const unsigned short*)indices; 1395 1396 for(unsigned int i = 0; i < triangleCount; i++) 1397 { 1398 batch[i][0] = index[(start + i + 0) % loop]; 1399 batch[i][1] = index[(start + i + 1) % loop]; 1400 batch[i][2] = index[(start + i + 1) % loop]; 1401 } 1402 } 1403 break; 1404 case DRAW_INDEXEDLINELOOP32: 1405 { 1406 const unsigned int *index = (const unsigned int*)indices; 1407 1408 for(unsigned int i = 0; i < triangleCount; i++) 1409 { 1410 batch[i][0] = index[(start + i + 0) % loop]; 1411 batch[i][1] = index[(start + i + 1) % loop]; 1412 batch[i][2] = index[(start + i + 1) % loop]; 1413 } 1414 } 1415 break; 1416 case DRAW_INDEXEDTRIANGLELIST8: 1417 { 1418 const unsigned char *index = (const unsigned char*)indices + 3 * start; 1419 1420 for(unsigned int i = 0; i < triangleCount; i++) 1421 { 1422 batch[i][0] = index[0]; 1423 batch[i][1] = index[1]; 1424 batch[i][2] = index[2]; 1425 1426 index += 3; 1427 } 1428 } 1429 break; 1430 case DRAW_INDEXEDTRIANGLELIST16: 1431 { 1432 const unsigned short *index = (const unsigned short*)indices + 3 * start; 1433 1434 for(unsigned int i = 0; i < triangleCount; i++) 1435 { 1436 batch[i][0] = index[0]; 1437 batch[i][1] = index[1]; 1438 batch[i][2] = index[2]; 1439 1440 index += 3; 1441 } 1442 } 1443 break; 1444 case DRAW_INDEXEDTRIANGLELIST32: 1445 { 1446 const unsigned int *index = (const unsigned int*)indices + 3 * start; 1447 1448 for(unsigned int i = 0; i < triangleCount; i++) 1449 { 1450 batch[i][0] = index[0]; 1451 batch[i][1] = index[1]; 1452 batch[i][2] = index[2]; 1453 
1454 index += 3; 1455 } 1456 } 1457 break; 1458 case DRAW_INDEXEDTRIANGLESTRIP8: 1459 { 1460 const unsigned char *index = (const unsigned char*)indices + start; 1461 1462 for(unsigned int i = 0; i < triangleCount; i++) 1463 { 1464 batch[i][0] = index[0]; 1465 batch[i][1] = index[((start + i) & 1) + 1]; 1466 batch[i][2] = index[(~(start + i) & 1) + 1]; 1467 1468 index += 1; 1469 } 1470 } 1471 break; 1472 case DRAW_INDEXEDTRIANGLESTRIP16: 1473 { 1474 const unsigned short *index = (const unsigned short*)indices + start; 1475 1476 for(unsigned int i = 0; i < triangleCount; i++) 1477 { 1478 batch[i][0] = index[0]; 1479 batch[i][1] = index[((start + i) & 1) + 1]; 1480 batch[i][2] = index[(~(start + i) & 1) + 1]; 1481 1482 index += 1; 1483 } 1484 } 1485 break; 1486 case DRAW_INDEXEDTRIANGLESTRIP32: 1487 { 1488 const unsigned int *index = (const unsigned int*)indices + start; 1489 1490 for(unsigned int i = 0; i < triangleCount; i++) 1491 { 1492 batch[i][0] = index[0]; 1493 batch[i][1] = index[((start + i) & 1) + 1]; 1494 batch[i][2] = index[(~(start + i) & 1) + 1]; 1495 1496 index += 1; 1497 } 1498 } 1499 break; 1500 case DRAW_INDEXEDTRIANGLEFAN8: 1501 { 1502 const unsigned char *index = (const unsigned char*)indices; 1503 1504 for(unsigned int i = 0; i < triangleCount; i++) 1505 { 1506 batch[i][0] = index[start + i + 1]; 1507 batch[i][1] = index[start + i + 2]; 1508 batch[i][2] = index[0]; 1509 } 1510 } 1511 break; 1512 case DRAW_INDEXEDTRIANGLEFAN16: 1513 { 1514 const unsigned short *index = (const unsigned short*)indices; 1515 1516 for(unsigned int i = 0; i < triangleCount; i++) 1517 { 1518 batch[i][0] = index[start + i + 1]; 1519 batch[i][1] = index[start + i + 2]; 1520 batch[i][2] = index[0]; 1521 } 1522 } 1523 break; 1524 case DRAW_INDEXEDTRIANGLEFAN32: 1525 { 1526 const unsigned int *index = (const unsigned int*)indices; 1527 1528 for(unsigned int i = 0; i < triangleCount; i++) 1529 { 1530 batch[i][0] = index[start + i + 1]; 1531 batch[i][1] = index[start + i + 2]; 
1532 batch[i][2] = index[0]; 1533 } 1534 } 1535 break; 1536 case DRAW_QUADLIST: 1537 { 1538 unsigned int index = 4 * start / 2; 1539 1540 for(unsigned int i = 0; i < triangleCount; i += 2) 1541 { 1542 batch[i+0][0] = index + 0; 1543 batch[i+0][1] = index + 1; 1544 batch[i+0][2] = index + 2; 1545 1546 batch[i+1][0] = index + 0; 1547 batch[i+1][1] = index + 2; 1548 batch[i+1][2] = index + 3; 1549 1550 index += 4; 1551 } 1552 } 1553 break; 1554 default: 1555 ASSERT(false); 1556 return; 1557 } 1558 1559 task->primitiveStart = start; 1560 task->vertexCount = triangleCount * 3; 1561 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data); 1562 } 1563 1564 int Renderer::setupSolidTriangles(int unit, int count) 1565 { 1566 Triangle *triangle = triangleBatch[unit]; 1567 Primitive *primitive = primitiveBatch[unit]; 1568 1569 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 1570 SetupProcessor::State &state = draw.setupState; 1571 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 1572 1573 int ms = state.multiSample; 1574 int pos = state.positionRegister; 1575 const DrawData *data = draw.data; 1576 int visible = 0; 1577 1578 for(int i = 0; i < count; i++, triangle++) 1579 { 1580 Vertex &v0 = triangle->v0; 1581 Vertex &v1 = triangle->v1; 1582 Vertex &v2 = triangle->v2; 1583 1584 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE) 1585 { 1586 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]); 1587 1588 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags; 1589 1590 if(clipFlagsOr != Clipper::CLIP_FINITE) 1591 { 1592 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1593 { 1594 continue; 1595 } 1596 } 1597 1598 if(setupRoutine(primitive, triangle, &polygon, data)) 1599 { 1600 primitive += ms; 1601 visible++; 1602 } 1603 } 1604 } 1605 1606 return visible; 1607 } 1608 1609 int Renderer::setupWireframeTriangle(int unit, int count) 1610 { 1611 Triangle *triangle = 
triangleBatch[unit]; 1612 Primitive *primitive = primitiveBatch[unit]; 1613 int visible = 0; 1614 1615 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 1616 SetupProcessor::State &state = draw.setupState; 1617 1618 const Vertex &v0 = triangle[0].v0; 1619 const Vertex &v1 = triangle[0].v1; 1620 const Vertex &v2 = triangle[0].v2; 1621 1622 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; 1623 1624 if(state.cullMode == CULL_CLOCKWISE) 1625 { 1626 if(d >= 0) return 0; 1627 } 1628 else if(state.cullMode == CULL_COUNTERCLOCKWISE) 1629 { 1630 if(d <= 0) return 0; 1631 } 1632 1633 // Copy attributes 1634 triangle[1].v0 = v1; 1635 triangle[1].v1 = v2; 1636 triangle[2].v0 = v2; 1637 triangle[2].v1 = v0; 1638 1639 if(state.color[0][0].flat) // FIXME 1640 { 1641 for(int i = 0; i < 2; i++) 1642 { 1643 triangle[1].v0.C[i] = triangle[0].v0.C[i]; 1644 triangle[1].v1.C[i] = triangle[0].v0.C[i]; 1645 triangle[2].v0.C[i] = triangle[0].v0.C[i]; 1646 triangle[2].v1.C[i] = triangle[0].v0.C[i]; 1647 } 1648 } 1649 1650 for(int i = 0; i < 3; i++) 1651 { 1652 if(setupLine(*primitive, *triangle, draw)) 1653 { 1654 primitive->area = 0.5f * d; 1655 1656 primitive++; 1657 visible++; 1658 } 1659 1660 triangle++; 1661 } 1662 1663 return visible; 1664 } 1665 1666 int Renderer::setupVertexTriangle(int unit, int count) 1667 { 1668 Triangle *triangle = triangleBatch[unit]; 1669 Primitive *primitive = primitiveBatch[unit]; 1670 int visible = 0; 1671 1672 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 1673 SetupProcessor::State &state = draw.setupState; 1674 1675 const Vertex &v0 = triangle[0].v0; 1676 const Vertex &v1 = triangle[0].v1; 1677 const Vertex &v2 = triangle[0].v2; 1678 1679 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; 1680 1681 if(state.cullMode == CULL_CLOCKWISE) 1682 { 1683 if(d >= 
0) return 0; 1684 } 1685 else if(state.cullMode == CULL_COUNTERCLOCKWISE) 1686 { 1687 if(d <= 0) return 0; 1688 } 1689 1690 // Copy attributes 1691 triangle[1].v0 = v1; 1692 triangle[2].v0 = v2; 1693 1694 for(int i = 0; i < 3; i++) 1695 { 1696 if(setupPoint(*primitive, *triangle, draw)) 1697 { 1698 primitive->area = 0.5f * d; 1699 1700 primitive++; 1701 visible++; 1702 } 1703 1704 triangle++; 1705 } 1706 1707 return visible; 1708 } 1709 1710 int Renderer::setupLines(int unit, int count) 1711 { 1712 Triangle *triangle = triangleBatch[unit]; 1713 Primitive *primitive = primitiveBatch[unit]; 1714 int visible = 0; 1715 1716 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 1717 SetupProcessor::State &state = draw.setupState; 1718 1719 int ms = state.multiSample; 1720 1721 for(int i = 0; i < count; i++) 1722 { 1723 if(setupLine(*primitive, *triangle, draw)) 1724 { 1725 primitive += ms; 1726 visible++; 1727 } 1728 1729 triangle++; 1730 } 1731 1732 return visible; 1733 } 1734 1735 int Renderer::setupPoints(int unit, int count) 1736 { 1737 Triangle *triangle = triangleBatch[unit]; 1738 Primitive *primitive = primitiveBatch[unit]; 1739 int visible = 0; 1740 1741 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 1742 SetupProcessor::State &state = draw.setupState; 1743 1744 int ms = state.multiSample; 1745 1746 for(int i = 0; i < count; i++) 1747 { 1748 if(setupPoint(*primitive, *triangle, draw)) 1749 { 1750 primitive += ms; 1751 visible++; 1752 } 1753 1754 triangle++; 1755 } 1756 1757 return visible; 1758 } 1759 1760 bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw) 1761 { 1762 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 1763 const SetupProcessor::State &state = draw.setupState; 1764 const DrawData &data = *draw.data; 1765 1766 float lineWidth = data.lineWidth; 1767 1768 Vertex &v0 = triangle.v0; 1769 Vertex &v1 = triangle.v1; 1770 1771 int pos = 
state.positionRegister; 1772 1773 const float4 &P0 = v0.v[pos]; 1774 const float4 &P1 = v1.v[pos]; 1775 1776 if(P0.w <= 0 && P1.w <= 0) 1777 { 1778 return false; 1779 } 1780 1781 const float W = data.Wx16[0] * (1.0f / 16.0f); 1782 const float H = data.Hx16[0] * (1.0f / 16.0f); 1783 1784 float dx = W * (P1.x / P1.w - P0.x / P0.w); 1785 float dy = H * (P1.y / P1.w - P0.y / P0.w); 1786 1787 if(dx == 0 && dy == 0) 1788 { 1789 return false; 1790 } 1791 1792 if(state.multiSample > 1) // Rectangle 1793 { 1794 float4 P[4]; 1795 int C[4]; 1796 1797 P[0] = P0; 1798 P[1] = P1; 1799 P[2] = P1; 1800 P[3] = P0; 1801 1802 float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy); 1803 1804 dx *= scale; 1805 dy *= scale; 1806 1807 float dx0h = dx * P0.w / H; 1808 float dy0w = dy * P0.w / W; 1809 1810 float dx1h = dx * P1.w / H; 1811 float dy1w = dy * P1.w / W; 1812 1813 P[0].x += -dy0w; 1814 P[0].y += +dx0h; 1815 C[0] = clipper->computeClipFlags(P[0]); 1816 1817 P[1].x += -dy1w; 1818 P[1].y += +dx1h; 1819 C[1] = clipper->computeClipFlags(P[1]); 1820 1821 P[2].x += +dy1w; 1822 P[2].y += -dx1h; 1823 C[2] = clipper->computeClipFlags(P[2]); 1824 1825 P[3].x += +dy0w; 1826 P[3].y += -dx0h; 1827 C[3] = clipper->computeClipFlags(P[3]); 1828 1829 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) 1830 { 1831 Polygon polygon(P, 4); 1832 1833 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags; 1834 1835 if(clipFlagsOr != Clipper::CLIP_FINITE) 1836 { 1837 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1838 { 1839 return false; 1840 } 1841 } 1842 1843 return setupRoutine(&primitive, &triangle, &polygon, &data); 1844 } 1845 } 1846 else // Diamond test convention 1847 { 1848 float4 P[8]; 1849 int C[8]; 1850 1851 P[0] = P0; 1852 P[1] = P0; 1853 P[2] = P0; 1854 P[3] = P0; 1855 P[4] = P1; 1856 P[5] = P1; 1857 P[6] = P1; 1858 P[7] = P1; 1859 1860 float dx0 = lineWidth * 0.5f * P0.w / W; 1861 float dy0 = lineWidth * 0.5f * P0.w / H; 1862 1863 float dx1 = lineWidth * 0.5f * P1.w / W; 1864 
float dy1 = lineWidth * 0.5f * P1.w / H; 1865 1866 P[0].x += -dx0; 1867 C[0] = clipper->computeClipFlags(P[0]); 1868 1869 P[1].y += +dy0; 1870 C[1] = clipper->computeClipFlags(P[1]); 1871 1872 P[2].x += +dx0; 1873 C[2] = clipper->computeClipFlags(P[2]); 1874 1875 P[3].y += -dy0; 1876 C[3] = clipper->computeClipFlags(P[3]); 1877 1878 P[4].x += -dx1; 1879 C[4] = clipper->computeClipFlags(P[4]); 1880 1881 P[5].y += +dy1; 1882 C[5] = clipper->computeClipFlags(P[5]); 1883 1884 P[6].x += +dx1; 1885 C[6] = clipper->computeClipFlags(P[6]); 1886 1887 P[7].y += -dy1; 1888 C[7] = clipper->computeClipFlags(P[7]); 1889 1890 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE) 1891 { 1892 float4 L[6]; 1893 1894 if(dx > -dy) 1895 { 1896 if(dx > dy) // Right 1897 { 1898 L[0] = P[0]; 1899 L[1] = P[1]; 1900 L[2] = P[5]; 1901 L[3] = P[6]; 1902 L[4] = P[7]; 1903 L[5] = P[3]; 1904 } 1905 else // Down 1906 { 1907 L[0] = P[0]; 1908 L[1] = P[4]; 1909 L[2] = P[5]; 1910 L[3] = P[6]; 1911 L[4] = P[2]; 1912 L[5] = P[3]; 1913 } 1914 } 1915 else 1916 { 1917 if(dx > dy) // Up 1918 { 1919 L[0] = P[0]; 1920 L[1] = P[1]; 1921 L[2] = P[2]; 1922 L[3] = P[6]; 1923 L[4] = P[7]; 1924 L[5] = P[4]; 1925 } 1926 else // Left 1927 { 1928 L[0] = P[1]; 1929 L[1] = P[2]; 1930 L[2] = P[3]; 1931 L[3] = P[7]; 1932 L[4] = P[4]; 1933 L[5] = P[5]; 1934 } 1935 } 1936 1937 Polygon polygon(L, 6); 1938 1939 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags; 1940 1941 if(clipFlagsOr != Clipper::CLIP_FINITE) 1942 { 1943 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1944 { 1945 return false; 1946 } 1947 } 1948 1949 return setupRoutine(&primitive, &triangle, &polygon, &data); 1950 } 1951 } 1952 1953 return false; 1954 } 1955 1956 bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw) 1957 { 1958 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 1959 const SetupProcessor::State &state = 
draw.setupState; 1960 const DrawData &data = *draw.data; 1961 1962 Vertex &v = triangle.v0; 1963 1964 float pSize; 1965 1966 int pts = state.pointSizeRegister; 1967 1968 if(state.pointSizeRegister != Unused) 1969 { 1970 pSize = v.v[pts].y; 1971 } 1972 else 1973 { 1974 pSize = data.point.pointSize[0]; 1975 } 1976 1977 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax); 1978 1979 float4 P[4]; 1980 int C[4]; 1981 1982 int pos = state.positionRegister; 1983 1984 P[0] = v.v[pos]; 1985 P[1] = v.v[pos]; 1986 P[2] = v.v[pos]; 1987 P[3] = v.v[pos]; 1988 1989 const float X = pSize * P[0].w * data.halfPixelX[0]; 1990 const float Y = pSize * P[0].w * data.halfPixelY[0]; 1991 1992 P[0].x -= X; 1993 P[0].y += Y; 1994 C[0] = clipper->computeClipFlags(P[0]); 1995 1996 P[1].x += X; 1997 P[1].y += Y; 1998 C[1] = clipper->computeClipFlags(P[1]); 1999 2000 P[2].x += X; 2001 P[2].y -= Y; 2002 C[2] = clipper->computeClipFlags(P[2]); 2003 2004 P[3].x -= X; 2005 P[3].y -= Y; 2006 C[3] = clipper->computeClipFlags(P[3]); 2007 2008 triangle.v1 = triangle.v0; 2009 triangle.v2 = triangle.v0; 2010 2011 triangle.v1.X += iround(16 * 0.5f * pSize); 2012 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 
1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner 2013 2014 Polygon polygon(P, 4); 2015 2016 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) 2017 { 2018 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags; 2019 2020 if(clipFlagsOr != Clipper::CLIP_FINITE) 2021 { 2022 if(!clipper->clip(polygon, clipFlagsOr, draw)) 2023 { 2024 return false; 2025 } 2026 } 2027 2028 return setupRoutine(&primitive, &triangle, &polygon, &data); 2029 } 2030 2031 return false; 2032 } 2033 2034 void Renderer::initializeThreads() 2035 { 2036 unitCount = ceilPow2(threadCount); 2037 clusterCount = ceilPow2(threadCount); 2038 2039 for(int i = 0; i < unitCount; i++) 2040 { 2041 triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle)); 2042 primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive)); 2043 } 2044 2045 for(int i = 0; i < threadCount; i++) 2046 { 2047 vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask)); 2048 vertexTask[i]->vertexCache.drawCall = -1; 2049 2050 task[i].type = Task::SUSPEND; 2051 2052 resume[i] = new Event(); 2053 suspend[i] = new Event(); 2054 2055 Parameters parameters; 2056 parameters.threadIndex = i; 2057 parameters.renderer = this; 2058 2059 exitThreads = false; 2060 worker[i] = new Thread(threadFunction, ¶meters); 2061 2062 suspend[i]->wait(); 2063 suspend[i]->signal(); 2064 } 2065 } 2066 2067 void Renderer::terminateThreads() 2068 { 2069 while(threadsAwake != 0) 2070 { 2071 Thread::sleep(1); 2072 } 2073 2074 for(int thread = 0; thread < threadCount; thread++) 2075 { 2076 if(worker[thread]) 2077 { 2078 exitThreads = true; 2079 resume[thread]->signal(); 2080 worker[thread]->join(); 2081 2082 delete worker[thread]; 2083 worker[thread] = 0; 2084 delete resume[thread]; 2085 resume[thread] = 0; 2086 delete suspend[thread]; 2087 suspend[thread] = 0; 2088 } 2089 2090 deallocate(vertexTask[thread]); 2091 vertexTask[thread] = 0; 2092 } 2093 2094 for(int i = 0; i < 16; i++) 2095 { 2096 
deallocate(triangleBatch[i]); 2097 triangleBatch[i] = 0; 2098 2099 deallocate(primitiveBatch[i]); 2100 primitiveBatch[i] = 0; 2101 } 2102 } 2103 2104 void Renderer::loadConstants(const VertexShader *vertexShader) 2105 { 2106 if(!vertexShader) return; 2107 2108 size_t count = vertexShader->getLength(); 2109 2110 for(size_t i = 0; i < count; i++) 2111 { 2112 const Shader::Instruction *instruction = vertexShader->getInstruction(i); 2113 2114 if(instruction->opcode == Shader::OPCODE_DEF) 2115 { 2116 int index = instruction->dst.index; 2117 float value[4]; 2118 2119 value[0] = instruction->src[0].value[0]; 2120 value[1] = instruction->src[0].value[1]; 2121 value[2] = instruction->src[0].value[2]; 2122 value[3] = instruction->src[0].value[3]; 2123 2124 setVertexShaderConstantF(index, value); 2125 } 2126 else if(instruction->opcode == Shader::OPCODE_DEFI) 2127 { 2128 int index = instruction->dst.index; 2129 int integer[4]; 2130 2131 integer[0] = instruction->src[0].integer[0]; 2132 integer[1] = instruction->src[0].integer[1]; 2133 integer[2] = instruction->src[0].integer[2]; 2134 integer[3] = instruction->src[0].integer[3]; 2135 2136 setVertexShaderConstantI(index, integer); 2137 } 2138 else if(instruction->opcode == Shader::OPCODE_DEFB) 2139 { 2140 int index = instruction->dst.index; 2141 int boolean = instruction->src[0].boolean[0]; 2142 2143 setVertexShaderConstantB(index, &boolean); 2144 } 2145 } 2146 } 2147 2148 void Renderer::loadConstants(const PixelShader *pixelShader) 2149 { 2150 if(!pixelShader) return; 2151 2152 size_t count = pixelShader->getLength(); 2153 2154 for(size_t i = 0; i < count; i++) 2155 { 2156 const Shader::Instruction *instruction = pixelShader->getInstruction(i); 2157 2158 if(instruction->opcode == Shader::OPCODE_DEF) 2159 { 2160 int index = instruction->dst.index; 2161 float value[4]; 2162 2163 value[0] = instruction->src[0].value[0]; 2164 value[1] = instruction->src[0].value[1]; 2165 value[2] = instruction->src[0].value[2]; 2166 value[3] = 
instruction->src[0].value[3]; 2167 2168 setPixelShaderConstantF(index, value); 2169 } 2170 else if(instruction->opcode == Shader::OPCODE_DEFI) 2171 { 2172 int index = instruction->dst.index; 2173 int integer[4]; 2174 2175 integer[0] = instruction->src[0].integer[0]; 2176 integer[1] = instruction->src[0].integer[1]; 2177 integer[2] = instruction->src[0].integer[2]; 2178 integer[3] = instruction->src[0].integer[3]; 2179 2180 setPixelShaderConstantI(index, integer); 2181 } 2182 else if(instruction->opcode == Shader::OPCODE_DEFB) 2183 { 2184 int index = instruction->dst.index; 2185 int boolean = instruction->src[0].boolean[0]; 2186 2187 setPixelShaderConstantB(index, &boolean); 2188 } 2189 } 2190 } 2191 2192 void Renderer::setIndexBuffer(Resource *indexBuffer) 2193 { 2194 context->indexBuffer = indexBuffer; 2195 } 2196 2197 void Renderer::setMultiSampleMask(unsigned int mask) 2198 { 2199 context->sampleMask = mask; 2200 } 2201 2202 void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing) 2203 { 2204 sw::transparencyAntialiasing = transparencyAntialiasing; 2205 } 2206 2207 bool Renderer::isReadWriteTexture(int sampler) 2208 { 2209 for(int index = 0; index < RENDERTARGETS; index++) 2210 { 2211 if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource()) 2212 { 2213 return true; 2214 } 2215 } 2216 2217 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource()) 2218 { 2219 return true; 2220 } 2221 2222 return false; 2223 } 2224 2225 void Renderer::updateClipper() 2226 { 2227 if(updateClipPlanes) 2228 { 2229 if(VertexProcessor::isFixedFunction()) // User plane in world space 2230 { 2231 const Matrix &scissorWorld = getViewTransform(); 2232 2233 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0]; 2234 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1]; 2235 if(clipFlags & Clipper::CLIP_PLANE2) 
clipPlane[2] = scissorWorld * userPlane[2]; 2236 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3]; 2237 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4]; 2238 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5]; 2239 } 2240 else // User plane in clip space 2241 { 2242 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0]; 2243 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1]; 2244 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2]; 2245 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3]; 2246 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4]; 2247 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5]; 2248 } 2249 2250 updateClipPlanes = false; 2251 } 2252 } 2253 2254 void Renderer::setTextureResource(unsigned int sampler, Resource *resource) 2255 { 2256 ASSERT(sampler < TOTAL_IMAGE_UNITS); 2257 2258 context->texture[sampler] = resource; 2259 } 2260 2261 void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type) 2262 { 2263 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS); 2264 2265 context->sampler[sampler].setTextureLevel(face, level, surface, type); 2266 } 2267 2268 void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter) 2269 { 2270 if(type == SAMPLER_PIXEL) 2271 { 2272 PixelProcessor::setTextureFilter(sampler, textureFilter); 2273 } 2274 else 2275 { 2276 VertexProcessor::setTextureFilter(sampler, textureFilter); 2277 } 2278 } 2279 2280 void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter) 2281 { 2282 if(type == SAMPLER_PIXEL) 2283 { 2284 PixelProcessor::setMipmapFilter(sampler, mipmapFilter); 2285 } 2286 else 2287 { 2288 VertexProcessor::setMipmapFilter(sampler, mipmapFilter); 2289 } 2290 } 2291 2292 void 
Renderer::setGatherEnable(SamplerType type, int sampler, bool enable) 2293 { 2294 if(type == SAMPLER_PIXEL) 2295 { 2296 PixelProcessor::setGatherEnable(sampler, enable); 2297 } 2298 else 2299 { 2300 VertexProcessor::setGatherEnable(sampler, enable); 2301 } 2302 } 2303 2304 void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode) 2305 { 2306 if(type == SAMPLER_PIXEL) 2307 { 2308 PixelProcessor::setAddressingModeU(sampler, addressMode); 2309 } 2310 else 2311 { 2312 VertexProcessor::setAddressingModeU(sampler, addressMode); 2313 } 2314 } 2315 2316 void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode) 2317 { 2318 if(type == SAMPLER_PIXEL) 2319 { 2320 PixelProcessor::setAddressingModeV(sampler, addressMode); 2321 } 2322 else 2323 { 2324 VertexProcessor::setAddressingModeV(sampler, addressMode); 2325 } 2326 } 2327 2328 void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode) 2329 { 2330 if(type == SAMPLER_PIXEL) 2331 { 2332 PixelProcessor::setAddressingModeW(sampler, addressMode); 2333 } 2334 else 2335 { 2336 VertexProcessor::setAddressingModeW(sampler, addressMode); 2337 } 2338 } 2339 2340 void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB) 2341 { 2342 if(type == SAMPLER_PIXEL) 2343 { 2344 PixelProcessor::setReadSRGB(sampler, sRGB); 2345 } 2346 else 2347 { 2348 VertexProcessor::setReadSRGB(sampler, sRGB); 2349 } 2350 } 2351 2352 void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias) 2353 { 2354 if(type == SAMPLER_PIXEL) 2355 { 2356 PixelProcessor::setMipmapLOD(sampler, bias); 2357 } 2358 else 2359 { 2360 VertexProcessor::setMipmapLOD(sampler, bias); 2361 } 2362 } 2363 2364 void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor) 2365 { 2366 if(type == SAMPLER_PIXEL) 2367 { 2368 PixelProcessor::setBorderColor(sampler, borderColor); 2369 } 2370 else 2371 { 2372 
VertexProcessor::setBorderColor(sampler, borderColor); 2373 } 2374 } 2375 2376 void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy) 2377 { 2378 if(type == SAMPLER_PIXEL) 2379 { 2380 PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy); 2381 } 2382 else 2383 { 2384 VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy); 2385 } 2386 } 2387 2388 void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering) 2389 { 2390 if(type == SAMPLER_PIXEL) 2391 { 2392 PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); 2393 } 2394 else 2395 { 2396 VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); 2397 } 2398 } 2399 2400 void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR) 2401 { 2402 if(type == SAMPLER_PIXEL) 2403 { 2404 PixelProcessor::setSwizzleR(sampler, swizzleR); 2405 } 2406 else 2407 { 2408 VertexProcessor::setSwizzleR(sampler, swizzleR); 2409 } 2410 } 2411 2412 void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG) 2413 { 2414 if(type == SAMPLER_PIXEL) 2415 { 2416 PixelProcessor::setSwizzleG(sampler, swizzleG); 2417 } 2418 else 2419 { 2420 VertexProcessor::setSwizzleG(sampler, swizzleG); 2421 } 2422 } 2423 2424 void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB) 2425 { 2426 if(type == SAMPLER_PIXEL) 2427 { 2428 PixelProcessor::setSwizzleB(sampler, swizzleB); 2429 } 2430 else 2431 { 2432 VertexProcessor::setSwizzleB(sampler, swizzleB); 2433 } 2434 } 2435 2436 void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA) 2437 { 2438 if(type == SAMPLER_PIXEL) 2439 { 2440 PixelProcessor::setSwizzleA(sampler, swizzleA); 2441 } 2442 else 2443 { 2444 VertexProcessor::setSwizzleA(sampler, swizzleA); 2445 } 2446 } 2447 2448 void Renderer::setCompareFunc(SamplerType type, int sampler, CompareFunc compFunc) 2449 { 2450 if(type == SAMPLER_PIXEL) 2451 { 2452 
PixelProcessor::setCompareFunc(sampler, compFunc); 2453 } 2454 else 2455 { 2456 VertexProcessor::setCompareFunc(sampler, compFunc); 2457 } 2458 } 2459 2460 void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel) 2461 { 2462 if(type == SAMPLER_PIXEL) 2463 { 2464 PixelProcessor::setBaseLevel(sampler, baseLevel); 2465 } 2466 else 2467 { 2468 VertexProcessor::setBaseLevel(sampler, baseLevel); 2469 } 2470 } 2471 2472 void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel) 2473 { 2474 if(type == SAMPLER_PIXEL) 2475 { 2476 PixelProcessor::setMaxLevel(sampler, maxLevel); 2477 } 2478 else 2479 { 2480 VertexProcessor::setMaxLevel(sampler, maxLevel); 2481 } 2482 } 2483 2484 void Renderer::setMinLod(SamplerType type, int sampler, float minLod) 2485 { 2486 if(type == SAMPLER_PIXEL) 2487 { 2488 PixelProcessor::setMinLod(sampler, minLod); 2489 } 2490 else 2491 { 2492 VertexProcessor::setMinLod(sampler, minLod); 2493 } 2494 } 2495 2496 void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod) 2497 { 2498 if(type == SAMPLER_PIXEL) 2499 { 2500 PixelProcessor::setMaxLod(sampler, maxLod); 2501 } 2502 else 2503 { 2504 VertexProcessor::setMaxLod(sampler, maxLod); 2505 } 2506 } 2507 2508 void Renderer::setSyncRequired(SamplerType type, int sampler, bool syncRequired) 2509 { 2510 if(type == SAMPLER_PIXEL) 2511 { 2512 PixelProcessor::setSyncRequired(sampler, syncRequired); 2513 } 2514 else 2515 { 2516 VertexProcessor::setSyncRequired(sampler, syncRequired); 2517 } 2518 } 2519 2520 void Renderer::setPointSpriteEnable(bool pointSpriteEnable) 2521 { 2522 context->setPointSpriteEnable(pointSpriteEnable); 2523 } 2524 2525 void Renderer::setPointScaleEnable(bool pointScaleEnable) 2526 { 2527 context->setPointScaleEnable(pointScaleEnable); 2528 } 2529 2530 void Renderer::setLineWidth(float width) 2531 { 2532 context->lineWidth = width; 2533 } 2534 2535 void Renderer::setDepthBias(float bias) 2536 { 2537 context->depthBias = bias; 2538 } 2539 2540 void 
Renderer::setSlopeDepthBias(float slopeBias) 2541 { 2542 context->slopeDepthBias = slopeBias; 2543 } 2544 2545 void Renderer::setRasterizerDiscard(bool rasterizerDiscard) 2546 { 2547 context->rasterizerDiscard = rasterizerDiscard; 2548 } 2549 2550 void Renderer::setPixelShader(const PixelShader *shader) 2551 { 2552 context->pixelShader = shader; 2553 2554 loadConstants(shader); 2555 } 2556 2557 void Renderer::setVertexShader(const VertexShader *shader) 2558 { 2559 context->vertexShader = shader; 2560 2561 loadConstants(shader); 2562 } 2563 2564 void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count) 2565 { 2566 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2567 { 2568 if(drawCall[i]->psDirtyConstF < index + count) 2569 { 2570 drawCall[i]->psDirtyConstF = index + count; 2571 } 2572 } 2573 2574 for(unsigned int i = 0; i < count; i++) 2575 { 2576 PixelProcessor::setFloatConstant(index + i, value); 2577 value += 4; 2578 } 2579 } 2580 2581 void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count) 2582 { 2583 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2584 { 2585 if(drawCall[i]->psDirtyConstI < index + count) 2586 { 2587 drawCall[i]->psDirtyConstI = index + count; 2588 } 2589 } 2590 2591 for(unsigned int i = 0; i < count; i++) 2592 { 2593 PixelProcessor::setIntegerConstant(index + i, value); 2594 value += 4; 2595 } 2596 } 2597 2598 void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count) 2599 { 2600 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2601 { 2602 if(drawCall[i]->psDirtyConstB < index + count) 2603 { 2604 drawCall[i]->psDirtyConstB = index + count; 2605 } 2606 } 2607 2608 for(unsigned int i = 0; i < count; i++) 2609 { 2610 PixelProcessor::setBooleanConstant(index + i, *boolean); 2611 boolean++; 2612 } 2613 } 2614 2615 void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count) 2616 { 2617 
for(unsigned int i = 0; i < DRAW_COUNT; i++) 2618 { 2619 if(drawCall[i]->vsDirtyConstF < index + count) 2620 { 2621 drawCall[i]->vsDirtyConstF = index + count; 2622 } 2623 } 2624 2625 for(unsigned int i = 0; i < count; i++) 2626 { 2627 VertexProcessor::setFloatConstant(index + i, value); 2628 value += 4; 2629 } 2630 } 2631 2632 void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count) 2633 { 2634 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2635 { 2636 if(drawCall[i]->vsDirtyConstI < index + count) 2637 { 2638 drawCall[i]->vsDirtyConstI = index + count; 2639 } 2640 } 2641 2642 for(unsigned int i = 0; i < count; i++) 2643 { 2644 VertexProcessor::setIntegerConstant(index + i, value); 2645 value += 4; 2646 } 2647 } 2648 2649 void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count) 2650 { 2651 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2652 { 2653 if(drawCall[i]->vsDirtyConstB < index + count) 2654 { 2655 drawCall[i]->vsDirtyConstB = index + count; 2656 } 2657 } 2658 2659 for(unsigned int i = 0; i < count; i++) 2660 { 2661 VertexProcessor::setBooleanConstant(index + i, *boolean); 2662 boolean++; 2663 } 2664 } 2665 2666 void Renderer::setModelMatrix(const Matrix &M, int i) 2667 { 2668 VertexProcessor::setModelMatrix(M, i); 2669 } 2670 2671 void Renderer::setViewMatrix(const Matrix &V) 2672 { 2673 VertexProcessor::setViewMatrix(V); 2674 updateClipPlanes = true; 2675 } 2676 2677 void Renderer::setBaseMatrix(const Matrix &B) 2678 { 2679 VertexProcessor::setBaseMatrix(B); 2680 updateClipPlanes = true; 2681 } 2682 2683 void Renderer::setProjectionMatrix(const Matrix &P) 2684 { 2685 VertexProcessor::setProjectionMatrix(P); 2686 updateClipPlanes = true; 2687 } 2688 2689 void Renderer::addQuery(Query *query) 2690 { 2691 queries.push_back(query); 2692 } 2693 2694 void Renderer::removeQuery(Query *query) 2695 { 2696 queries.remove(query); 2697 } 2698 2699 #if PERF_HUD 2700 int 
Renderer::getThreadCount() 2701 { 2702 return threadCount; 2703 } 2704 2705 int64_t Renderer::getVertexTime(int thread) 2706 { 2707 return vertexTime[thread]; 2708 } 2709 2710 int64_t Renderer::getSetupTime(int thread) 2711 { 2712 return setupTime[thread]; 2713 } 2714 2715 int64_t Renderer::getPixelTime(int thread) 2716 { 2717 return pixelTime[thread]; 2718 } 2719 2720 void Renderer::resetTimers() 2721 { 2722 for(int thread = 0; thread < threadCount; thread++) 2723 { 2724 vertexTime[thread] = 0; 2725 setupTime[thread] = 0; 2726 pixelTime[thread] = 0; 2727 } 2728 } 2729 #endif 2730 2731 void Renderer::setViewport(const Viewport &viewport) 2732 { 2733 this->viewport = viewport; 2734 } 2735 2736 void Renderer::setScissor(const Rect &scissor) 2737 { 2738 this->scissor = scissor; 2739 } 2740 2741 void Renderer::setClipFlags(int flags) 2742 { 2743 clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum 2744 } 2745 2746 void Renderer::setClipPlane(unsigned int index, const float plane[4]) 2747 { 2748 if(index < MAX_CLIP_PLANES) 2749 { 2750 userPlane[index] = plane; 2751 } 2752 else ASSERT(false); 2753 2754 updateClipPlanes = true; 2755 } 2756 2757 void Renderer::updateConfiguration(bool initialUpdate) 2758 { 2759 bool newConfiguration = swiftConfig->hasNewConfiguration(); 2760 2761 if(newConfiguration || initialUpdate) 2762 { 2763 terminateThreads(); 2764 2765 SwiftConfig::Configuration configuration = {}; 2766 swiftConfig->getConfiguration(configuration); 2767 2768 precacheVertex = !newConfiguration && configuration.precache; 2769 precacheSetup = !newConfiguration && configuration.precache; 2770 precachePixel = !newConfiguration && configuration.precache; 2771 2772 VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize); 2773 PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize); 2774 SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize); 2775 2776 switch(configuration.textureSampleQuality) 2777 { 2778 case 
0: Sampler::setFilterQuality(FILTER_POINT); break; 2779 case 1: Sampler::setFilterQuality(FILTER_LINEAR); break; 2780 case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; 2781 default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; 2782 } 2783 2784 switch(configuration.mipmapQuality) 2785 { 2786 case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break; 2787 case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; 2788 default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; 2789 } 2790 2791 setPerspectiveCorrection(configuration.perspectiveCorrection); 2792 2793 switch(configuration.transcendentalPrecision) 2794 { 2795 case 0: 2796 logPrecision = APPROXIMATE; 2797 expPrecision = APPROXIMATE; 2798 rcpPrecision = APPROXIMATE; 2799 rsqPrecision = APPROXIMATE; 2800 break; 2801 case 1: 2802 logPrecision = PARTIAL; 2803 expPrecision = PARTIAL; 2804 rcpPrecision = PARTIAL; 2805 rsqPrecision = PARTIAL; 2806 break; 2807 case 2: 2808 logPrecision = ACCURATE; 2809 expPrecision = ACCURATE; 2810 rcpPrecision = ACCURATE; 2811 rsqPrecision = ACCURATE; 2812 break; 2813 case 3: 2814 logPrecision = WHQL; 2815 expPrecision = WHQL; 2816 rcpPrecision = WHQL; 2817 rsqPrecision = WHQL; 2818 break; 2819 case 4: 2820 logPrecision = IEEE; 2821 expPrecision = IEEE; 2822 rcpPrecision = IEEE; 2823 rsqPrecision = IEEE; 2824 break; 2825 default: 2826 logPrecision = ACCURATE; 2827 expPrecision = ACCURATE; 2828 rcpPrecision = ACCURATE; 2829 rsqPrecision = ACCURATE; 2830 break; 2831 } 2832 2833 switch(configuration.transparencyAntialiasing) 2834 { 2835 case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break; 2836 case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break; 2837 default: transparencyAntialiasing = TRANSPARENCY_NONE; break; 2838 } 2839 2840 switch(configuration.threadCount) 2841 { 2842 case -1: threadCount = CPUID::coreCount(); break; 2843 case 0: threadCount = CPUID::processAffinity(); break; 2844 default: threadCount = configuration.threadCount; 
break; 2845 } 2846 2847 CPUID::setEnableSSE4_1(configuration.enableSSE4_1); 2848 CPUID::setEnableSSSE3(configuration.enableSSSE3); 2849 CPUID::setEnableSSE3(configuration.enableSSE3); 2850 CPUID::setEnableSSE2(configuration.enableSSE2); 2851 CPUID::setEnableSSE(configuration.enableSSE); 2852 2853 for(int pass = 0; pass < 10; pass++) 2854 { 2855 optimization[pass] = configuration.optimization[pass]; 2856 } 2857 2858 forceWindowed = configuration.forceWindowed; 2859 complementaryDepthBuffer = configuration.complementaryDepthBuffer; 2860 postBlendSRGB = configuration.postBlendSRGB; 2861 exactColorRounding = configuration.exactColorRounding; 2862 forceClearRegisters = configuration.forceClearRegisters; 2863 2864 #ifndef NDEBUG 2865 minPrimitives = configuration.minPrimitives; 2866 maxPrimitives = configuration.maxPrimitives; 2867 #endif 2868 } 2869 2870 if(!initialUpdate && !worker[0]) 2871 { 2872 initializeThreads(); 2873 } 2874 } 2875 } 2876