1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "VertexRoutine.hpp" 16 17 #include "VertexShader.hpp" 18 #include "Constants.hpp" 19 #include "Renderer/Vertex.hpp" 20 #include "Renderer/Renderer.hpp" 21 #include "Common/Half.hpp" 22 #include "Common/Debug.hpp" 23 24 namespace sw 25 { 26 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates 27 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1] 28 29 VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader) 30 : v(shader && shader->dynamicallyIndexedInput), 31 o(shader && shader->dynamicallyIndexedOutput), 32 state(state) 33 { 34 } 35 36 VertexRoutine::~VertexRoutine() 37 { 38 } 39 40 void VertexRoutine::generate() 41 { 42 const bool textureSampling = state.textureSampling; 43 44 Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache); 45 Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex); 46 Pointer<Byte> tagCache = cache + OFFSET(VertexCache,tag); 47 48 UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount)); 49 UInt primitiveNumber = *Pointer<UInt>(task + OFFSET(VertexTask, primitiveStart)); 50 UInt indexInPrimitive = 0; 51 52 constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants)); 53 54 Do 55 { 56 UInt index = *Pointer<UInt>(batch); 57 UInt tagIndex = index & 0x0000003C; 58 UInt indexQ = !textureSampling ? UInt(index & 0xFFFFFFFC) : index; // FIXME: TEXLDL hack to have independent LODs, hurts performance. 59 60 If(*Pointer<UInt>(tagCache + tagIndex) != indexQ) 61 { 62 *Pointer<UInt>(tagCache + tagIndex) = indexQ; 63 64 readInput(indexQ); 65 pipeline(indexQ); 66 postTransform(); 67 computeClipFlags(); 68 69 Pointer<Byte> cacheLine0 = vertexCache + tagIndex * UInt((int)sizeof(Vertex)); 70 writeCache(cacheLine0); 71 } 72 73 UInt cacheIndex = index & 0x0000003F; 74 Pointer<Byte> cacheLine = vertexCache + cacheIndex * UInt((int)sizeof(Vertex)); 75 writeVertex(vertex, cacheLine); 76 77 if(state.transformFeedbackEnabled != 0) 78 { 79 transformFeedback(vertex, primitiveNumber, indexInPrimitive); 80 81 indexInPrimitive++; 82 If(indexInPrimitive == 3) 83 { 84 primitiveNumber++; 85 indexInPrimitive = 0; 86 } 87 } 88 89 vertex += sizeof(Vertex); 90 batch += sizeof(unsigned int); 91 vertexCount--; 92 } 93 Until(vertexCount == 0) 94 95 Return(); 96 } 97 98 void VertexRoutine::readInput(UInt &index) 99 { 100 for(int i = 0; i < MAX_VERTEX_INPUTS; i++) 101 { 102 Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,input) + sizeof(void*) * i); 103 UInt stride = *Pointer<UInt>(data + OFFSET(DrawData,stride) + sizeof(unsigned int) * i); 104 105 v[i] = readStream(input, stride, state.input[i], index); 106 } 107 } 108 109 void VertexRoutine::computeClipFlags() 110 { 111 int pos = state.positionRegister; 112 113 Int4 maxX = CmpLT(o[pos].w, o[pos].x); 114 Int4 maxY = CmpLT(o[pos].w, o[pos].y); 115 Int4 maxZ = CmpLT(o[pos].w, o[pos].z); 116 Int4 minX = CmpNLE(-o[pos].w, o[pos].x); 117 Int4 minY = CmpNLE(-o[pos].w, o[pos].y); 118 Int4 minZ = symmetricNormalizedDepth ? CmpNLE(-o[pos].w, o[pos].z) : CmpNLE(Float4(0.0f), o[pos].z); 119 120 clipFlags = *Pointer<Int>(constants + OFFSET(Constants,maxX) + SignMask(maxX) * 4); // FIXME: Array indexing 121 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxY) + SignMask(maxY) * 4); 122 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxZ) + SignMask(maxZ) * 4); 123 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minX) + SignMask(minX) * 4); 124 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minY) + SignMask(minY) * 4); 125 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minZ) + SignMask(minZ) * 4); 126 127 Int4 finiteX = CmpLE(Abs(o[pos].x), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); 128 Int4 finiteY = CmpLE(Abs(o[pos].y), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); 129 Int4 finiteZ = CmpLE(Abs(o[pos].z), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); 130 131 Int4 finiteXYZ = finiteX & finiteY & finiteZ; 132 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,fini) + SignMask(finiteXYZ) * 4); 133 134 if(state.preTransformed) 135 { 136 clipFlags &= 0xFBFBFBFB; // Don't clip against far clip plane 137 } 138 } 139 140 Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index) 141 { 142 const bool textureSampling = state.textureSampling; 143 144 Vector4f v; 145 146 Pointer<Byte> source0 = buffer + index * stride; 147 Pointer<Byte> source1 = source0 + (!textureSampling ? stride : 0); 148 Pointer<Byte> source2 = source1 + (!textureSampling ? stride : 0); 149 Pointer<Byte> source3 = source2 + (!textureSampling ? stride : 0); 150 151 bool isNativeFloatAttrib = (stream.attribType == VertexShader::ATTRIBTYPE_FLOAT) || stream.normalized; 152 153 switch(stream.type) 154 { 155 case STREAMTYPE_FLOAT: 156 { 157 if(stream.count == 0) 158 { 159 // Null stream, all default components 160 } 161 else 162 { 163 if(stream.count == 1) 164 { 165 v.x.x = *Pointer<Float>(source0); 166 v.x.y = *Pointer<Float>(source1); 167 v.x.z = *Pointer<Float>(source2); 168 v.x.w = *Pointer<Float>(source3); 169 } 170 else 171 { 172 v.x = *Pointer<Float4>(source0); 173 v.y = *Pointer<Float4>(source1); 174 v.z = *Pointer<Float4>(source2); 175 v.w = *Pointer<Float4>(source3); 176 177 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 178 } 179 180 switch(stream.attribType) 181 { 182 case VertexShader::ATTRIBTYPE_INT: 183 if(stream.count >= 1) v.x = As<Float4>(Int4(v.x)); 184 if(stream.count >= 2) v.x = As<Float4>(Int4(v.y)); 185 if(stream.count >= 3) v.x = As<Float4>(Int4(v.z)); 186 if(stream.count >= 4) v.x = As<Float4>(Int4(v.w)); 187 break; 188 case VertexShader::ATTRIBTYPE_UINT: 189 if(stream.count >= 1) v.x = As<Float4>(UInt4(v.x)); 190 if(stream.count >= 2) v.x = As<Float4>(UInt4(v.y)); 191 if(stream.count >= 3) v.x = As<Float4>(UInt4(v.z)); 192 if(stream.count >= 4) v.x = As<Float4>(UInt4(v.w)); 193 break; 194 default: 195 break; 196 } 197 } 198 } 199 break; 200 case STREAMTYPE_BYTE: 201 if(isNativeFloatAttrib) // Stream: UByte, Shader attrib: Float 202 { 203 v.x = Float4(*Pointer<Byte4>(source0)); 204 v.y = Float4(*Pointer<Byte4>(source1)); 205 v.z = Float4(*Pointer<Byte4>(source2)); 206 v.w = Float4(*Pointer<Byte4>(source3)); 207 208 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 209 210 if(stream.normalized) 211 { 212 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 213 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 214 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 215 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 216 } 217 } 218 else // Stream: UByte, Shader attrib: Int / UInt 219 { 220 v.x = As<Float4>(Int4(*Pointer<Byte4>(source0))); 221 v.y = As<Float4>(Int4(*Pointer<Byte4>(source1))); 222 v.z = As<Float4>(Int4(*Pointer<Byte4>(source2))); 223 v.w = As<Float4>(Int4(*Pointer<Byte4>(source3))); 224 225 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 226 } 227 break; 228 case STREAMTYPE_SBYTE: 229 if(isNativeFloatAttrib) // Stream: SByte, Shader attrib: Float 230 { 231 v.x = Float4(*Pointer<SByte4>(source0)); 232 v.y = Float4(*Pointer<SByte4>(source1)); 233 v.z = Float4(*Pointer<SByte4>(source2)); 234 v.w = Float4(*Pointer<SByte4>(source3)); 235 236 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 237 238 if(stream.normalized) 239 { 240 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); 241 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); 242 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); 243 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); 244 } 245 } 246 else // Stream: SByte, Shader attrib: Int / UInt 247 { 248 v.x = As<Float4>(Int4(*Pointer<SByte4>(source0))); 249 v.y = As<Float4>(Int4(*Pointer<SByte4>(source1))); 250 v.z = As<Float4>(Int4(*Pointer<SByte4>(source2))); 251 v.w = As<Float4>(Int4(*Pointer<SByte4>(source3))); 252 253 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 254 } 255 break; 256 case STREAMTYPE_COLOR: 257 { 258 v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 259 v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 260 v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 261 v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); 262 263 transpose4x4(v.x, v.y, v.z, v.w); 264 265 // Swap red and blue 266 Float4 t = v.x; 267 v.x = v.z; 268 v.z = t; 269 } 270 break; 271 case STREAMTYPE_SHORT: 272 if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float 273 { 274 v.x = Float4(*Pointer<Short4>(source0)); 275 v.y = Float4(*Pointer<Short4>(source1)); 276 v.z = Float4(*Pointer<Short4>(source2)); 277 v.w = Float4(*Pointer<Short4>(source3)); 278 279 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 280 281 if(stream.normalized) 282 { 283 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); 284 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); 285 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); 286 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); 287 } 288 } 289 else // Stream: Short, Shader attrib: Int/UInt, no type conversion 290 { 291 v.x = As<Float4>(Int4(*Pointer<Short4>(source0))); 292 v.y = As<Float4>(Int4(*Pointer<Short4>(source1))); 293 v.z = As<Float4>(Int4(*Pointer<Short4>(source2))); 294 v.w = As<Float4>(Int4(*Pointer<Short4>(source3))); 295 296 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 297 } 298 break; 299 case STREAMTYPE_USHORT: 300 if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float 301 { 302 v.x = Float4(*Pointer<UShort4>(source0)); 303 v.y = Float4(*Pointer<UShort4>(source1)); 304 v.z = Float4(*Pointer<UShort4>(source2)); 305 v.w = Float4(*Pointer<UShort4>(source3)); 306 307 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 308 309 if(stream.normalized) 310 { 311 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); 312 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); 313 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); 314 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); 315 } 316 } 317 else // Stream: UShort, Shader attrib: Int/UInt, no type conversion 318 { 319 v.x = As<Float4>(Int4(*Pointer<UShort4>(source0))); 320 v.y = As<Float4>(Int4(*Pointer<UShort4>(source1))); 321 v.z = As<Float4>(Int4(*Pointer<UShort4>(source2))); 322 v.w = As<Float4>(Int4(*Pointer<UShort4>(source3))); 323 324 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 325 } 326 break; 327 case STREAMTYPE_INT: 328 if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float 329 { 330 v.x = Float4(*Pointer<Int4>(source0)); 331 v.y = Float4(*Pointer<Int4>(source1)); 332 v.z = Float4(*Pointer<Int4>(source2)); 333 v.w = Float4(*Pointer<Int4>(source3)); 334 335 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 336 337 if(stream.normalized) 338 { 339 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); 340 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); 341 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); 342 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); 343 } 344 } 345 else // Stream: Int, Shader attrib: Int/UInt, no type conversion 346 { 347 v.x = *Pointer<Float4>(source0); 348 v.y = *Pointer<Float4>(source1); 349 v.z = *Pointer<Float4>(source2); 350 v.w = *Pointer<Float4>(source3); 351 352 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 353 } 354 break; 355 case STREAMTYPE_UINT: 356 if(isNativeFloatAttrib) // Stream: UInt, Shader attrib: Float 357 { 358 v.x = Float4(*Pointer<UInt4>(source0)); 359 v.y = Float4(*Pointer<UInt4>(source1)); 360 v.z = Float4(*Pointer<UInt4>(source2)); 361 v.w = Float4(*Pointer<UInt4>(source3)); 362 363 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 364 365 if(stream.normalized) 366 { 367 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); 368 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); 369 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); 370 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); 371 } 372 } 373 else // Stream: UInt, Shader attrib: Int/UInt, no type conversion 374 { 375 v.x = *Pointer<Float4>(source0); 376 v.y = *Pointer<Float4>(source1); 377 v.z = *Pointer<Float4>(source2); 378 v.w = *Pointer<Float4>(source3); 379 380 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 381 } 382 break; 383 case STREAMTYPE_UDEC3: 384 { 385 // FIXME: Vectorize 386 { 387 Int x, y, z; 388 389 x = y = z = *Pointer<Int>(source0); 390 391 v.x.x = Float(x & 0x000003FF); 392 v.x.y = Float(y & 0x000FFC00); 393 v.x.z = Float(z & 0x3FF00000); 394 } 395 396 { 397 Int x, y, z; 398 399 x = y = z = *Pointer<Int>(source1); 400 401 v.y.x = Float(x & 0x000003FF); 402 v.y.y = Float(y & 0x000FFC00); 403 v.y.z = Float(z & 0x3FF00000); 404 } 405 406 { 407 Int x, y, z; 408 409 x = y = z = *Pointer<Int>(source2); 410 411 v.z.x = Float(x & 0x000003FF); 412 v.z.y = Float(y & 0x000FFC00); 413 v.z.z = Float(z & 0x3FF00000); 414 } 415 416 { 417 Int x, y, z; 418 419 x = y = z = *Pointer<Int>(source3); 420 421 v.w.x = Float(x & 0x000003FF); 422 v.w.y = Float(y & 0x000FFC00); 423 v.w.z = Float(z & 0x3FF00000); 424 } 425 426 transpose4x3(v.x, v.y, v.z, v.w); 427 428 v.y *= Float4(1.0f / 0x00000400); 429 v.z *= Float4(1.0f / 0x00100000); 430 } 431 break; 432 case STREAMTYPE_DEC3N: 433 { 434 // FIXME: Vectorize 435 { 436 Int x, y, z; 437 438 x = y = z = *Pointer<Int>(source0); 439 440 v.x.x = Float((x << 22) & 0xFFC00000); 441 v.x.y = Float((y << 12) & 0xFFC00000); 442 v.x.z = Float((z << 2) & 0xFFC00000); 443 } 444 445 { 446 Int x, y, z; 447 448 x = y = z = *Pointer<Int>(source1); 449 450 v.y.x = Float((x << 22) & 0xFFC00000); 451 v.y.y = Float((y << 12) & 0xFFC00000); 452 v.y.z = Float((z << 2) & 0xFFC00000); 453 } 454 455 { 456 Int x, y, z; 457 458 x = y = z = *Pointer<Int>(source2); 459 460 v.z.x = Float((x << 22) & 0xFFC00000); 461 v.z.y = Float((y << 12) & 0xFFC00000); 462 v.z.z = Float((z << 2) & 0xFFC00000); 463 } 464 465 { 466 Int x, y, z; 467 468 x = y = z = *Pointer<Int>(source3); 469 470 v.w.x = Float((x << 22) & 0xFFC00000); 471 v.w.y = Float((y << 12) & 0xFFC00000); 472 v.w.z = Float((z << 2) & 0xFFC00000); 473 } 474 475 transpose4x3(v.x, v.y, v.z, v.w); 476 477 v.x *= Float4(1.0f / 0x00400000 / 511.0f); 478 v.y *= Float4(1.0f / 0x00400000 / 511.0f); 479 v.z *= Float4(1.0f / 0x00400000 / 511.0f); 480 } 481 break; 482 case STREAMTYPE_FIXED: 483 { 484 v.x = Float4(*Pointer<Int4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); 485 v.y = Float4(*Pointer<Int4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); 486 v.z = Float4(*Pointer<Int4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); 487 v.w = Float4(*Pointer<Int4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); 488 489 transpose4xN(v.x, v.y, v.z, v.w, stream.count); 490 } 491 break; 492 case STREAMTYPE_HALF: 493 { 494 if(stream.count >= 1) 495 { 496 UShort x0 = *Pointer<UShort>(source0 + 0); 497 UShort x1 = *Pointer<UShort>(source1 + 0); 498 UShort x2 = *Pointer<UShort>(source2 + 0); 499 UShort x3 = *Pointer<UShort>(source3 + 0); 500 501 v.x.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x0) * 4); 502 v.x.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x1) * 4); 503 v.x.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x2) * 4); 504 v.x.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x3) * 4); 505 } 506 507 if(stream.count >= 2) 508 { 509 UShort y0 = *Pointer<UShort>(source0 + 2); 510 UShort y1 = *Pointer<UShort>(source1 + 2); 511 UShort y2 = *Pointer<UShort>(source2 + 2); 512 UShort y3 = *Pointer<UShort>(source3 + 2); 513 514 v.y.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y0) * 4); 515 v.y.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y1) * 4); 516 v.y.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y2) * 4); 517 v.y.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y3) * 4); 518 } 519 520 if(stream.count >= 3) 521 { 522 UShort z0 = *Pointer<UShort>(source0 + 4); 523 UShort z1 = *Pointer<UShort>(source1 + 4); 524 UShort z2 = *Pointer<UShort>(source2 + 4); 525 UShort z3 = *Pointer<UShort>(source3 + 4); 526 527 v.z.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z0) * 4); 528 v.z.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z1) * 4); 529 v.z.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z2) * 4); 530 v.z.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z3) * 4); 531 } 532 533 if(stream.count >= 4) 534 { 535 UShort w0 = *Pointer<UShort>(source0 + 6); 536 UShort w1 = *Pointer<UShort>(source1 + 6); 537 UShort w2 = *Pointer<UShort>(source2 + 6); 538 UShort w3 = *Pointer<UShort>(source3 + 6); 539 540 v.w.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w0) * 4); 541 v.w.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w1) * 4); 542 v.w.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w2) * 4); 543 v.w.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w3) * 4); 544 } 545 } 546 break; 547 case STREAMTYPE_INDICES: 548 { 549 v.x.x = *Pointer<Float>(source0); 550 v.x.y = *Pointer<Float>(source1); 551 v.x.z = *Pointer<Float>(source2); 552 v.x.w = *Pointer<Float>(source3); 553 } 554 break; 555 case STREAMTYPE_2_10_10_10_INT: 556 { 557 Int4 src; 558 src = Insert(src, *Pointer<Int>(source0), 0); 559 src = Insert(src, *Pointer<Int>(source1), 1); 560 src = Insert(src, *Pointer<Int>(source2), 2); 561 src = Insert(src, *Pointer<Int>(source3), 3); 562 563 v.x = Float4((src << 22) >> 22); 564 v.y = Float4((src << 12) >> 22); 565 v.z = Float4((src << 02) >> 22); 566 v.w = Float4(src >> 30); 567 568 if(stream.normalized) 569 { 570 v.x = Max(v.x * Float4(1.0f / 0x1FF), Float4(-1.0f)); 571 v.y = Max(v.y * Float4(1.0f / 0x1FF), Float4(-1.0f)); 572 v.z = Max(v.z * Float4(1.0f / 0x1FF), Float4(-1.0f)); 573 v.w = Max(v.w, Float4(-1.0f)); 574 } 575 } 576 break; 577 case STREAMTYPE_2_10_10_10_UINT: 578 { 579 Int4 src; 580 src = Insert(src, *Pointer<Int>(source0), 0); 581 src = Insert(src, *Pointer<Int>(source1), 1); 582 src = Insert(src, *Pointer<Int>(source2), 2); 583 src = Insert(src, *Pointer<Int>(source3), 3); 584 585 v.x = Float4(src & Int4(0x3FF)); 586 v.y = Float4((src >> 10) & Int4(0x3FF)); 587 v.z = Float4((src >> 20) & Int4(0x3FF)); 588 v.w = Float4((src >> 30) & Int4(0x3)); 589 590 if(stream.normalized) 591 { 592 v.x *= Float4(1.0f / 0x3FF); 593 v.y *= Float4(1.0f / 0x3FF); 594 v.z *= Float4(1.0f / 0x3FF); 595 v.w *= Float4(1.0f / 0x3); 596 } 597 } 598 break; 599 default: 600 ASSERT(false); 601 } 602 603 if(stream.count < 1) v.x = Float4(0.0f); 604 if(stream.count < 2) v.y = Float4(0.0f); 605 if(stream.count < 3) v.z = Float4(0.0f); 606 if(stream.count < 4) v.w = isNativeFloatAttrib ? As<Float4>(Float4(1.0f)) : As<Float4>(Int4(0)); 607 608 return v; 609 } 610 611 void VertexRoutine::postTransform() 612 { 613 int pos = state.positionRegister; 614 615 // Backtransform 616 if(state.preTransformed) 617 { 618 Float4 rhw = Float4(1.0f) / o[pos].w; 619 620 Float4 W = *Pointer<Float4>(data + OFFSET(DrawData,Wx16)) * Float4(1.0f / 16.0f); 621 Float4 H = *Pointer<Float4>(data + OFFSET(DrawData,Hx16)) * Float4(1.0f / 16.0f); 622 Float4 L = *Pointer<Float4>(data + OFFSET(DrawData,X0x16)) * Float4(1.0f / 16.0f); 623 Float4 T = *Pointer<Float4>(data + OFFSET(DrawData,Y0x16)) * Float4(1.0f / 16.0f); 624 625 o[pos].x = (o[pos].x - L) / W * rhw; 626 o[pos].y = (o[pos].y - T) / H * rhw; 627 o[pos].z = o[pos].z * rhw; 628 o[pos].w = rhw; 629 } 630 631 if(!halfIntegerCoordinates && !state.preTransformed) 632 { 633 o[pos].x = o[pos].x + *Pointer<Float4>(data + OFFSET(DrawData,halfPixelX)) * o[pos].w; 634 o[pos].y = o[pos].y + *Pointer<Float4>(data + OFFSET(DrawData,halfPixelY)) * o[pos].w; 635 } 636 637 if(state.superSampling) 638 { 639 o[pos].x = o[pos].x + *Pointer<Float4>(data + OFFSET(DrawData,XXXX)) * o[pos].w; 640 o[pos].y = o[pos].y + *Pointer<Float4>(data + OFFSET(DrawData,YYYY)) * o[pos].w; 641 } 642 } 643 644 void VertexRoutine::writeCache(Pointer<Byte> &cacheLine) 645 { 646 Vector4f v; 647 648 for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++) 649 { 650 if(state.output[i].write) 651 { 652 v.x = o[i].x; 653 v.y = o[i].y; 654 v.z = o[i].z; 655 v.w = o[i].w; 656 657 if(state.output[i].xClamp) 658 { 659 v.x = Max(v.x, Float4(0.0f)); 660 v.x = Min(v.x, Float4(1.0f)); 661 } 662 663 if(state.output[i].yClamp) 664 { 665 v.y = Max(v.y, Float4(0.0f)); 666 v.y = Min(v.y, Float4(1.0f)); 667 } 668 669 if(state.output[i].zClamp) 670 { 671 v.z = Max(v.z, Float4(0.0f)); 672 v.z = Min(v.z, Float4(1.0f)); 673 } 674 675 if(state.output[i].wClamp) 676 { 677 v.w = Max(v.w, Float4(0.0f)); 678 v.w = Min(v.w, Float4(1.0f)); 679 } 680 681 if(state.output[i].write == 0x01) 682 { 683 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0) = v.x.x; 684 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1) = v.x.y; 685 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2) = v.x.z; 686 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3) = v.x.w; 687 } 688 else 689 { 690 if(state.output[i].write == 0x03) 691 { 692 transpose2x4(v.x, v.y, v.z, v.w); 693 } 694 else 695 { 696 transpose4x4(v.x, v.y, v.z, v.w); 697 } 698 699 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0, 16) = v.x; 700 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1, 16) = v.y; 701 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2, 16) = v.z; 702 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3, 16) = v.w; 703 } 704 } 705 } 706 707 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 0) = (clipFlags >> 0) & 0x0000000FF; 708 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 1) = (clipFlags >> 8) & 0x0000000FF; 709 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (clipFlags >> 16) & 0x0000000FF; 710 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (clipFlags >> 24) & 0x0000000FF; 711 712 // Viewport transform 713 int pos = state.positionRegister; 714 715 v.x = o[pos].x; 716 v.y = o[pos].y; 717 v.z = o[pos].z; 718 v.w = o[pos].w; 719 720 if(symmetricNormalizedDepth) 721 { 722 v.z = (v.z + v.w) * Float4(0.5f); // [-1, 1] -> [0, 1] 723 } 724 725 Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f)))); 726 Float4 rhw = Float4(1.0f) / w; 727 728 v.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Wx16)))); 729 v.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Hx16)))); 730 v.z = v.z * rhw; 731 v.w = rhw; 732 733 transpose4x4(v.x, v.y, v.z, v.w); 734 735 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 0, 16) = v.x; 736 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 1, 16) = v.y; 737 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 2, 16) = v.z; 738 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 3, 16) = v.w; 739 } 740 741 void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cache) 742 { 743 for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++) 744 { 745 if(state.output[i].write) 746 { 747 *Pointer<Int4>(vertex + OFFSET(Vertex,v[i]), 16) = *Pointer<Int4>(cache + OFFSET(Vertex,v[i]), 16); 748 } 749 } 750 751 *Pointer<Int4>(vertex + OFFSET(Vertex,X)) = *Pointer<Int4>(cache + OFFSET(Vertex,X)); 752 *Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cache + OFFSET(Vertex,clipFlags)); 753 } 754 755 void VertexRoutine::transformFeedback(const Pointer<Byte> &vertex, const UInt &primitiveNumber, const UInt &indexInPrimitive) 756 { 757 If(indexInPrimitive < state.verticesPerPrimitive) 758 { 759 UInt tOffset = primitiveNumber * state.verticesPerPrimitive + indexInPrimitive; 760 761 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) 762 { 763 if(state.transformFeedbackEnabled & (1ULL << i)) 764 { 765 UInt reg = *Pointer<UInt>(data + OFFSET(DrawData, vs.reg[i])); 766 UInt row = *Pointer<UInt>(data + OFFSET(DrawData, vs.row[i])); 767 UInt col = *Pointer<UInt>(data + OFFSET(DrawData, vs.col[i])); 768 UInt str = *Pointer<UInt>(data + OFFSET(DrawData, vs.str[i])); 769 770 Pointer<Byte> t = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.t[i])) + (tOffset * str * sizeof(float)); 771 Pointer<Byte> v = vertex + OFFSET(Vertex, v) + reg * sizeof(float); 772 773 For(UInt r = 0, r < row, r++) 774 { 775 UInt rOffsetX = r * col * sizeof(float); 776 UInt rOffset4 = r * sizeof(float4); 777 778 For(UInt c = 0, c < col, c++) 779 { 780 UInt cOffset = c * sizeof(float); 781 *Pointer<Float>(t + rOffsetX + cOffset) = *Pointer<Float>(v + rOffset4 + cOffset); 782 } 783 } 784 } 785 } 786 } 787 } 788 } 789