Home | History | Annotate | Download | only in Shader
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "VertexPipeline.hpp"
     16 
     17 #include "Renderer/Vertex.hpp"
     18 #include "Renderer/Renderer.hpp"
     19 #include "Common/Debug.hpp"
     20 
     21 #include <string.h>
     22 #include <stdlib.h>
     23 #include <stdio.h>
     24 
     25 #undef max
     26 #undef min
     27 
     28 namespace sw
     29 {
     30 	extern bool secondaryColor;
     31 
     32 	VertexPipeline::VertexPipeline(const VertexProcessor::State &state) : VertexRoutine(state, 0)
     33 	{
     34 	}
     35 
     36 	VertexPipeline::~VertexPipeline()
     37 	{
     38 	}
     39 
     40 	Vector4f VertexPipeline::transformBlend(const Register &src, const Pointer<Byte> &matrix, bool homogeneous)
     41 	{
     42 		Vector4f dst;
     43 
     44 		if(state.vertexBlendMatrixCount == 0)
     45 		{
     46 			dst = transform(src, matrix, homogeneous);
     47 		}
     48 		else
     49 		{
     50 			UInt index0[4];
     51 			UInt index1[4];
     52 			UInt index2[4];
     53 			UInt index3[4];
     54 
     55 			if(state.indexedVertexBlendEnable)
     56 			{
     57 				for(int i = 0; i < 4; i++)
     58 				{
     59 					Float4 B = v[BlendIndices].x;
     60 					UInt indices;
     61 
     62 					switch(i)
     63 					{
     64 					case 0: indices = As<UInt>(Float(B.x)); break;
     65 					case 1: indices = As<UInt>(Float(B.y)); break;
     66 					case 2: indices = As<UInt>(Float(B.z)); break;
     67 					case 3: indices = As<UInt>(Float(B.w)); break;
     68 					}
     69 
     70 					index0[i] = (indices & 0x000000FF) << 6;
     71 					index1[i] = (indices & 0x0000FF00) >> 2;
     72 					index2[i] = (indices & 0x00FF0000) >> 10;
     73 					index3[i] = (indices & 0xFF000000) >> 18;
     74 				}
     75 			}
     76 			else
     77 			{
     78 				for(int i = 0; i < 4; i++)
     79 				{
     80 					index0[i] = 0 * 64;
     81 					index1[i] = 1 * 64;
     82 					index2[i] = 2 * 64;
     83 					index3[i] = 3 * 64;
     84 				}
     85 			}
     86 
     87 			Float4 weight0;
     88 			Float4 weight1;
     89 			Float4 weight2;
     90 			Float4 weight3;
     91 
     92 			switch(state.vertexBlendMatrixCount)
     93 			{
     94 			case 4: weight2 = v[BlendWeight].z;
     95 			case 3: weight1 = v[BlendWeight].y;
     96 			case 2: weight0 = v[BlendWeight].x;
     97 			case 1:
     98 				break;
     99 			}
    100 
    101 			if(state.vertexBlendMatrixCount == 1)
    102 			{
    103 				dst = transform(src, matrix, index0, homogeneous);
    104 			}
    105 			else if(state.vertexBlendMatrixCount == 2)
    106 			{
    107 				weight1 = Float4(1.0f) - weight0;
    108 
    109 				Vector4f pos0;
    110 				Vector4f pos1;
    111 
    112 				pos0 = transform(src, matrix, index0, homogeneous);
    113 				pos1 = transform(src, matrix, index1, homogeneous);
    114 
    115 				dst.x = pos0.x * weight0 + pos1.x * weight1;   // FIXME: Vector4f operators
    116 				dst.y = pos0.y * weight0 + pos1.y * weight1;
    117 				dst.z = pos0.z * weight0 + pos1.z * weight1;
    118 				dst.w = pos0.w * weight0 + pos1.w * weight1;
    119 			}
    120 			else if(state.vertexBlendMatrixCount == 3)
    121 			{
    122 				weight2 = Float4(1.0f) - (weight0 + weight1);
    123 
    124 				Vector4f pos0;
    125 				Vector4f pos1;
    126 				Vector4f pos2;
    127 
    128 				pos0 = transform(src, matrix, index0, homogeneous);
    129 				pos1 = transform(src, matrix, index1, homogeneous);
    130 				pos2 = transform(src, matrix, index2, homogeneous);
    131 
    132 				dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2;
    133 				dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2;
    134 				dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2;
    135 				dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2;
    136 			}
    137 			else if(state.vertexBlendMatrixCount == 4)
    138 			{
    139 				weight3 = Float4(1.0f) - (weight0 + weight1 + weight2);
    140 
    141 				Vector4f pos0;
    142 				Vector4f pos1;
    143 				Vector4f pos2;
    144 				Vector4f pos3;
    145 
    146 				pos0 = transform(src, matrix, index0, homogeneous);
    147 				pos1 = transform(src, matrix, index1, homogeneous);
    148 				pos2 = transform(src, matrix, index2, homogeneous);
    149 				pos3 = transform(src, matrix, index3, homogeneous);
    150 
    151 				dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2 + pos3.x * weight3;
    152 				dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2 + pos3.y * weight3;
    153 				dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2 + pos3.z * weight3;
    154 				dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2 + pos3.w * weight3;
    155 			}
    156 		}
    157 
    158 		return dst;
    159 	}
    160 
    161 	void VertexPipeline::pipeline(UInt &index)
    162 	{
    163 		Vector4f position;
    164 		Vector4f normal;
    165 
    166 		if(!state.preTransformed)
    167 		{
    168 			position = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.transformT)), true);
    169 		}
    170 		else
    171 		{
    172 			position = v[PositionT];
    173 		}
    174 
    175 		o[Pos].x = position.x;
    176 		o[Pos].y = position.y;
    177 		o[Pos].z = position.z;
    178 		o[Pos].w = position.w;
    179 
    180 		Vector4f vertexPosition = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
    181 
    182 		if(state.vertexNormalActive)
    183 		{
    184 			normal = transformBlend(v[Normal], Pointer<Byte>(data + OFFSET(DrawData,ff.normalTransformT)), false);
    185 
    186 			if(state.normalizeNormals)
    187 			{
    188 				normal = normalize(normal);
    189 			}
    190 		}
    191 
    192 		if(!state.vertexLightingActive)
    193 		{
    194 			// FIXME: Don't process if not used at all
    195 			if(state.diffuseActive && state.input[Color0])
    196 			{
    197 				Vector4f diffuse = v[Color0];
    198 
    199 				o[C0].x = diffuse.x;
    200 				o[C0].y = diffuse.y;
    201 				o[C0].z = diffuse.z;
    202 				o[C0].w = diffuse.w;
    203 			}
    204 			else
    205 			{
    206 				o[C0].x = Float4(1.0f);
    207 				o[C0].y = Float4(1.0f);
    208 				o[C0].z = Float4(1.0f);
    209 				o[C0].w = Float4(1.0f);
    210 			}
    211 
    212 			// FIXME: Don't process if not used at all
    213 			if(state.specularActive && state.input[Color1])
    214 			{
    215 				Vector4f specular = v[Color1];
    216 
    217 				o[C1].x = specular.x;
    218 				o[C1].y = specular.y;
    219 				o[C1].z = specular.z;
    220 				o[C1].w = specular.w;
    221 			}
    222 			else
    223 			{
    224 				o[C1].x = Float4(0.0f);
    225 				o[C1].y = Float4(0.0f);
    226 				o[C1].z = Float4(0.0f);
    227 				o[C1].w = Float4(1.0f);
    228 			}
    229 		}
    230 		else
    231 		{
    232 			o[C0].x = Float4(0.0f);
    233 			o[C0].y = Float4(0.0f);
    234 			o[C0].z = Float4(0.0f);
    235 			o[C0].w = Float4(0.0f);
    236 
    237 			o[C1].x = Float4(0.0f);
    238 			o[C1].y = Float4(0.0f);
    239 			o[C1].z = Float4(0.0f);
    240 			o[C1].w = Float4(0.0f);
    241 
    242 			Vector4f ambient;
    243 			Float4 globalAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.globalAmbient));   // FIXME: Unpack
    244 
    245 			ambient.x = globalAmbient.x;
    246 			ambient.y = globalAmbient.y;
    247 			ambient.z = globalAmbient.z;
    248 
    249 			for(int i = 0; i < 8; i++)
    250 			{
    251 				if(!(state.vertexLightActive & (1 << i)))
    252 				{
    253 					continue;
    254 				}
    255 
    256 				Vector4f L;    // Light vector
    257 				Float4 att;   // Attenuation
    258 
    259 				// Attenuation
    260 				{
    261 					Float4 d;   // Distance
    262 
    263 					L.x = L.y = L.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightPosition[i]));   // FIXME: Unpack
    264 					L.x = L.x.xxxx;
    265 					L.y = L.y.yyyy;
    266 					L.z = L.z.zzzz;
    267 
    268 					L.x -= vertexPosition.x;
    269 					L.y -= vertexPosition.y;
    270 					L.z -= vertexPosition.z;
    271 					d = dot3(L, L);
    272 					d = RcpSqrt_pp(d);     // FIXME: Sufficient precision?
    273 					L.x *= d;
    274 					L.y *= d;
    275 					L.z *= d;
    276 					d = Rcp_pp(d);       // FIXME: Sufficient precision?
    277 
    278 					Float4 q = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationQuadratic[i]));
    279 					Float4 l = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationLinear[i]));
    280 					Float4 c = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationConstant[i]));
    281 
    282 					att = Rcp_pp((q * d + l) * d + c);
    283 				}
    284 
    285 				// Ambient per light
    286 				{
    287 					Float4 lightAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightAmbient[i]));   // FIXME: Unpack
    288 
    289 					ambient.x = ambient.x + lightAmbient.x * att;
    290 					ambient.y = ambient.y + lightAmbient.y * att;
    291 					ambient.z = ambient.z + lightAmbient.z * att;
    292 				}
    293 
    294 				// Diffuse
    295 				if(state.vertexNormalActive)
    296 				{
    297 					Float4 dot;
    298 
    299 					dot = dot3(L, normal);
    300 					dot = Max(dot, Float4(0.0f));
    301 					dot *= att;
    302 
    303 					Vector4f diff;
    304 
    305 					if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL)
    306 					{
    307 						diff.x = diff.y = diff.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse));   // FIXME: Unpack
    308 						diff.x = diff.x.xxxx;
    309 						diff.y = diff.y.yyyy;
    310 						diff.z = diff.z.zzzz;
    311 					}
    312 					else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1)
    313 					{
    314 						diff = v[Color0];
    315 					}
    316 					else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2)
    317 					{
    318 						diff = v[Color1];
    319 					}
    320 					else ASSERT(false);
    321 
    322 					Float4 lightDiffuse = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightDiffuse[i]));
    323 
    324 					o[C0].x = o[C0].x + diff.x * dot * lightDiffuse.x;   // FIXME: Clamp first?
    325 					o[C0].y = o[C0].y + diff.y * dot * lightDiffuse.y;   // FIXME: Clamp first?
    326 					o[C0].z = o[C0].z + diff.z * dot * lightDiffuse.z;   // FIXME: Clamp first?
    327 				}
    328 
    329 				// Specular
    330 				if(state.vertexSpecularActive)
    331 				{
    332 					Vector4f S;
    333 					Vector4f C;   // Camera vector
    334 					Float4 pow;
    335 
    336 					pow = *Pointer<Float>(data + OFFSET(DrawData,ff.materialShininess));
    337 
    338 					S.x = Float4(0.0f) - vertexPosition.x;
    339 					S.y = Float4(0.0f) - vertexPosition.y;
    340 					S.z = Float4(0.0f) - vertexPosition.z;
    341 					C = normalize(S);
    342 
    343 					S.x = L.x + C.x;
    344 					S.y = L.y + C.y;
    345 					S.z = L.z + C.z;
    346 					C = normalize(S);
    347 
    348 					Float4 dot = Max(dot3(C, normal), Float4(0.0f));   // FIXME: max(dot3(C, normal), 0)
    349 
    350 					Float4 P = power(dot, pow);
    351 					P *= att;
    352 
    353 					Vector4f spec;
    354 
    355 					if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL)
    356 					{
    357 						Float4 materialSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular));   // FIXME: Unpack
    358 
    359 						spec.x = materialSpecular.x;
    360 						spec.y = materialSpecular.y;
    361 						spec.z = materialSpecular.z;
    362 					}
    363 					else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1)
    364 					{
    365 						spec = v[Color0];
    366 					}
    367 					else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2)
    368 					{
    369 						spec = v[Color1];
    370 					}
    371 					else ASSERT(false);
    372 
    373 					Float4 lightSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightSpecular[i]));
    374 
    375 					spec.x *= lightSpecular.x;
    376 					spec.y *= lightSpecular.y;
    377 					spec.z *= lightSpecular.z;
    378 
    379 					spec.x *= P;
    380 					spec.y *= P;
    381 					spec.z *= P;
    382 
    383 					spec.x = Max(spec.x, Float4(0.0f));
    384 					spec.y = Max(spec.y, Float4(0.0f));
    385 					spec.z = Max(spec.z, Float4(0.0f));
    386 
    387 					if(secondaryColor)
    388 					{
    389 						o[C1].x = o[C1].x + spec.x;
    390 						o[C1].y = o[C1].y + spec.y;
    391 						o[C1].z = o[C1].z + spec.z;
    392 					}
    393 					else
    394 					{
    395 						o[C0].x = o[C0].x + spec.x;
    396 						o[C0].y = o[C0].y + spec.y;
    397 						o[C0].z = o[C0].z + spec.z;
    398 					}
    399 				}
    400 			}
    401 
    402 			if(state.vertexAmbientMaterialSourceActive == MATERIAL_MATERIAL)
    403 			{
    404 				Float4 materialAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialAmbient));   // FIXME: Unpack
    405 
    406 				ambient.x = ambient.x * materialAmbient.x;
    407 				ambient.y = ambient.y * materialAmbient.y;
    408 				ambient.z = ambient.z * materialAmbient.z;
    409 			}
    410 			else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR1)
    411 			{
    412 				Vector4f materialDiffuse = v[Color0];
    413 
    414 				ambient.x = ambient.x * materialDiffuse.x;
    415 				ambient.y = ambient.y * materialDiffuse.y;
    416 				ambient.z = ambient.z * materialDiffuse.z;
    417 			}
    418 			else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR2)
    419 			{
    420 				Vector4f materialSpecular = v[Color1];
    421 
    422 				ambient.x = ambient.x * materialSpecular.x;
    423 				ambient.y = ambient.y * materialSpecular.y;
    424 				ambient.z = ambient.z * materialSpecular.z;
    425 			}
    426 			else ASSERT(false);
    427 
    428 			o[C0].x = o[C0].x + ambient.x;
    429 			o[C0].y = o[C0].y + ambient.y;
    430 			o[C0].z = o[C0].z + ambient.z;
    431 
    432 			// Emissive
    433 			if(state.vertexEmissiveMaterialSourceActive == MATERIAL_MATERIAL)
    434 			{
    435 				Float4 materialEmission = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialEmission));   // FIXME: Unpack
    436 
    437 				o[C0].x = o[C0].x + materialEmission.x;
    438 				o[C0].y = o[C0].y + materialEmission.y;
    439 				o[C0].z = o[C0].z + materialEmission.z;
    440 			}
    441 			else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR1)
    442 			{
    443 				Vector4f materialSpecular = v[Color0];
    444 
    445 				o[C0].x = o[C0].x + materialSpecular.x;
    446 				o[C0].y = o[C0].y + materialSpecular.y;
    447 				o[C0].z = o[C0].z + materialSpecular.z;
    448 			}
    449 			else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR2)
    450 			{
    451 				Vector4f materialSpecular = v[Color1];
    452 
    453 				o[C0].x = o[C0].x + materialSpecular.x;
    454 				o[C0].y = o[C0].y + materialSpecular.y;
    455 				o[C0].z = o[C0].z + materialSpecular.z;
    456 			}
    457 			else ASSERT(false);
    458 
    459 			// Diffuse alpha component
    460 			if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL)
    461 			{
    462 				o[C0].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse[0]))).wwww;   // FIXME: Unpack
    463 			}
    464 			else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1)
    465 			{
    466 				Vector4f alpha = v[Color0];
    467 				o[C0].w = alpha.w;
    468 			}
    469 			else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2)
    470 			{
    471 				Vector4f alpha = v[Color1];
    472 				o[C0].w = alpha.w;
    473 			}
    474 			else ASSERT(false);
    475 
    476 			if(state.vertexSpecularActive)
    477 			{
    478 				// Specular alpha component
    479 				if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL)
    480 				{
    481 					o[C1].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular[3]))).wwww;   // FIXME: Unpack
    482 				}
    483 				else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1)
    484 				{
    485 					Vector4f alpha = v[Color0];
    486 					o[C1].w = alpha.w;
    487 				}
    488 				else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2)
    489 				{
    490 					Vector4f alpha = v[Color1];
    491 					o[C1].w = alpha.w;
    492 				}
    493 				else ASSERT(false);
    494 			}
    495 		}
    496 
    497 		if(state.fogActive)
    498 		{
    499 			Float4 f;
    500 
    501 			if(!state.rangeFogActive)
    502 			{
    503 				f = Abs(vertexPosition.z);
    504 			}
    505 			else
    506 			{
    507 				f = Sqrt(dot3(vertexPosition, vertexPosition));   // FIXME: f = length(vertexPosition);
    508 			}
    509 
    510 			switch(state.vertexFogMode)
    511 			{
    512 			case FOG_NONE:
    513 				if(state.specularActive)
    514 				{
    515 					o[Fog].x = o[C1].w;
    516 				}
    517 				else
    518 				{
    519 					o[Fog].x = Float4(0.0f);
    520 				}
    521 				break;
    522 			case FOG_LINEAR:
    523 				o[Fog].x = f * *Pointer<Float4>(data + OFFSET(DrawData,fog.scale)) + *Pointer<Float4>(data + OFFSET(DrawData,fog.offset));
    524 				break;
    525 			case FOG_EXP:
    526 				o[Fog].x = exponential2(f * *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE)), true);
    527 				break;
    528 			case FOG_EXP2:
    529 				o[Fog].x = exponential2((f * f) * *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E)), true);
    530 				break;
    531 			default:
    532 				ASSERT(false);
    533 			}
    534 		}
    535 
    536 		for(int stage = 0; stage < 8; stage++)
    537 		{
    538 			processTextureCoordinate(stage, normal, position);
    539 		}
    540 
    541 		processPointSize();
    542 	}
    543 
    544 	void VertexPipeline::processTextureCoordinate(int stage, Vector4f &normal, Vector4f &position)
    545 	{
    546 		if(state.output[T0 + stage].write)
    547 		{
    548 			int i = state.textureState[stage].texCoordIndexActive;
    549 
    550 			switch(state.textureState[stage].texGenActive)
    551 			{
    552 			case TEXGEN_NONE:
    553 				{
    554 					Vector4f &&varying = v[TexCoord0 + i];
    555 
    556 					o[T0 + stage].x = varying.x;
    557 					o[T0 + stage].y = varying.y;
    558 					o[T0 + stage].z = varying.z;
    559 					o[T0 + stage].w = varying.w;
    560 				}
    561 				break;
    562 			case TEXGEN_PASSTHRU:
    563 				{
    564 					Vector4f &&varying = v[TexCoord0 + i];
    565 
    566 					o[T0 + stage].x = varying.x;
    567 					o[T0 + stage].y = varying.y;
    568 					o[T0 + stage].z = varying.z;
    569 					o[T0 + stage].w = varying.w;
    570 
    571 					if(state.input[TexCoord0 + i])
    572 					{
    573 						switch(state.input[TexCoord0 + i].count)
    574 						{
    575 						case 1:
    576 							o[T0 + stage].y = Float4(1.0f);
    577 							o[T0 + stage].z = Float4(0.0f);
    578 							o[T0 + stage].w = Float4(0.0f);
    579 							break;
    580 						case 2:
    581 							o[T0 + stage].z = Float4(1.0f);
    582 							o[T0 + stage].w = Float4(0.0f);
    583 							break;
    584 						case 3:
    585 							o[T0 + stage].w = Float4(1.0f);
    586 							break;
    587 						case 4:
    588 							break;
    589 						default:
    590 							ASSERT(false);
    591 						}
    592 					}
    593 				}
    594 				break;
    595 			case TEXGEN_NORMAL:
    596 				{
    597 					Vector4f Nc;   // Normal vector in camera space
    598 
    599 					if(state.vertexNormalActive)
    600 					{
    601 						Nc = normal;
    602 					}
    603 					else
    604 					{
    605 						Nc.x = Float4(0.0f);
    606 						Nc.y = Float4(0.0f);
    607 						Nc.z = Float4(0.0f);
    608 					}
    609 
    610 					Nc.w = Float4(1.0f);
    611 
    612 					o[T0 + stage].x = Nc.x;
    613 					o[T0 + stage].y = Nc.y;
    614 					o[T0 + stage].z = Nc.z;
    615 					o[T0 + stage].w = Nc.w;
    616 				}
    617 				break;
    618 			case TEXGEN_POSITION:
    619 				{
    620 					Vector4f Pn = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);   // Position in camera space
    621 
    622 					Pn.w = Float4(1.0f);
    623 
    624 					o[T0 + stage].x = Pn.x;
    625 					o[T0 + stage].y = Pn.y;
    626 					o[T0 + stage].z = Pn.z;
    627 					o[T0 + stage].w = Pn.w;
    628 				}
    629 				break;
    630 			case TEXGEN_REFLECTION:
    631 				{
    632 					Vector4f R;   // Reflection vector
    633 
    634 					if(state.vertexNormalActive)
    635 					{
    636 						Vector4f Nc;   // Normal vector in camera space
    637 
    638 						Nc = normal;
    639 
    640 						if(state.localViewerActive)
    641 						{
    642 							Vector4f Ec;   // Eye vector in camera space
    643 							Vector4f N2;
    644 
    645 							Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
    646 							Ec = normalize(Ec);
    647 
    648 							// R = E - 2 * N * (E . N)
    649 							Float4 dot = Float4(2.0f) * dot3(Ec, Nc);
    650 
    651 							R.x = Ec.x - Nc.x * dot;
    652 							R.y = Ec.y - Nc.y * dot;
    653 							R.z = Ec.z - Nc.z * dot;
    654 						}
    655 						else
    656 						{
    657 							// u = -2 * Nz * Nx
    658 							// v = -2 * Nz * Ny
    659 							// w = 1 - 2 * Nz * Nz
    660 
    661 							R.x = -Float4(2.0f) * Nc.z * Nc.x;
    662 							R.y = -Float4(2.0f) * Nc.z * Nc.y;
    663 							R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z;
    664 						}
    665 					}
    666 					else
    667 					{
    668 						R.x = Float4(0.0f);
    669 						R.y = Float4(0.0f);
    670 						R.z = Float4(0.0f);
    671 					}
    672 
    673 					R.w = Float4(1.0f);
    674 
    675 					o[T0 + stage].x = R.x;
    676 					o[T0 + stage].y = R.y;
    677 					o[T0 + stage].z = R.z;
    678 					o[T0 + stage].w = R.w;
    679 				}
    680 				break;
    681 			case TEXGEN_SPHEREMAP:
    682 				{
    683 					Vector4f R;   // Reflection vector
    684 
    685 					if(state.vertexNormalActive)
    686 					{
    687 						Vector4f Nc;   // Normal vector in camera space
    688 
    689 						Nc = normal;
    690 
    691 						if(state.localViewerActive)
    692 						{
    693 							Vector4f Ec;   // Eye vector in camera space
    694 							Vector4f N2;
    695 
    696 							Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
    697 							Ec = normalize(Ec);
    698 
    699 							// R = E - 2 * N * (E . N)
    700 							Float4 dot = Float4(2.0f) * dot3(Ec, Nc);
    701 
    702 							R.x = Ec.x - Nc.x * dot;
    703 							R.y = Ec.y - Nc.y * dot;
    704 							R.z = Ec.z - Nc.z * dot;
    705 						}
    706 						else
    707 						{
    708 							// u = -2 * Nz * Nx
    709 							// v = -2 * Nz * Ny
    710 							// w = 1 - 2 * Nz * Nz
    711 
    712 							R.x = -Float4(2.0f) * Nc.z * Nc.x;
    713 							R.y = -Float4(2.0f) * Nc.z * Nc.y;
    714 							R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z;
    715 						}
    716 					}
    717 					else
    718 					{
    719 						R.x = Float4(0.0f);
    720 						R.y = Float4(0.0f);
    721 						R.z = Float4(0.0f);
    722 					}
    723 
    724 					R.z -= Float4(1.0f);
    725 					R = normalize(R);
    726 					R.x = Float4(0.5f) * R.x + Float4(0.5f);
    727 					R.y = Float4(0.5f) * R.y + Float4(0.5f);
    728 
    729 					R.z = Float4(1.0f);
    730 					R.w = Float4(0.0f);
    731 
    732 					o[T0 + stage].x = R.x;
    733 					o[T0 + stage].y = R.y;
    734 					o[T0 + stage].z = R.z;
    735 					o[T0 + stage].w = R.w;
    736 				}
    737 				break;
    738 			default:
    739 				ASSERT(false);
    740 			}
    741 
    742 			Vector4f texTrans0;
    743 			Vector4f texTrans1;
    744 			Vector4f texTrans2;
    745 			Vector4f texTrans3;
    746 
    747 			Vector4f T;
    748 			Vector4f t;
    749 
    750 			T.x = o[T0 + stage].x;
    751 			T.y = o[T0 + stage].y;
    752 			T.z = o[T0 + stage].z;
    753 			T.w = o[T0 + stage].w;
    754 
    755 			switch(state.textureState[stage].textureTransformCountActive)
    756 			{
    757 			case 4:
    758 				texTrans3.x = texTrans3.y = texTrans3.z = texTrans3.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][3]));   // FIXME: Unpack
    759 				texTrans3.x = texTrans3.x.xxxx;
    760 				texTrans3.y = texTrans3.y.yyyy;
    761 				texTrans3.z = texTrans3.z.zzzz;
    762 				texTrans3.w = texTrans3.w.wwww;
    763 				t.w = dot4(T, texTrans3);
    764 			case 3:
    765 				texTrans2.x = texTrans2.y = texTrans2.z = texTrans2.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][2]));   // FIXME: Unpack
    766 				texTrans2.x = texTrans2.x.xxxx;
    767 				texTrans2.y = texTrans2.y.yyyy;
    768 				texTrans2.z = texTrans2.z.zzzz;
    769 				texTrans2.w = texTrans2.w.wwww;
    770 				t.z = dot4(T, texTrans2);
    771 			case 2:
    772 				texTrans1.x = texTrans1.y = texTrans1.z = texTrans1.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][1]));   // FIXME: Unpack
    773 				texTrans1.x = texTrans1.x.xxxx;
    774 				texTrans1.y = texTrans1.y.yyyy;
    775 				texTrans1.z = texTrans1.z.zzzz;
    776 				texTrans1.w = texTrans1.w.wwww;
    777 				t.y = dot4(T, texTrans1);
    778 			case 1:
    779 				texTrans0.x = texTrans0.y = texTrans0.z = texTrans0.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][0]));   // FIXME: Unpack
    780 				texTrans0.x = texTrans0.x.xxxx;
    781 				texTrans0.y = texTrans0.y.yyyy;
    782 				texTrans0.z = texTrans0.z.zzzz;
    783 				texTrans0.w = texTrans0.w.wwww;
    784 				t.x = dot4(T, texTrans0);
    785 
    786 				o[T0 + stage].x = t.x;
    787 				o[T0 + stage].y = t.y;
    788 				o[T0 + stage].z = t.z;
    789 				o[T0 + stage].w = t.w;
    790 			case 0:
    791 				break;
    792 			default:
    793 				ASSERT(false);
    794 			}
    795 		}
    796 	}
    797 
    798 	void VertexPipeline::processPointSize()
    799 	{
    800 		if(!state.pointSizeActive)
    801 		{
    802 			return;   // Use global pointsize
    803 		}
    804 
    805 		if(state.input[PointSize])
    806 		{
    807 			o[Pts].y = v[PointSize].x;
    808 		}
    809 		else
    810 		{
    811 			o[Pts].y = *Pointer<Float4>(data + OFFSET(DrawData,point.pointSize));
    812 		}
    813 
    814 		if(state.pointScaleActive && !state.preTransformed)
    815 		{
    816 			Vector4f p = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
    817 
    818 			Float4 d = Sqrt(dot3(p, p));   // FIXME: length(p);
    819 
    820 			Float4 A = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleA));   // FIXME: Unpack
    821 			Float4 B = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleB));   // FIXME: Unpack
    822 			Float4 C = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleC));   // FIXME: Unpack
    823 
    824 			A = RcpSqrt_pp(A + d * (B + d * C));
    825 
    826 			o[Pts].y = o[Pts].y * Float4(*Pointer<Float>(data + OFFSET(DrawData,viewportHeight))) * A;   // FIXME: Unpack
    827 		}
    828 	}
    829 
    830 	Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, bool homogeneous)
    831 	{
    832 		Vector4f dst;
    833 
    834 		if(homogeneous)
    835 		{
    836 			Float4 m[4][4];
    837 
    838 			for(int j = 0; j < 4; j++)
    839 			{
    840 				for(int i = 0; i < 4; i++)
    841 				{
    842 					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j);
    843 					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j);
    844 					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j);
    845 					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j);
    846 				}
    847 			}
    848 
    849 			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + src.w * m[0][3];
    850 			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + src.w * m[1][3];
    851 			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + src.w * m[2][3];
    852 			dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + src.w * m[3][3];
    853 		}
    854 		else
    855 		{
    856 			Float4 m[3][3];
    857 
    858 			for(int j = 0; j < 3; j++)
    859 			{
    860 				for(int i = 0; i < 3; i++)
    861 				{
    862 					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j);
    863 					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j);
    864 					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j);
    865 					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j);
    866 				}
    867 			}
    868 
    869 			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2];
    870 			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2];
    871 			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2];
    872 		}
    873 
    874 		return dst;
    875 	}
    876 
    877 	Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, UInt index[4], bool homogeneous)
    878 	{
    879 		Vector4f dst;
    880 
    881 		if(homogeneous)
    882 		{
    883 			Float4 m[4][4];
    884 
    885 			for(int j = 0; j < 4; j++)
    886 			{
    887 				for(int i = 0; i < 4; i++)
    888 				{
    889 					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]);
    890 					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]);
    891 					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]);
    892 					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]);
    893 				}
    894 			}
    895 
    896 			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + m[0][3];
    897 			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + m[1][3];
    898 			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + m[2][3];
    899 			dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + m[3][3];
    900 		}
    901 		else
    902 		{
    903 			Float4 m[3][3];
    904 
    905 			for(int j = 0; j < 3; j++)
    906 			{
    907 				for(int i = 0; i < 3; i++)
    908 				{
    909 					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]);
    910 					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]);
    911 					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]);
    912 					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]);
    913 				}
    914 			}
    915 
    916 			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2];
    917 			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2];
    918 			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2];
    919 		}
    920 
    921 		return dst;
    922 	}
    923 
    924 	Vector4f VertexPipeline::normalize(Vector4f &src)
    925 	{
    926 		Vector4f dst;
    927 
    928 		Float4 rcpLength = RcpSqrt_pp(dot3(src, src));
    929 
    930 		dst.x = src.x * rcpLength;
    931 		dst.y = src.y * rcpLength;
    932 		dst.z = src.z * rcpLength;
    933 
    934 		return dst;
    935 	}
    936 
    937 	Float4 VertexPipeline::power(Float4 &src0, Float4 &src1)
    938 	{
    939 		Float4 dst = src0;
    940 
    941 		dst = dst * dst;
    942 		dst = dst * dst;
    943 		dst = Float4(As<Int4>(dst) - As<Int4>(Float4(1.0f)));
    944 
    945 		dst *= src1;
    946 
    947 		dst = As<Float4>(Int4(dst) + As<Int4>(Float4(1.0f)));
    948 		dst = RcpSqrt_pp(dst);
    949 		dst = RcpSqrt_pp(dst);
    950 
    951 		return dst;
    952 	}
    953 }
    954