Home | History | Annotate | Download | only in referencerenderer
      1 /*-------------------------------------------------------------------------
      2  * drawElements Quality Program Reference Renderer
      3  * -----------------------------------------------
      4  *
      5  * Copyright 2014 The Android Open Source Project
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file
     21  * \brief Reference renderer interface.
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "rrRenderer.hpp"
     25 #include "tcuVectorUtil.hpp"
     26 #include "tcuTextureUtil.hpp"
     27 #include "tcuFloat.hpp"
     28 #include "rrPrimitiveAssembler.hpp"
     29 #include "rrFragmentOperations.hpp"
     30 #include "rrRasterizer.hpp"
     31 #include "deMemory.h"
     32 
     33 #include <set>
     34 
     35 namespace rr
     36 {
     37 namespace
     38 {
     39 
     40 typedef double ClipFloat; // floating point type used in clipping
     41 
     42 typedef tcu::Vector<ClipFloat, 4> ClipVec4;
     43 
     44 struct RasterizationInternalBuffers
     45 {
     46 	std::vector<FragmentPacket>		fragmentPackets;
     47 	std::vector<GenericVec4>		shaderOutputs;
     48 	std::vector<Fragment>			shadedFragments;
     49 	float*							fragmentDepthBuffer;
     50 };
     51 
     52 deUint32 readIndexArray (const IndexType type, const void* ptr, size_t ndx)
     53 {
     54 	switch (type)
     55 	{
     56 		case INDEXTYPE_UINT8:
     57 			return ((const deUint8*)ptr)[ndx];
     58 
     59 		case INDEXTYPE_UINT16:
     60 		{
     61 			deUint16 retVal;
     62 			deMemcpy(&retVal, (const deUint8*)ptr + ndx * sizeof(deUint16), sizeof(deUint16));
     63 
     64 			return retVal;
     65 		}
     66 
     67 		case INDEXTYPE_UINT32:
     68 		{
     69 			deUint32 retVal;
     70 			deMemcpy(&retVal, (const deUint8*)ptr + ndx * sizeof(deUint32), sizeof(deUint32));
     71 
     72 			return retVal;
     73 		}
     74 
     75 		default:
     76 			DE_ASSERT(false);
     77 			return 0;
     78 	}
     79 }
     80 
     81 tcu::IVec4 getBufferSize (const rr::MultisampleConstPixelBufferAccess& multisampleBuffer)
     82 {
     83 	return tcu::IVec4(0, 0, multisampleBuffer.raw().getHeight(), multisampleBuffer.raw().getDepth());
     84 }
     85 
     86 bool isEmpty (const rr::MultisampleConstPixelBufferAccess& access)
     87 {
     88 	return access.raw().getWidth() == 0 || access.raw().getHeight() == 0 || access.raw().getDepth() == 0;
     89 }
     90 
     91 struct DrawContext
     92 {
     93 	int primitiveID;
     94 
     95 	DrawContext (void)
     96 		: primitiveID(0)
     97 	{
     98 	}
     99 };
    100 
    101 /*--------------------------------------------------------------------*//*!
    102  * \brief Calculates intersection of two rects given as (left, bottom, width, height)
    103  *//*--------------------------------------------------------------------*/
    104 tcu::IVec4 rectIntersection (const tcu::IVec4& a, const tcu::IVec4& b)
    105 {
    106 	const tcu::IVec2 pos	= tcu::IVec2(de::max(a.x(), b.x()), de::max(a.y(), b.y()));
    107 	const tcu::IVec2 endPos	= tcu::IVec2(de::min(a.x() + a.z(), b.x() + b.z()), de::min(a.y() + a.w(), b.y() + b.w()));
    108 
    109 	return tcu::IVec4(pos.x(), pos.y(), endPos.x() - pos.x(), endPos.y() - pos.y());
    110 }
    111 
    112 void convertPrimitiveToBaseType(std::vector<pa::Triangle>& output, std::vector<pa::Triangle>& input)
    113 {
    114 	std::swap(output, input);
    115 }
    116 
    117 void convertPrimitiveToBaseType(std::vector<pa::Line>& output, std::vector<pa::Line>& input)
    118 {
    119 	std::swap(output, input);
    120 }
    121 
    122 void convertPrimitiveToBaseType(std::vector<pa::Point>& output, std::vector<pa::Point>& input)
    123 {
    124 	std::swap(output, input);
    125 }
    126 
    127 void convertPrimitiveToBaseType(std::vector<pa::Line>& output, std::vector<pa::LineAdjacency>& input)
    128 {
    129 	output.resize(input.size());
    130 	for (size_t i = 0; i < input.size(); ++i)
    131 	{
    132 		const int adjacentProvokingVertex	= input[i].provokingIndex;
    133 		const int baseProvokingVertexIndex	= adjacentProvokingVertex-1;
    134 		output[i] = pa::Line(input[i].v1, input[i].v2, baseProvokingVertexIndex);
    135 	}
    136 }
    137 
    138 void convertPrimitiveToBaseType(std::vector<pa::Triangle>& output, std::vector<pa::TriangleAdjacency>& input)
    139 {
    140 	output.resize(input.size());
    141 	for (size_t i = 0; i < input.size(); ++i)
    142 	{
    143 		const int adjacentProvokingVertex	= input[i].provokingIndex;
    144 		const int baseProvokingVertexIndex	= adjacentProvokingVertex/2;
    145 		output[i] = pa::Triangle(input[i].v0, input[i].v2, input[i].v4, baseProvokingVertexIndex);
    146 	}
    147 }
    148 
    149 namespace cliputil
    150 {
    151 
    152 /*--------------------------------------------------------------------*//*!
    153  * \brief Get clipped portion of the second endpoint
    154  *
    155  * Calculate the intersection of line segment v0-v1 and a given plane. Line
    156  * segment is defined by a pair of one-dimensional homogeneous coordinates.
    157  *
    158  *//*--------------------------------------------------------------------*/
    159 ClipFloat getSegmentVolumeEdgeClip (const ClipFloat v0,
    160 									const ClipFloat w0,
    161 									const ClipFloat v1,
    162 									const ClipFloat w1,
    163 									const ClipFloat plane)
    164 {
    165 	return (plane*w0 - v0) / ((v1 - v0) - plane*(w1 - w0));
    166 }
    167 
    168 /*--------------------------------------------------------------------*//*!
    169  * \brief Get clipped portion of the endpoint
    170  *
    171  * How much (in [0-1] range) of a line segment v0-v1 would be clipped
    172  * of the v0 end of the line segment by clipping.
    173  *//*--------------------------------------------------------------------*/
    174 ClipFloat getLineEndpointClipping (const ClipVec4& v0, const ClipVec4& v1)
    175 {
    176 	const ClipFloat clipVolumeSize = (ClipFloat)1.0;
    177 
    178 	if (v0.z() > v0.w())
    179 	{
    180 		// Clip +Z
    181 		return getSegmentVolumeEdgeClip(v0.z(), v0.w(), v1.z(), v1.w(), clipVolumeSize);
    182 	}
    183 	else if (v0.z() < -v0.w())
    184 	{
    185 		// Clip -Z
    186 		return getSegmentVolumeEdgeClip(v0.z(), v0.w(), v1.z(), v1.w(), -clipVolumeSize);
    187 	}
    188 	else
    189 	{
    190 		// no clipping
    191 		return (ClipFloat)0.0;
    192 	}
    193 }
    194 
    195 ClipVec4 vec4ToClipVec4 (const tcu::Vec4& v)
    196 {
    197 	return ClipVec4((ClipFloat)v.x(), (ClipFloat)v.y(), (ClipFloat)v.z(), (ClipFloat)v.w());
    198 }
    199 
    200 tcu::Vec4 clipVec4ToVec4 (const ClipVec4& v)
    201 {
    202 	return tcu::Vec4((float)v.x(), (float)v.y(), (float)v.z(), (float)v.w());
    203 }
    204 
    205 class ClipVolumePlane
    206 {
    207 public:
    208 	virtual bool		pointInClipVolume			(const ClipVec4& p) const						= 0;
    209 	virtual ClipFloat	clipLineSegmentEnd			(const ClipVec4& v0, const ClipVec4& v1) const	= 0;
    210 	virtual ClipVec4	getLineIntersectionPoint	(const ClipVec4& v0, const ClipVec4& v1) const	= 0;
    211 };
    212 
    213 template <int Sign, int CompNdx>
    214 class ComponentPlane : public ClipVolumePlane
    215 {
    216 	DE_STATIC_ASSERT(Sign == +1 || Sign == -1);
    217 
    218 public:
    219 	bool		pointInClipVolume			(const ClipVec4& p) const;
    220 	ClipFloat	clipLineSegmentEnd			(const ClipVec4& v0, const ClipVec4& v1) const;
    221 	ClipVec4	getLineIntersectionPoint	(const ClipVec4& v0, const ClipVec4& v1) const;
    222 };
    223 
    224 template <int Sign, int CompNdx>
    225 bool ComponentPlane<Sign, CompNdx>::pointInClipVolume (const ClipVec4& p) const
    226 {
    227 	const ClipFloat clipVolumeSize = (ClipFloat)1.0;
    228 
    229 	return (ClipFloat)(Sign * p[CompNdx]) <= clipVolumeSize * p.w();
    230 }
    231 
    232 template <int Sign, int CompNdx>
    233 ClipFloat ComponentPlane<Sign, CompNdx>::clipLineSegmentEnd (const ClipVec4& v0, const ClipVec4& v1) const
    234 {
    235 	const ClipFloat clipVolumeSize = (ClipFloat)1.0;
    236 
    237 	return getSegmentVolumeEdgeClip(v0[CompNdx], v0.w(),
    238 									v1[CompNdx], v1.w(),
    239 									(ClipFloat)Sign * clipVolumeSize);
    240 }
    241 
    242 template <int Sign, int CompNdx>
    243 ClipVec4 ComponentPlane<Sign, CompNdx>::getLineIntersectionPoint (const ClipVec4& v0, const ClipVec4& v1) const
    244 {
    245 	// A point on line might be far away, causing clipping ratio (clipLineSegmentEnd) to become extremely close to 1.0
    246 	// even if the another point is not on the plane. Prevent clipping ratio from saturating by using points on line
    247 	// that are (nearly) on this and (nearly) on the opposite plane.
    248 
    249 	const ClipVec4 	clippedV0	= tcu::mix(v0, v1, ComponentPlane<+1, CompNdx>().clipLineSegmentEnd(v0, v1));
    250 	const ClipVec4 	clippedV1	= tcu::mix(v0, v1, ComponentPlane<-1, CompNdx>().clipLineSegmentEnd(v0, v1));
    251 	const ClipFloat	clipRatio	= clipLineSegmentEnd(clippedV0, clippedV1);
    252 
    253 	// Find intersection point of line from v0 to v1 and the current plane. Avoid ratios near 1.0
    254 	if (clipRatio <= (ClipFloat)0.5)
    255 		return tcu::mix(clippedV0, clippedV1, clipRatio);
    256 	else
    257 	{
    258 		const ClipFloat complementClipRatio = clipLineSegmentEnd(clippedV1, clippedV0);
    259 		return tcu::mix(clippedV1, clippedV0, complementClipRatio);
    260 	}
    261 }
    262 
    263 struct TriangleVertex
    264 {
    265 	ClipVec4	position;
    266 	ClipFloat	weight[3];		//!< barycentrics
    267 };
    268 
    269 struct SubTriangle
    270 {
    271 	TriangleVertex vertices[3];
    272 };
    273 
    274 void clipTriangleOneVertex (std::vector<TriangleVertex>& clippedEdges, const ClipVolumePlane& plane, const TriangleVertex& clipped, const TriangleVertex& v1, const TriangleVertex& v2)
    275 {
    276 	const ClipFloat	degenerateLimit = (ClipFloat)1.0;
    277 
    278 	// calc clip pos
    279 	TriangleVertex	mid1;
    280 	TriangleVertex	mid2;
    281 	bool			outputDegenerate = false;
    282 
    283 	{
    284 		const TriangleVertex&	inside	= v1;
    285 		const TriangleVertex&	outside	= clipped;
    286 		      TriangleVertex&	middle	= mid1;
    287 
    288 		const ClipFloat			hitDist	= plane.clipLineSegmentEnd(inside.position, outside.position);
    289 
    290 		if (hitDist >= degenerateLimit)
    291 		{
    292 			// do not generate degenerate triangles
    293 			outputDegenerate = true;
    294 		}
    295 		else
    296 		{
    297 			const ClipVec4 approximatedClipPoint	= tcu::mix(inside.position, outside.position, hitDist);
    298 			const ClipVec4 anotherPointOnLine		= (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);
    299 
    300 			middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
    301 			middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
    302 			middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
    303 			middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
    304 		}
    305 	}
    306 
    307 	{
    308 		const TriangleVertex&	inside	= v2;
    309 		const TriangleVertex&	outside	= clipped;
    310 		      TriangleVertex&	middle	= mid2;
    311 
    312 		const ClipFloat			hitDist	= plane.clipLineSegmentEnd(inside.position, outside.position);
    313 
    314 		if (hitDist >= degenerateLimit)
    315 		{
    316 			// do not generate degenerate triangles
    317 			outputDegenerate = true;
    318 		}
    319 		else
    320 		{
    321 			const ClipVec4 approximatedClipPoint	= tcu::mix(inside.position, outside.position, hitDist);
    322 			const ClipVec4 anotherPointOnLine		= (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);
    323 
    324 			middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
    325 			middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
    326 			middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
    327 			middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
    328 		}
    329 	}
    330 
    331 	if (!outputDegenerate)
    332 	{
    333 		// gen quad (v1) -> mid1 -> mid2 -> (v2)
    334 		clippedEdges.push_back(v1);
    335 		clippedEdges.push_back(mid1);
    336 		clippedEdges.push_back(mid2);
    337 		clippedEdges.push_back(v2);
    338 	}
    339 	else
    340 	{
    341 		// don't modify
    342 		clippedEdges.push_back(v1);
    343 		clippedEdges.push_back(clipped);
    344 		clippedEdges.push_back(v2);
    345 	}
    346 }
    347 
    348 void clipTriangleTwoVertices (std::vector<TriangleVertex>& clippedEdges, const ClipVolumePlane& plane, const TriangleVertex& v0, const TriangleVertex& clipped1, const TriangleVertex& clipped2)
    349 {
    350 	const ClipFloat	unclippableLimit = (ClipFloat)1.0;
    351 
    352 	// calc clip pos
    353 	TriangleVertex	mid1;
    354 	TriangleVertex	mid2;
    355 	bool			unclippableVertex1 = false;
    356 	bool			unclippableVertex2 = false;
    357 
    358 	{
    359 		const TriangleVertex&	inside	= v0;
    360 		const TriangleVertex&	outside	= clipped1;
    361 		      TriangleVertex&	middle	= mid1;
    362 
    363 		const ClipFloat			hitDist	= plane.clipLineSegmentEnd(inside.position, outside.position);
    364 
    365 		if (hitDist >= unclippableLimit)
    366 		{
    367 			// this edge cannot be clipped because the edge is really close to the volume boundary
    368 			unclippableVertex1 = true;
    369 		}
    370 		else
    371 		{
    372 			const ClipVec4 approximatedClipPoint	= tcu::mix(inside.position, outside.position, hitDist);
    373 			const ClipVec4 anotherPointOnLine		= (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);
    374 
    375 			middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
    376 			middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
    377 			middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
    378 			middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
    379 		}
    380 	}
    381 
    382 	{
    383 		const TriangleVertex&	inside	= v0;
    384 		const TriangleVertex&	outside	= clipped2;
    385 		      TriangleVertex&	middle	= mid2;
    386 
    387 		const ClipFloat			hitDist	= plane.clipLineSegmentEnd(inside.position, outside.position);
    388 
    389 		if (hitDist >= unclippableLimit)
    390 		{
    391 			// this edge cannot be clipped because the edge is really close to the volume boundary
    392 			unclippableVertex2 = true;
    393 		}
    394 		else
    395 		{
    396 			const ClipVec4 approximatedClipPoint	= tcu::mix(inside.position, outside.position, hitDist);
    397 			const ClipVec4 anotherPointOnLine		= (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);
    398 
    399 			middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
    400 			middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
    401 			middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
    402 			middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
    403 		}
    404 	}
    405 
    406 	if (!unclippableVertex1 && !unclippableVertex2)
    407 	{
    408 		// gen triangle (v0) -> mid1 -> mid2
    409 		clippedEdges.push_back(v0);
    410 		clippedEdges.push_back(mid1);
    411 		clippedEdges.push_back(mid2);
    412 	}
    413 	else if (!unclippableVertex1 && unclippableVertex2)
    414 	{
    415 		// clip just vertex 1
    416 		clippedEdges.push_back(v0);
    417 		clippedEdges.push_back(mid1);
    418 		clippedEdges.push_back(clipped2);
    419 	}
    420 	else if (unclippableVertex1 && !unclippableVertex2)
    421 	{
    422 		// clip just vertex 2
    423 		clippedEdges.push_back(v0);
    424 		clippedEdges.push_back(clipped1);
    425 		clippedEdges.push_back(mid2);
    426 	}
    427 	else
    428 	{
    429 		// don't modify
    430 		clippedEdges.push_back(v0);
    431 		clippedEdges.push_back(clipped1);
    432 		clippedEdges.push_back(clipped2);
    433 	}
    434 }
    435 
    436 void clipTriangleToPlane (std::vector<TriangleVertex>& clippedEdges, const TriangleVertex* vertices, const ClipVolumePlane& plane)
    437 {
    438 	const bool v0Clipped = !plane.pointInClipVolume(vertices[0].position);
    439 	const bool v1Clipped = !plane.pointInClipVolume(vertices[1].position);
    440 	const bool v2Clipped = !plane.pointInClipVolume(vertices[2].position);
    441 	const int  clipCount = ((v0Clipped) ? (1) : (0)) + ((v1Clipped) ? (1) : (0)) + ((v2Clipped) ? (1) : (0));
    442 
    443 	if (clipCount == 0)
    444 	{
    445 		// pass
    446 		clippedEdges.insert(clippedEdges.begin(), vertices, vertices + 3);
    447 	}
    448 	else if (clipCount == 1)
    449 	{
    450 		// clip one vertex
    451 		if (v0Clipped)			clipTriangleOneVertex(clippedEdges, plane, vertices[0], vertices[1], vertices[2]);
    452 		else if (v1Clipped)		clipTriangleOneVertex(clippedEdges, plane, vertices[1], vertices[2], vertices[0]);
    453 		else					clipTriangleOneVertex(clippedEdges, plane, vertices[2], vertices[0], vertices[1]);
    454 	}
    455 	else if (clipCount == 2)
    456 	{
    457 		// clip two vertices
    458 		if (!v0Clipped)			clipTriangleTwoVertices(clippedEdges, plane, vertices[0], vertices[1], vertices[2]);
    459 		else if (!v1Clipped)	clipTriangleTwoVertices(clippedEdges, plane, vertices[1], vertices[2], vertices[0]);
    460 		else					clipTriangleTwoVertices(clippedEdges, plane, vertices[2], vertices[0], vertices[1]);
    461 	}
    462 	else if (clipCount == 3)
    463 	{
    464 		// discard
    465 	}
    466 	else
    467 	{
    468 		DE_ASSERT(DE_FALSE);
    469 	}
    470 }
    471 
    472 } // cliputil
    473 
    474 tcu::Vec2 to2DCartesian (const tcu::Vec4& p)
    475 {
    476 	return tcu::Vec2(p.x(), p.y()) / p.w();
    477 }
    478 
    479 float cross2D (const tcu::Vec2& a, const tcu::Vec2& b)
    480 {
    481 	return tcu::cross(tcu::Vec3(a.x(), a.y(), 0.0f), tcu::Vec3(b.x(), b.y(), 0.0f)).z();
    482 }
    483 
    484 void flatshadePrimitiveVertices (pa::Triangle& target, size_t outputNdx)
    485 {
    486 	const rr::GenericVec4 flatValue = target.getProvokingVertex()->outputs[outputNdx];
    487 	target.v0->outputs[outputNdx] = flatValue;
    488 	target.v1->outputs[outputNdx] = flatValue;
    489 	target.v2->outputs[outputNdx] = flatValue;
    490 }
    491 
    492 void flatshadePrimitiveVertices (pa::Line& target, size_t outputNdx)
    493 {
    494 	const rr::GenericVec4 flatValue = target.getProvokingVertex()->outputs[outputNdx];
    495 	target.v0->outputs[outputNdx] = flatValue;
    496 	target.v1->outputs[outputNdx] = flatValue;
    497 }
    498 
    499 void flatshadePrimitiveVertices (pa::Point& target, size_t outputNdx)
    500 {
    501 	DE_UNREF(target);
    502 	DE_UNREF(outputNdx);
    503 }
    504 
    505 template <typename ContainerType>
    506 void flatshadeVertices (const Program& program, ContainerType& list)
    507 {
    508 	// flatshade
    509 	const std::vector<rr::VertexVaryingInfo>& fragInputs = (program.geometryShader) ? (program.geometryShader->getOutputs()) : (program.vertexShader->getOutputs());
    510 
    511 	for (size_t inputNdx = 0; inputNdx < fragInputs.size(); ++inputNdx)
    512 		if (fragInputs[inputNdx].flatshade)
    513 			for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
    514 				flatshadePrimitiveVertices(*it, inputNdx);
    515 }
    516 
    517 /*--------------------------------------------------------------------*//*!
    518  * Clip triangles to the clip volume.
    519  *//*--------------------------------------------------------------------*/
    520 void clipPrimitives (std::vector<pa::Triangle>&		list,
    521 					 const Program&					program,
    522 					 bool							clipWithZPlanes,
    523 					 VertexPacketAllocator&			vpalloc)
    524 {
    525 	using namespace cliputil;
    526 
    527 	cliputil::ComponentPlane<+1, 0> clipPosX;
    528 	cliputil::ComponentPlane<-1, 0> clipNegX;
    529 	cliputil::ComponentPlane<+1, 1> clipPosY;
    530 	cliputil::ComponentPlane<-1, 1> clipNegY;
    531 	cliputil::ComponentPlane<+1, 2> clipPosZ;
    532 	cliputil::ComponentPlane<-1, 2> clipNegZ;
    533 
    534 	const std::vector<rr::VertexVaryingInfo>&	fragInputs			= (program.geometryShader) ? (program.geometryShader->getOutputs()) : (program.vertexShader->getOutputs());
    535 	const ClipVolumePlane*						planes[]			= { &clipPosX, &clipNegX, &clipPosY, &clipNegY, &clipPosZ, &clipNegZ };
    536 	const int									numPlanes			= (clipWithZPlanes) ? (6) : (4);
    537 
    538 	std::vector<pa::Triangle>					outputTriangles;
    539 
    540 	for (int inputTriangleNdx = 0; inputTriangleNdx < (int)list.size(); ++inputTriangleNdx)
    541 	{
    542 		bool clippedByPlane[6];
    543 
    544 		// Needs clipping?
    545 		{
    546 			bool discardPrimitive	= false;
    547 			bool fullyInClipVolume	= true;
    548 
    549 			for (int planeNdx = 0; planeNdx < numPlanes; ++planeNdx)
    550 			{
    551 				const ClipVolumePlane*	plane			= planes[planeNdx];
    552 				const bool				v0InsidePlane	= plane->pointInClipVolume(vec4ToClipVec4(list[inputTriangleNdx].v0->position));
    553 				const bool				v1InsidePlane	= plane->pointInClipVolume(vec4ToClipVec4(list[inputTriangleNdx].v1->position));
    554 				const bool				v2InsidePlane	= plane->pointInClipVolume(vec4ToClipVec4(list[inputTriangleNdx].v2->position));
    555 
    556 				// Fully outside
    557 				if (!v0InsidePlane && !v1InsidePlane && !v2InsidePlane)
    558 				{
    559 					discardPrimitive = true;
    560 					break;
    561 				}
    562 				// Partially outside
    563 				else if (!v0InsidePlane || !v1InsidePlane || !v2InsidePlane)
    564 				{
    565 					clippedByPlane[planeNdx] = true;
    566 					fullyInClipVolume = false;
    567 				}
    568 				// Fully inside
    569 				else
    570 					clippedByPlane[planeNdx] = false;
    571 			}
    572 
    573 			if (discardPrimitive)
    574 				continue;
    575 
    576 			if (fullyInClipVolume)
    577 			{
    578 				outputTriangles.push_back(list[inputTriangleNdx]);
    579 				continue;
    580 			}
    581 		}
    582 
    583 		// Clip
    584 		{
    585 			std::vector<SubTriangle>	subTriangles	(1);
    586 			SubTriangle&				initialTri		= subTriangles[0];
    587 
    588 			initialTri.vertices[0].position = vec4ToClipVec4(list[inputTriangleNdx].v0->position);
    589 			initialTri.vertices[0].weight[0] = (ClipFloat)1.0;
    590 			initialTri.vertices[0].weight[1] = (ClipFloat)0.0;
    591 			initialTri.vertices[0].weight[2] = (ClipFloat)0.0;
    592 
    593 			initialTri.vertices[1].position = vec4ToClipVec4(list[inputTriangleNdx].v1->position);
    594 			initialTri.vertices[1].weight[0] = (ClipFloat)0.0;
    595 			initialTri.vertices[1].weight[1] = (ClipFloat)1.0;
    596 			initialTri.vertices[1].weight[2] = (ClipFloat)0.0;
    597 
    598 			initialTri.vertices[2].position = vec4ToClipVec4(list[inputTriangleNdx].v2->position);
    599 			initialTri.vertices[2].weight[0] = (ClipFloat)0.0;
    600 			initialTri.vertices[2].weight[1] = (ClipFloat)0.0;
    601 			initialTri.vertices[2].weight[2] = (ClipFloat)1.0;
    602 
    603 			// Clip all subtriangles to all relevant planes
    604 			for (int planeNdx = 0; planeNdx < numPlanes; ++planeNdx)
    605 			{
    606 				std::vector<SubTriangle> nextPhaseSubTriangles;
    607 
    608 				if (!clippedByPlane[planeNdx])
    609 					continue;
    610 
    611 				for (int subTriangleNdx = 0; subTriangleNdx < (int)subTriangles.size(); ++subTriangleNdx)
    612 				{
    613 					std::vector<TriangleVertex> convexPrimitive;
    614 
    615 					// Clip triangle and form a convex n-gon ( n c {3, 4} )
    616 					clipTriangleToPlane(convexPrimitive, subTriangles[subTriangleNdx].vertices, *planes[planeNdx]);
    617 
    618 					// Subtriangle completely discarded
    619 					if (convexPrimitive.empty())
    620 						continue;
    621 
    622 					DE_ASSERT(convexPrimitive.size() == 3 || convexPrimitive.size() == 4);
    623 
    624 					//Triangulate planar convex n-gon
    625 					{
    626 						TriangleVertex& v0 = convexPrimitive[0];
    627 
    628 						for (int subsubTriangleNdx = 1; subsubTriangleNdx + 1 < (int)convexPrimitive.size(); ++subsubTriangleNdx)
    629 						{
    630 							const float				degenerateEpsilon	= 1.0e-6f;
    631 							const TriangleVertex&	v1					= convexPrimitive[subsubTriangleNdx];
    632 							const TriangleVertex&	v2					= convexPrimitive[subsubTriangleNdx + 1];
    633 							const float				visibleArea			= de::abs(cross2D(to2DCartesian(clipVec4ToVec4(v1.position)) - to2DCartesian(clipVec4ToVec4(v0.position)),
    634 																						  to2DCartesian(clipVec4ToVec4(v2.position)) - to2DCartesian(clipVec4ToVec4(v0.position))));
    635 
    636 							// has surface area (is not a degenerate)
    637 							if (visibleArea >= degenerateEpsilon)
    638 							{
    639 								SubTriangle subsubTriangle;
    640 
    641 								subsubTriangle.vertices[0] = v0;
    642 								subsubTriangle.vertices[1] = v1;
    643 								subsubTriangle.vertices[2] = v2;
    644 
    645 								nextPhaseSubTriangles.push_back(subsubTriangle);
    646 							}
    647 						}
    648 					}
    649 				}
    650 
    651 				subTriangles.swap(nextPhaseSubTriangles);
    652 			}
    653 
    654 			// Rebuild pa::Triangles from subtriangles
    655 			for (int subTriangleNdx = 0; subTriangleNdx < (int)subTriangles.size(); ++subTriangleNdx)
    656 			{
    657 				VertexPacket*	p0				= vpalloc.alloc();
    658 				VertexPacket*	p1				= vpalloc.alloc();
    659 				VertexPacket*	p2				= vpalloc.alloc();
    660 				pa::Triangle	ngonFragment	(p0, p1, p2, -1);
    661 
    662 				p0->position = clipVec4ToVec4(subTriangles[subTriangleNdx].vertices[0].position);
    663 				p1->position = clipVec4ToVec4(subTriangles[subTriangleNdx].vertices[1].position);
    664 				p2->position = clipVec4ToVec4(subTriangles[subTriangleNdx].vertices[2].position);
    665 
    666 				for (size_t outputNdx = 0; outputNdx < fragInputs.size(); ++outputNdx)
    667 				{
    668 					if (fragInputs[outputNdx].type == GENERICVECTYPE_FLOAT)
    669 					{
    670 						const tcu::Vec4 out0 = list[inputTriangleNdx].v0->outputs[outputNdx].get<float>();
    671 						const tcu::Vec4 out1 = list[inputTriangleNdx].v1->outputs[outputNdx].get<float>();
    672 						const tcu::Vec4 out2 = list[inputTriangleNdx].v2->outputs[outputNdx].get<float>();
    673 
    674 						p0->outputs[outputNdx] = (float)subTriangles[subTriangleNdx].vertices[0].weight[0] * out0
    675 											   + (float)subTriangles[subTriangleNdx].vertices[0].weight[1] * out1
    676 											   + (float)subTriangles[subTriangleNdx].vertices[0].weight[2] * out2;
    677 
    678 						p1->outputs[outputNdx] = (float)subTriangles[subTriangleNdx].vertices[1].weight[0] * out0
    679 											   + (float)subTriangles[subTriangleNdx].vertices[1].weight[1] * out1
    680 											   + (float)subTriangles[subTriangleNdx].vertices[1].weight[2] * out2;
    681 
    682 						p2->outputs[outputNdx] = (float)subTriangles[subTriangleNdx].vertices[2].weight[0] * out0
    683 											   + (float)subTriangles[subTriangleNdx].vertices[2].weight[1] * out1
    684 											   + (float)subTriangles[subTriangleNdx].vertices[2].weight[2] * out2;
    685 					}
    686 					else
    687 					{
    688 						// only floats are interpolated, all others must be flatshaded then
    689 						p0->outputs[outputNdx] = list[inputTriangleNdx].getProvokingVertex()->outputs[outputNdx];
    690 						p1->outputs[outputNdx] = list[inputTriangleNdx].getProvokingVertex()->outputs[outputNdx];
    691 						p2->outputs[outputNdx] = list[inputTriangleNdx].getProvokingVertex()->outputs[outputNdx];
    692 					}
    693 				}
    694 
    695 				outputTriangles.push_back(ngonFragment);
    696 			}
    697 		}
    698 	}
    699 
    700 	// output result
    701 	list.swap(outputTriangles);
    702 }
    703 
    704 /*--------------------------------------------------------------------*//*!
    705  * Clip lines to the near and far clip planes.
    706  *
    707  * Clipping to other planes is a by-product of the viewport test  (i.e.
    708  * rasterization area selection).
    709  *//*--------------------------------------------------------------------*/
    710 void clipPrimitives (std::vector<pa::Line>& 		list,
    711 					 const Program& 				program,
    712 					 bool 							clipWithZPlanes,
    713 					 VertexPacketAllocator&			vpalloc)
    714 {
    715 	DE_UNREF(vpalloc);
    716 
    717 	using namespace cliputil;
    718 
    719 	// Lines are clipped only by the far and the near planes here. Line clipping by other planes done in the rasterization phase
    720 
    721 	const std::vector<rr::VertexVaryingInfo>&	fragInputs	= (program.geometryShader) ? (program.geometryShader->getOutputs()) : (program.vertexShader->getOutputs());
    722 	std::vector<pa::Line>						visibleLines;
    723 
    724 	// Z-clipping disabled, don't do anything
    725 	if (!clipWithZPlanes)
    726 		return;
    727 
    728 	for (size_t ndx = 0; ndx < list.size(); ++ndx)
    729 	{
    730 		pa::Line& l = list[ndx];
    731 
    732 		// Totally discarded?
    733 		if ((l.v0->position.z() < -l.v0->position.w() && l.v1->position.z() < -l.v1->position.w()) ||
    734 			(l.v0->position.z() >  l.v0->position.w() && l.v1->position.z() >  l.v1->position.w()))
    735 			continue; // discard
    736 
    737 		// Something is visible
    738 
    739 		const ClipVec4	p0	= vec4ToClipVec4(l.v0->position);
    740 		const ClipVec4	p1	= vec4ToClipVec4(l.v1->position);
    741 		const ClipFloat	t0	= getLineEndpointClipping(p0, p1);
    742 		const ClipFloat	t1	= getLineEndpointClipping(p1, p0);
    743 
    744 		// Not clipped at all?
    745 		if (t0 == (ClipFloat)0.0 && t1 == (ClipFloat)0.0)
    746 		{
    747 			visibleLines.push_back(pa::Line(l.v0, l.v1, -1));
    748 		}
    749 		else
    750 		{
    751 			// Clip position
    752 			l.v0->position = clipVec4ToVec4(tcu::mix(p0, p1, t0));
    753 			l.v1->position = clipVec4ToVec4(tcu::mix(p1, p0, t1));
    754 
    755 			// Clip attributes
    756 			for (size_t outputNdx = 0; outputNdx < fragInputs.size(); ++outputNdx)
    757 			{
    758 				// only floats are clipped, other types are flatshaded
    759 				if (fragInputs[outputNdx].type == GENERICVECTYPE_FLOAT)
    760 				{
    761 					const tcu::Vec4 a0 = l.v0->outputs[outputNdx].get<float>();
    762 					const tcu::Vec4 a1 = l.v1->outputs[outputNdx].get<float>();
    763 
    764 					l.v0->outputs[outputNdx] = tcu::mix(a0, a1, (float)t0);
    765 					l.v1->outputs[outputNdx] = tcu::mix(a1, a0, (float)t1);
    766 				}
    767 			}
    768 
    769 			visibleLines.push_back(pa::Line(l.v0, l.v1, -1));
    770 		}
    771 	}
    772 
    773 	// return visible in list
    774 	std::swap(visibleLines, list);
    775 }
    776 
    777 /*--------------------------------------------------------------------*//*!
    778  * Discard points not within clip volume. Clipping is a by-product
    779  * of the viewport test.
    780  *//*--------------------------------------------------------------------*/
    781 void clipPrimitives (std::vector<pa::Point>&		list,
    782 					 const Program&					program,
    783 					 bool							clipWithZPlanes,
    784 					 VertexPacketAllocator&			vpalloc)
    785 {
    786 	DE_UNREF(vpalloc);
    787 	DE_UNREF(program);
    788 
    789 	std::vector<pa::Point> visiblePoints;
    790 
    791 	// Z-clipping disabled, don't do anything
    792 	if (!clipWithZPlanes)
    793 		return;
    794 
    795 	for (size_t ndx = 0; ndx < list.size(); ++ndx)
    796 	{
    797 		pa::Point& p = list[ndx];
    798 
    799 		// points are discarded if Z is not in range. (Wide) point clipping is done in the rasterization phase
    800 		if (de::inRange(p.v0->position.z(), -p.v0->position.w(), p.v0->position.w()))
    801 			visiblePoints.push_back(pa::Point(p.v0));
    802 	}
    803 
    804 	// return visible in list
    805 	std::swap(visiblePoints, list);
    806 }
    807 
    808 void transformVertexClipCoordsToWindowCoords (const RenderState& state, VertexPacket& packet)
    809 {
    810 	// To normalized device coords
    811 	{
    812 		packet.position = tcu::Vec4(packet.position.x()/packet.position.w(),
    813 									packet.position.y()/packet.position.w(),
    814 									packet.position.z()/packet.position.w(),
    815 									1.0f               /packet.position.w());
    816 	}
    817 
    818 	// To window coords
    819 	{
    820 		const WindowRectangle&	viewport	= state.viewport.rect;
    821 		const float				halfW		= (float)(viewport.width) / 2.0f;
    822 		const float				halfH		= (float)(viewport.height) / 2.0f;
    823 		const float				oX			= (float)viewport.left + halfW;
    824 		const float				oY			= (float)viewport.bottom + halfH;
    825 		const float				zn			= state.viewport.zn;
    826 		const float				zf			= state.viewport.zf;
    827 
    828 		packet.position = tcu::Vec4(packet.position.x()*halfW + oX,
    829 									packet.position.y()*halfH + oY,
    830 									packet.position.z()*(zf - zn)/2.0f + (zn + zf)/2.0f,
    831 									packet.position.w());
    832 	}
    833 }
    834 
    835 void transformPrimitiveClipCoordsToWindowCoords (const RenderState& state, pa::Triangle& target)
    836 {
    837 	transformVertexClipCoordsToWindowCoords(state, *target.v0);
    838 	transformVertexClipCoordsToWindowCoords(state, *target.v1);
    839 	transformVertexClipCoordsToWindowCoords(state, *target.v2);
    840 }
    841 
    842 void transformPrimitiveClipCoordsToWindowCoords (const RenderState& state, pa::Line& target)
    843 {
    844 	transformVertexClipCoordsToWindowCoords(state, *target.v0);
    845 	transformVertexClipCoordsToWindowCoords(state, *target.v1);
    846 }
    847 
    848 void transformPrimitiveClipCoordsToWindowCoords (const RenderState& state, pa::Point& target)
    849 {
    850 	transformVertexClipCoordsToWindowCoords(state, *target.v0);
    851 }
    852 
    853 template <typename ContainerType>
    854 void transformClipCoordsToWindowCoords (const RenderState& state, ContainerType& list)
    855 {
    856 	for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
    857 		transformPrimitiveClipCoordsToWindowCoords(state, *it);
    858 }
    859 
    860 void makeSharedVerticeDistinct (VertexPacket*& packet, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
    861 {
    862 	// distinct
    863 	if (vertices.find(packet) == vertices.end())
    864 	{
    865 		vertices.insert(packet);
    866 	}
    867 	else
    868 	{
    869 		VertexPacket* newPacket = vpalloc.alloc();
    870 
    871 		// copy packet output values
    872 		newPacket->position		= packet->position;
    873 		newPacket->pointSize	= packet->pointSize;
    874 		newPacket->primitiveID	= packet->primitiveID;
    875 
    876 		for (size_t outputNdx = 0; outputNdx < vpalloc.getNumVertexOutputs(); ++outputNdx)
    877 			newPacket->outputs[outputNdx] = packet->outputs[outputNdx];
    878 
    879 		// no need to insert new packet to "vertices" as newPacket is unique
    880 		packet = newPacket;
    881 	}
    882 }
    883 
    884 void makeSharedVerticesDistinct (pa::Triangle& target, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
    885 {
    886 	makeSharedVerticeDistinct(target.v0, vertices, vpalloc);
    887 	makeSharedVerticeDistinct(target.v1, vertices, vpalloc);
    888 	makeSharedVerticeDistinct(target.v2, vertices, vpalloc);
    889 }
    890 
    891 void makeSharedVerticesDistinct (pa::Line& target, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
    892 {
    893 	makeSharedVerticeDistinct(target.v0, vertices, vpalloc);
    894 	makeSharedVerticeDistinct(target.v1, vertices, vpalloc);
    895 }
    896 
    897 void makeSharedVerticesDistinct (pa::Point& target, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
    898 {
    899 	makeSharedVerticeDistinct(target.v0, vertices, vpalloc);
    900 }
    901 
    902 template <typename ContainerType>
    903 void makeSharedVerticesDistinct (ContainerType& list, VertexPacketAllocator& vpalloc)
    904 {
    905 	std::set<VertexPacket*, std::less<void*> > vertices;
    906 
    907 	for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
    908 		makeSharedVerticesDistinct(*it, vertices, vpalloc);
    909 }
    910 
    911 void generatePrimitiveIDs (pa::Triangle& target, int id)
    912 {
    913 	target.v0->primitiveID = id;
    914 	target.v1->primitiveID = id;
    915 	target.v2->primitiveID = id;
    916 }
    917 
    918 void generatePrimitiveIDs (pa::Line& target, int id)
    919 {
    920 	target.v0->primitiveID = id;
    921 	target.v1->primitiveID = id;
    922 }
    923 
    924 void generatePrimitiveIDs (pa::Point& target, int id)
    925 {
    926 	target.v0->primitiveID = id;
    927 }
    928 
    929 template <typename ContainerType>
    930 void generatePrimitiveIDs (ContainerType& list, DrawContext& drawContext)
    931 {
    932 	for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
    933 		generatePrimitiveIDs(*it, drawContext.primitiveID++);
    934 }
    935 
    936 static float findTriangleVertexDepthSlope (const tcu::Vec4& p, const tcu::Vec4& v0, const tcu::Vec4& v1)
    937 {
    938 	// screen space
    939 	const tcu::Vec3 ssp		=  p.swizzle(0, 1, 2);
    940 	const tcu::Vec3 ssv0	= v0.swizzle(0, 1, 2);
    941 	const tcu::Vec3 ssv1	= v1.swizzle(0, 1, 2);
    942 
    943 	// dx & dy
    944 
    945 	const tcu::Vec3 a		= ssv0.swizzle(0,1,2) - ssp.swizzle(0,1,2);
    946 	const tcu::Vec3 b		= ssv1.swizzle(0,1,2) - ssp.swizzle(0,1,2);
    947 	const float		epsilon	= 0.0001f;
    948 	const float		det		= (a.x() * b.y() - b.x() * a.y());
    949 
    950 	// degenerate triangle, it won't generate any fragments anyway. Return value doesn't matter
    951 	if (de::abs(det) < epsilon)
    952 		return 0.0f;
    953 
    954 	const tcu::Vec2	dxDir	= tcu::Vec2( b.y(), -a.y()) / det;
    955 	const tcu::Vec2	dyDir	= tcu::Vec2(-b.x(),  a.x()) / det;
    956 
    957 	const float		dzdx	= dxDir.x() * a.z() + dxDir.y() * b.z();
    958 	const float		dzdy	= dyDir.x() * a.z() + dyDir.y() * b.z();
    959 
    960 	// approximate using max(|dz/dx|, |dz/dy|)
    961 	return de::max(de::abs(dzdx), de::abs(dzdy));
    962 }
    963 
    964 static float findPrimitiveMaximumDepthSlope (const pa::Triangle& triangle)
    965 {
    966 	const float d1 = findTriangleVertexDepthSlope(triangle.v0->position, triangle.v1->position, triangle.v2->position);
    967 	const float d2 = findTriangleVertexDepthSlope(triangle.v1->position, triangle.v2->position, triangle.v0->position);
    968 	const float d3 = findTriangleVertexDepthSlope(triangle.v2->position, triangle.v0->position, triangle.v1->position);
    969 
    970 	return de::max(d1, de::max(d2, d3));
    971 }
    972 
    973 static float getFloatingPointMinimumResolvableDifference (float maxZValue, tcu::TextureFormat::ChannelType type)
    974 {
    975 	if (type == tcu::TextureFormat::FLOAT)
    976 	{
    977 		// 32f
    978 		const int maxExponent = tcu::Float32(maxZValue).exponent();
    979 		return tcu::Float32::construct(+1, maxExponent - 23, 1 << 23).asFloat();
    980 	}
    981 
    982 	// unexpected format
    983 	DE_ASSERT(false);
    984 	return 0.0f;
    985 }
    986 
    987 static float getFixedPointMinimumResolvableDifference (int numBits)
    988 {
    989 	return tcu::Float32::construct(+1, -numBits, 1 << 23).asFloat();
    990 }
    991 
    992 static float findPrimitiveMinimumResolvableDifference (const pa::Triangle& triangle, const rr::MultisampleConstPixelBufferAccess& depthAccess)
    993 {
    994 	const float								maxZvalue		= de::max(de::max(triangle.v0->position.z(), triangle.v1->position.z()), triangle.v2->position.z());
    995 	const tcu::TextureFormat				format			= depthAccess.raw().getFormat();
    996 	const tcu::TextureFormat::ChannelOrder	order			= format.order;
    997 
    998 	if (order == tcu::TextureFormat::D)
    999 	{
   1000 		// depth only
   1001 		const tcu::TextureFormat::ChannelType	channelType		= format.type;
   1002 		const tcu::TextureChannelClass			channelClass	= tcu::getTextureChannelClass(channelType);
   1003 		const int								numBits			= tcu::getTextureFormatBitDepth(format).x();
   1004 
   1005 		if (channelClass == tcu::TEXTURECHANNELCLASS_FLOATING_POINT)
   1006 			return getFloatingPointMinimumResolvableDifference(maxZvalue, channelType);
   1007 		else
   1008 			// \note channelClass might be CLASS_LAST but that's ok
   1009 			return getFixedPointMinimumResolvableDifference(numBits);
   1010 	}
   1011 	else if (order == tcu::TextureFormat::DS)
   1012 	{
   1013 		// depth stencil, special cases for possible combined formats
   1014 		if (format.type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
   1015 			return getFloatingPointMinimumResolvableDifference(maxZvalue, tcu::TextureFormat::FLOAT);
   1016 		else if (format.type == tcu::TextureFormat::UNSIGNED_INT_24_8)
   1017 			return getFixedPointMinimumResolvableDifference(24);
   1018 	}
   1019 
   1020 	// unexpected format
   1021 	DE_ASSERT(false);
   1022 	return 0.0f;
   1023 }
   1024 
   1025 void writeFragmentPackets (const RenderState&					state,
   1026 						   const RenderTarget&					renderTarget,
   1027 						   const Program&						program,
   1028 						   const FragmentPacket*				fragmentPackets,
   1029 						   int									numRasterizedPackets,
   1030 						   rr::FaceType							facetype,
   1031 						   const std::vector<rr::GenericVec4>&	fragmentOutputArray,
   1032 						   const float*							depthValues,
   1033 						   std::vector<Fragment>&				fragmentBuffer)
   1034 {
   1035 	const int			numSamples		= renderTarget.getNumSamples();
   1036 	const size_t		numOutputs		= program.fragmentShader->getOutputs().size();
   1037 	FragmentProcessor	fragProcessor;
   1038 
   1039 	DE_ASSERT(fragmentOutputArray.size() >= (size_t)numRasterizedPackets*4*numOutputs);
   1040 	DE_ASSERT(fragmentBuffer.size()      >= (size_t)numRasterizedPackets*4);
   1041 
   1042 	// Translate fragments but do not set the value yet
   1043 	{
   1044 		int	fragCount = 0;
   1045 		for (int packetNdx = 0; packetNdx < numRasterizedPackets; ++packetNdx)
   1046 		for (int fragNdx = 0; fragNdx < 4; fragNdx++)
   1047 		{
   1048 			const FragmentPacket&	packet	= fragmentPackets[packetNdx];
   1049 			const int				xo		= fragNdx%2;
   1050 			const int				yo		= fragNdx/2;
   1051 
   1052 			if (getCoverageAnyFragmentSampleLive(packet.coverage, numSamples, xo, yo))
   1053 			{
   1054 				Fragment& fragment		= fragmentBuffer[fragCount++];
   1055 
   1056 				fragment.pixelCoord		= packet.position + tcu::IVec2(xo, yo);
   1057 				fragment.coverage		= (deUint32)((packet.coverage & getCoverageFragmentSampleBits(numSamples, xo, yo)) >> getCoverageOffset(numSamples, xo, yo));
   1058 				fragment.sampleDepths	= (depthValues) ? (&depthValues[(packetNdx*4 + yo*2 + xo)*numSamples]) : (DE_NULL);
   1059 			}
   1060 		}
   1061 	}
   1062 
   1063 	// Set per output output values
   1064 	{
   1065 		rr::FragmentOperationState noStencilDepthWriteState(state.fragOps);
   1066 		noStencilDepthWriteState.depthMask						= false;
   1067 		noStencilDepthWriteState.stencilStates[facetype].sFail	= STENCILOP_KEEP;
   1068 		noStencilDepthWriteState.stencilStates[facetype].dpFail	= STENCILOP_KEEP;
   1069 		noStencilDepthWriteState.stencilStates[facetype].dpPass	= STENCILOP_KEEP;
   1070 
   1071 		int	fragCount = 0;
   1072 		for (size_t outputNdx = 0; outputNdx < numOutputs; ++outputNdx)
   1073 		{
   1074 			// Only the last output-pass has default state, other passes have stencil & depth writemask=0
   1075 			const rr::FragmentOperationState& fragOpsState = (outputNdx == numOutputs-1) ? (state.fragOps) : (noStencilDepthWriteState);
   1076 
   1077 			for (int packetNdx = 0; packetNdx < numRasterizedPackets; ++packetNdx)
   1078 			for (int fragNdx = 0; fragNdx < 4; fragNdx++)
   1079 			{
   1080 				const FragmentPacket&	packet	= fragmentPackets[packetNdx];
   1081 				const int				xo		= fragNdx%2;
   1082 				const int				yo		= fragNdx/2;
   1083 
   1084 				// Add only fragments that have live samples to shaded fragments queue.
   1085 				if (getCoverageAnyFragmentSampleLive(packet.coverage, numSamples, xo, yo))
   1086 				{
   1087 					Fragment& fragment		= fragmentBuffer[fragCount++];
   1088 					fragment.value			= fragmentOutputArray[(packetNdx*4 + fragNdx) * numOutputs + outputNdx];
   1089 				}
   1090 			}
   1091 
   1092 			// Execute per-fragment ops and write
   1093 			fragProcessor.render(renderTarget.getColorBuffer((int)outputNdx), renderTarget.getDepthBuffer(), renderTarget.getStencilBuffer(), &fragmentBuffer[0], fragCount, facetype, fragOpsState);
   1094 		}
   1095 	}
   1096 }
   1097 
   1098 void rasterizePrimitive (const RenderState&					state,
   1099 						 const RenderTarget&				renderTarget,
   1100 						 const Program&						program,
   1101 						 const pa::Triangle&				triangle,
   1102 						 const tcu::IVec4&					renderTargetRect,
   1103 						 RasterizationInternalBuffers&		buffers)
   1104 {
   1105 	const int			numSamples		= renderTarget.getNumSamples();
   1106 	const float			depthClampMin	= de::min(state.viewport.zn, state.viewport.zf);
   1107 	const float			depthClampMax	= de::max(state.viewport.zn, state.viewport.zf);
   1108 	TriangleRasterizer	rasterizer		(renderTargetRect, numSamples, state.rasterization);
   1109 	float				depthOffset		= 0.0f;
   1110 
   1111 	rasterizer.init(triangle.v0->position, triangle.v1->position, triangle.v2->position);
   1112 
   1113 	// Culling
   1114 	const FaceType visibleFace = rasterizer.getVisibleFace();
   1115 	if ((state.cullMode == CULLMODE_FRONT	&& visibleFace == FACETYPE_FRONT) ||
   1116 		(state.cullMode == CULLMODE_BACK	&& visibleFace == FACETYPE_BACK))
   1117 		return;
   1118 
   1119 	// Shading context
   1120 	FragmentShadingContext shadingContext(triangle.v0->outputs, triangle.v1->outputs, triangle.v2->outputs, &buffers.shaderOutputs[0], buffers.fragmentDepthBuffer, triangle.v2->primitiveID, (int)program.fragmentShader->getOutputs().size(), numSamples);
   1121 
   1122 	// Polygon offset
   1123 	if (buffers.fragmentDepthBuffer && state.fragOps.polygonOffsetEnabled)
   1124 	{
   1125 		const float maximumDepthSlope			= findPrimitiveMaximumDepthSlope(triangle);
   1126 		const float minimumResolvableDifference	= findPrimitiveMinimumResolvableDifference(triangle, renderTarget.getDepthBuffer());
   1127 
   1128 		depthOffset = maximumDepthSlope * state.fragOps.polygonOffsetFactor + minimumResolvableDifference * state.fragOps.polygonOffsetUnits;
   1129 	}
   1130 
   1131 	// Execute rasterize - shade - write loop
   1132 	for (;;)
   1133 	{
   1134 		const int	maxFragmentPackets		= (int)buffers.fragmentPackets.size();
   1135 		int			numRasterizedPackets	= 0;
   1136 
   1137 		// Rasterize
   1138 
   1139 		rasterizer.rasterize(&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);
   1140 
   1141 		// numRasterizedPackets is guaranteed to be greater than zero for shadeFragments()
   1142 
   1143 		if (!numRasterizedPackets)
   1144 			break; // Rasterization finished.
   1145 
   1146 		// Polygon offset
   1147 		if (buffers.fragmentDepthBuffer && state.fragOps.polygonOffsetEnabled)
   1148 			for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
   1149 				buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx] + depthOffset, 0.0f, 1.0f);
   1150 
   1151 		// Shade
   1152 
   1153 		program.fragmentShader->shadeFragments(&buffers.fragmentPackets[0], numRasterizedPackets, shadingContext);
   1154 
   1155 		// Depth clamp
   1156 		if (buffers.fragmentDepthBuffer && state.fragOps.depthClampEnabled)
   1157 			for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
   1158 				buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx], depthClampMin, depthClampMax);
   1159 
   1160 		// Handle fragment shader outputs
   1161 
   1162 		writeFragmentPackets(state, renderTarget, program, &buffers.fragmentPackets[0], numRasterizedPackets, visibleFace, buffers.shaderOutputs, buffers.fragmentDepthBuffer, buffers.shadedFragments);
   1163 	}
   1164 }
   1165 
   1166 void rasterizePrimitive (const RenderState&					state,
   1167 						 const RenderTarget&				renderTarget,
   1168 						 const Program&						program,
   1169 						 const pa::Line&					line,
   1170 						 const tcu::IVec4&					renderTargetRect,
   1171 						 RasterizationInternalBuffers&		buffers)
   1172 {
   1173 	const int					numSamples			= renderTarget.getNumSamples();
   1174 	const float					depthClampMin		= de::min(state.viewport.zn, state.viewport.zf);
   1175 	const float					depthClampMax		= de::max(state.viewport.zn, state.viewport.zf);
   1176 	const bool					msaa				= numSamples > 1;
   1177 	FragmentShadingContext		shadingContext		(line.v0->outputs, line.v1->outputs, DE_NULL, &buffers.shaderOutputs[0], buffers.fragmentDepthBuffer, line.v1->primitiveID, (int)program.fragmentShader->getOutputs().size(), numSamples);
   1178 	SingleSampleLineRasterizer	aliasedRasterizer	(renderTargetRect);
   1179 	MultiSampleLineRasterizer	msaaRasterizer		(numSamples, renderTargetRect);
   1180 
   1181 	// Initialize rasterization.
   1182 	if (msaa)
   1183 		msaaRasterizer.init(line.v0->position, line.v1->position, state.line.lineWidth);
   1184 	else
   1185 		aliasedRasterizer.init(line.v0->position, line.v1->position, state.line.lineWidth);
   1186 
   1187 	for (;;)
   1188 	{
   1189 		const int	maxFragmentPackets		= (int)buffers.fragmentPackets.size();
   1190 		int			numRasterizedPackets	= 0;
   1191 
   1192 		// Rasterize
   1193 
   1194 		if (msaa)
   1195 			msaaRasterizer.rasterize	(&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);
   1196 		else
   1197 			aliasedRasterizer.rasterize	(&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);
   1198 
   1199 		// numRasterizedPackets is guaranteed to be greater than zero for shadeFragments()
   1200 
   1201 		if (!numRasterizedPackets)
   1202 			break; // Rasterization finished.
   1203 
   1204 		// Shade
   1205 
   1206 		program.fragmentShader->shadeFragments(&buffers.fragmentPackets[0], numRasterizedPackets, shadingContext);
   1207 
   1208 		// Depth clamp
   1209 		if (buffers.fragmentDepthBuffer && state.fragOps.depthClampEnabled)
   1210 			for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
   1211 				buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx], depthClampMin, depthClampMax);
   1212 
   1213 		// Handle fragment shader outputs
   1214 
   1215 		writeFragmentPackets(state, renderTarget, program, &buffers.fragmentPackets[0], numRasterizedPackets, rr::FACETYPE_FRONT, buffers.shaderOutputs, buffers.fragmentDepthBuffer, buffers.shadedFragments);
   1216 	}
   1217 }
   1218 
   1219 void rasterizePrimitive (const RenderState&					state,
   1220 						 const RenderTarget&				renderTarget,
   1221 						 const Program&						program,
   1222 						 const pa::Point&					point,
   1223 						 const tcu::IVec4&					renderTargetRect,
   1224 						 RasterizationInternalBuffers&		buffers)
   1225 {
   1226 	const int			numSamples		= renderTarget.getNumSamples();
   1227 	const float			depthClampMin	= de::min(state.viewport.zn, state.viewport.zf);
   1228 	const float			depthClampMax	= de::max(state.viewport.zn, state.viewport.zf);
   1229 	TriangleRasterizer	rasterizer1		(renderTargetRect, numSamples, state.rasterization);
   1230 	TriangleRasterizer	rasterizer2		(renderTargetRect, numSamples, state.rasterization);
   1231 
   1232 	// draw point as two triangles
   1233 	const float offset				= point.v0->pointSize / 2.0f;
   1234 	const tcu::Vec4		w0			= tcu::Vec4(point.v0->position.x() + offset, point.v0->position.y() + offset, point.v0->position.z(), point.v0->position.w());
   1235 	const tcu::Vec4		w1			= tcu::Vec4(point.v0->position.x() - offset, point.v0->position.y() + offset, point.v0->position.z(), point.v0->position.w());
   1236 	const tcu::Vec4		w2			= tcu::Vec4(point.v0->position.x() - offset, point.v0->position.y() - offset, point.v0->position.z(), point.v0->position.w());
   1237 	const tcu::Vec4		w3			= tcu::Vec4(point.v0->position.x() + offset, point.v0->position.y() - offset, point.v0->position.z(), point.v0->position.w());
   1238 
   1239 	rasterizer1.init(w0, w1, w2);
   1240 	rasterizer2.init(w0, w2, w3);
   1241 
   1242 	// Shading context
   1243 	FragmentShadingContext shadingContext(point.v0->outputs, DE_NULL, DE_NULL, &buffers.shaderOutputs[0], buffers.fragmentDepthBuffer, point.v0->primitiveID, (int)program.fragmentShader->getOutputs().size(), numSamples);
   1244 
   1245 	// Execute rasterize - shade - write loop
   1246 	for (;;)
   1247 	{
   1248 		const int	maxFragmentPackets		= (int)buffers.fragmentPackets.size();
   1249 		int			numRasterizedPackets	= 0;
   1250 
   1251 		// Rasterize both triangles
   1252 
   1253 		rasterizer1.rasterize(&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);
   1254 		if (numRasterizedPackets != maxFragmentPackets)
   1255 		{
   1256 			float* const	depthBufferAppendPointer	= (buffers.fragmentDepthBuffer) ? (buffers.fragmentDepthBuffer + numRasterizedPackets*numSamples*4) : (DE_NULL);
   1257 			int				numRasterizedPackets2		= 0;
   1258 
   1259 			rasterizer2.rasterize(&buffers.fragmentPackets[numRasterizedPackets], depthBufferAppendPointer, maxFragmentPackets - numRasterizedPackets, numRasterizedPackets2);
   1260 
   1261 			numRasterizedPackets += numRasterizedPackets2;
   1262 		}
   1263 
   1264 		// numRasterizedPackets is guaranteed to be greater than zero for shadeFragments()
   1265 
   1266 		if (!numRasterizedPackets)
   1267 			break; // Rasterization finished.
   1268 
   1269 		// Shade
   1270 
   1271 		program.fragmentShader->shadeFragments(&buffers.fragmentPackets[0], numRasterizedPackets, shadingContext);
   1272 
   1273 		// Depth clamp
   1274 		if (buffers.fragmentDepthBuffer && state.fragOps.depthClampEnabled)
   1275 			for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
   1276 				buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx], depthClampMin, depthClampMax);
   1277 
   1278 		// Handle fragment shader outputs
   1279 
   1280 		writeFragmentPackets(state, renderTarget, program, &buffers.fragmentPackets[0], numRasterizedPackets, rr::FACETYPE_FRONT, buffers.shaderOutputs, buffers.fragmentDepthBuffer, buffers.shadedFragments);
   1281 	}
   1282 }
   1283 
   1284 template <typename ContainerType>
   1285 void rasterize (const RenderState&					state,
   1286 				const RenderTarget&					renderTarget,
   1287 				const Program&						program,
   1288 				const ContainerType&				list)
   1289 {
   1290 	const int						numSamples			= renderTarget.getNumSamples();
   1291 	const int						numFragmentOutputs	= (int)program.fragmentShader->getOutputs().size();
   1292 	const size_t					maxFragmentPackets	= 128;
   1293 
   1294 	const tcu::IVec4				viewportRect		= tcu::IVec4(state.viewport.rect.left, state.viewport.rect.bottom, state.viewport.rect.width, state.viewport.rect.height);
   1295 	const tcu::IVec4				bufferRect			= getBufferSize(renderTarget.getColorBuffer(0));
   1296 	const tcu::IVec4				renderTargetRect	= rectIntersection(viewportRect, bufferRect);
   1297 
   1298 	// shared buffers for all primitives
   1299 	std::vector<FragmentPacket>		fragmentPackets		(maxFragmentPackets);
   1300 	std::vector<GenericVec4>		shaderOutputs		(maxFragmentPackets*4*numFragmentOutputs);
   1301 	std::vector<Fragment>			shadedFragments		(maxFragmentPackets*4);
   1302 	std::vector<float>				depthValues			(0);
   1303 	float*							depthBufferPointer	= DE_NULL;
   1304 
   1305 	RasterizationInternalBuffers	buffers;
   1306 
   1307 	// calculate depth only if we have a depth buffer
   1308 	if (!isEmpty(renderTarget.getDepthBuffer()))
   1309 	{
   1310 		depthValues.resize(maxFragmentPackets*4*numSamples);
   1311 		depthBufferPointer = &depthValues[0];
   1312 	}
   1313 
   1314 	// set buffers
   1315 	buffers.fragmentPackets.swap(fragmentPackets);
   1316 	buffers.shaderOutputs.swap(shaderOutputs);
   1317 	buffers.shadedFragments.swap(shadedFragments);
   1318 	buffers.fragmentDepthBuffer = depthBufferPointer;
   1319 
   1320 	// rasterize
   1321 	for (typename ContainerType::const_iterator it = list.begin(); it != list.end(); ++it)
   1322 		rasterizePrimitive(state, renderTarget, program, *it, renderTargetRect, buffers);
   1323 }
   1324 
   1325 /*--------------------------------------------------------------------*//*!
   1326  * Draws transformed triangles, lines or points to render target
   1327  *//*--------------------------------------------------------------------*/
   1328 template <typename ContainerType>
   1329 void drawBasicPrimitives (const RenderState& state, const RenderTarget& renderTarget, const Program& program, ContainerType& primList, VertexPacketAllocator& vpalloc)
   1330 {
   1331 	const bool clipZ = !state.fragOps.depthClampEnabled;
   1332 
   1333 	// Transform feedback
   1334 
   1335 	// Flatshading
   1336 	flatshadeVertices(program, primList);
   1337 
   1338 	// Clipping
   1339 	// \todo [jarkko] is creating & swapping std::vectors really a good solution?
   1340 	clipPrimitives(primList, program, clipZ, vpalloc);
   1341 
   1342 	// Transform vertices to window coords
   1343 	transformClipCoordsToWindowCoords(state, primList);
   1344 
   1345 	// Rasterize and paint
   1346 	rasterize(state, renderTarget, program, primList);
   1347 }
   1348 
   1349 void copyVertexPacketPointers(const VertexPacket** dst, const pa::Point& in)
   1350 {
   1351 	dst[0] = in.v0;
   1352 }
   1353 
   1354 void copyVertexPacketPointers(const VertexPacket** dst, const pa::Line& in)
   1355 {
   1356 	dst[0] = in.v0;
   1357 	dst[1] = in.v1;
   1358 }
   1359 
   1360 void copyVertexPacketPointers(const VertexPacket** dst, const pa::Triangle& in)
   1361 {
   1362 	dst[0] = in.v0;
   1363 	dst[1] = in.v1;
   1364 	dst[2] = in.v2;
   1365 }
   1366 
   1367 void copyVertexPacketPointers(const VertexPacket** dst, const pa::LineAdjacency& in)
   1368 {
   1369 	dst[0] = in.v0;
   1370 	dst[1] = in.v1;
   1371 	dst[2] = in.v2;
   1372 	dst[3] = in.v3;
   1373 }
   1374 
   1375 void copyVertexPacketPointers(const VertexPacket** dst, const pa::TriangleAdjacency& in)
   1376 {
   1377 	dst[0] = in.v0;
   1378 	dst[1] = in.v1;
   1379 	dst[2] = in.v2;
   1380 	dst[3] = in.v3;
   1381 	dst[4] = in.v4;
   1382 	dst[5] = in.v5;
   1383 }
   1384 
   1385 template <PrimitiveType DrawPrimitiveType> // \note DrawPrimitiveType  can only be Points, line_strip, or triangle_strip
   1386 void drawGeometryShaderOutputAsPrimitives (const RenderState& state, const RenderTarget& renderTarget, const Program& program, VertexPacket* const* vertices, size_t numVertices, VertexPacketAllocator& vpalloc)
   1387 {
   1388 	// Run primitive assembly for generated stream
   1389 
   1390 	const size_t															assemblerPrimitiveCount		= PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::getPrimitiveCount(numVertices);
   1391 	std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::BaseType>	inputPrimitives				(assemblerPrimitiveCount);
   1392 
   1393 	PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::exec(inputPrimitives.begin(), vertices, numVertices, state.provokingVertexConvention); // \note input Primitives are baseType_t => only basic primitives (non adjacency) will compile
   1394 
   1395 	// Make shared vertices distinct
   1396 
   1397 	makeSharedVerticesDistinct(inputPrimitives, vpalloc);
   1398 
   1399 	// Draw assembled primitives
   1400 
   1401 	drawBasicPrimitives(state, renderTarget, program, inputPrimitives, vpalloc);
   1402 }
   1403 
   1404 template <PrimitiveType DrawPrimitiveType>
   1405 void drawWithGeometryShader(const RenderState& state, const RenderTarget& renderTarget, const Program& program, std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::Type>& input, DrawContext& drawContext)
   1406 {
   1407 	// Vertices outputted by geometry shader may have different number of output variables than the original, create new memory allocator
   1408 	VertexPacketAllocator vpalloc(program.geometryShader->getOutputs().size());
   1409 
   1410 	// Run geometry shader for all primitives
   1411 	GeometryEmitter					emitter			(vpalloc, program.geometryShader->getNumVerticesOut());
   1412 	std::vector<PrimitivePacket>	primitives		(input.size());
   1413 	const int						numInvocations	= (int)program.geometryShader->getNumInvocations();
   1414 	const int						verticesIn		= PrimitiveTypeTraits<DrawPrimitiveType>::Type::NUM_VERTICES;
   1415 
   1416 	for (size_t primitiveNdx = 0; primitiveNdx < input.size(); ++primitiveNdx)
   1417 	{
   1418 		primitives[primitiveNdx].primitiveIDIn = drawContext.primitiveID++;
   1419 		copyVertexPacketPointers(primitives[primitiveNdx].vertices, input[primitiveNdx]);
   1420 	}
   1421 
   1422 	if (primitives.empty())
   1423 		return;
   1424 
   1425 	for (int invocationNdx = 0; invocationNdx < numInvocations; ++invocationNdx)
   1426 	{
   1427 		// Shading invocation
   1428 
   1429 		program.geometryShader->shadePrimitives(emitter, verticesIn, &primitives[0], (int)primitives.size(), invocationNdx);
   1430 
   1431 		// Find primitives in the emitted vertices
   1432 
   1433 		std::vector<VertexPacket*> emitted;
   1434 		emitter.moveEmittedTo(emitted);
   1435 
   1436 		for (size_t primitiveBegin = 0; primitiveBegin < emitted.size();)
   1437 		{
   1438 			size_t primitiveEnd;
   1439 
   1440 			// Find primitive begin
   1441 			if (!emitted[primitiveBegin])
   1442 			{
   1443 				++primitiveBegin;
   1444 				continue;
   1445 			}
   1446 
   1447 			// Find primitive end
   1448 
   1449 			primitiveEnd = primitiveBegin + 1;
   1450 			for (; (primitiveEnd < emitted.size()) && emitted[primitiveEnd]; ++primitiveEnd); // find primitive end
   1451 
   1452 			// Draw range [begin, end)
   1453 
   1454 			switch (program.geometryShader->getOutputType())
   1455 			{
   1456 				case rr::GEOMETRYSHADEROUTPUTTYPE_POINTS:			drawGeometryShaderOutputAsPrimitives<PRIMITIVETYPE_POINTS>			(state, renderTarget, program, &emitted[primitiveBegin], primitiveEnd-primitiveBegin, vpalloc); break;
   1457 				case rr::GEOMETRYSHADEROUTPUTTYPE_LINE_STRIP:		drawGeometryShaderOutputAsPrimitives<PRIMITIVETYPE_LINE_STRIP>		(state, renderTarget, program, &emitted[primitiveBegin], primitiveEnd-primitiveBegin, vpalloc); break;
   1458 				case rr::GEOMETRYSHADEROUTPUTTYPE_TRIANGLE_STRIP:	drawGeometryShaderOutputAsPrimitives<PRIMITIVETYPE_TRIANGLE_STRIP>	(state, renderTarget, program, &emitted[primitiveBegin], primitiveEnd-primitiveBegin, vpalloc); break;
   1459 				default:
   1460 					DE_ASSERT(DE_FALSE);
   1461 			}
   1462 
   1463 			// Next primitive
   1464 			primitiveBegin = primitiveEnd + 1;
   1465 		}
   1466 	}
   1467 }
   1468 
   1469 /*--------------------------------------------------------------------*//*!
   1470  * Assembles, tesselates, runs geometry shader and draws primitives of any type from vertex list.
   1471  *//*--------------------------------------------------------------------*/
   1472 template <PrimitiveType DrawPrimitiveType>
   1473 void drawAsPrimitives (const RenderState& state, const RenderTarget& renderTarget, const Program& program, VertexPacket* const* vertices, int numVertices, DrawContext& drawContext, VertexPacketAllocator& vpalloc)
   1474 {
   1475 	// Assemble primitives (deconstruct stips & loops)
   1476 	const size_t															assemblerPrimitiveCount		= PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::getPrimitiveCount(numVertices);
   1477 	std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::Type>		inputPrimitives				(assemblerPrimitiveCount);
   1478 
   1479 	PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::exec(inputPrimitives.begin(), vertices, (size_t)numVertices, state.provokingVertexConvention);
   1480 
   1481 	// Tesselate
   1482 	//if (state.tesselation)
   1483 	//	primList = state.tesselation.exec(primList);
   1484 
   1485 	// Geometry shader
   1486 	if (program.geometryShader)
   1487 	{
   1488 		// If there is an active geometry shader, it will convert any primitive type to basic types
   1489 		drawWithGeometryShader<DrawPrimitiveType>(state, renderTarget, program, inputPrimitives, drawContext);
   1490 	}
   1491 	else
   1492 	{
   1493 		std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::BaseType> basePrimitives;
   1494 
   1495 		// convert types from X_adjacency to X
   1496 		convertPrimitiveToBaseType(basePrimitives, inputPrimitives);
   1497 
   1498 		// Make shared vertices distinct. Needed for that the translation to screen space happens only once per vertex, and for flatshading
   1499 		makeSharedVerticesDistinct(basePrimitives, vpalloc);
   1500 
   1501 		// A primitive ID will be generated even if no geometry shader is active
   1502 		generatePrimitiveIDs(basePrimitives, drawContext);
   1503 
   1504 		// Draw as a basic type
   1505 		drawBasicPrimitives(state, renderTarget, program, basePrimitives, vpalloc);
   1506 	}
   1507 }
   1508 
   1509 bool isValidCommand (const DrawCommand& command, int numInstances)
   1510 {
   1511 	// numInstances should be valid
   1512 	if (numInstances < 1)
   1513 		return false;
   1514 
   1515 	// Shaders should have the same varyings
   1516 	if (command.program.geometryShader)
   1517 	{
   1518 		if (command.program.vertexShader->getOutputs() != command.program.geometryShader->getInputs())
   1519 			return false;
   1520 
   1521 		if (command.program.geometryShader->getOutputs() != command.program.fragmentShader->getInputs())
   1522 			return false;
   1523 	}
   1524 	else
   1525 	{
   1526 		if (command.program.vertexShader->getOutputs() != command.program.fragmentShader->getInputs())
   1527 			return false;
   1528 	}
   1529 
   1530 	// Shader input/output types are set
   1531 	for (size_t varyingNdx = 0; varyingNdx < command.program.vertexShader->getInputs().size(); ++varyingNdx)
   1532 		if (command.program.vertexShader->getInputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
   1533 			command.program.vertexShader->getInputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
   1534 			command.program.vertexShader->getInputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
   1535 			return false;
   1536 	for (size_t varyingNdx = 0; varyingNdx < command.program.vertexShader->getOutputs().size(); ++varyingNdx)
   1537 		if (command.program.vertexShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
   1538 			command.program.vertexShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
   1539 			command.program.vertexShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
   1540 			return false;
   1541 
   1542 	for (size_t varyingNdx = 0; varyingNdx < command.program.fragmentShader->getInputs().size(); ++varyingNdx)
   1543 		if (command.program.fragmentShader->getInputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
   1544 			command.program.fragmentShader->getInputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
   1545 			command.program.fragmentShader->getInputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
   1546 			return false;
   1547 	for (size_t varyingNdx = 0; varyingNdx < command.program.fragmentShader->getOutputs().size(); ++varyingNdx)
   1548 		if (command.program.fragmentShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
   1549 			command.program.fragmentShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
   1550 			command.program.fragmentShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
   1551 			return false;
   1552 
   1553 	if (command.program.geometryShader)
   1554 	{
   1555 		for (size_t varyingNdx = 0; varyingNdx < command.program.geometryShader->getInputs().size(); ++varyingNdx)
   1556 			if (command.program.geometryShader->getInputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
   1557 				command.program.geometryShader->getInputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
   1558 				command.program.geometryShader->getInputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
   1559 				return false;
   1560 		for (size_t varyingNdx = 0; varyingNdx < command.program.geometryShader->getOutputs().size(); ++varyingNdx)
   1561 			if (command.program.geometryShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
   1562 				command.program.geometryShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
   1563 				command.program.geometryShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
   1564 				return false;
   1565 	}
   1566 
   1567 	// Enough vertex inputs?
   1568 	if ((size_t)command.numVertexAttribs < command.program.vertexShader->getInputs().size())
   1569 		return false;
   1570 
   1571 	// There is a fragment output sink for each output?
   1572 	if ((size_t)command.renderTarget.getNumColorBuffers() < command.program.fragmentShader->getOutputs().size())
   1573 		return false;
   1574 
   1575 	// All destination buffers should have same number of samples and same size
   1576 	for (int outputNdx = 0; outputNdx < command.renderTarget.getNumColorBuffers(); ++outputNdx)
   1577 	{
   1578 		if (getBufferSize(command.renderTarget.getColorBuffer(0)) != getBufferSize(command.renderTarget.getColorBuffer(outputNdx)))
   1579 			return false;
   1580 
   1581 		if (command.renderTarget.getNumSamples() != command.renderTarget.getColorBuffer(outputNdx).getNumSamples())
   1582 			return false;
   1583 	}
   1584 
   1585 	// All destination buffers should have same basic type as matching fragment output
   1586 	for (size_t varyingNdx = 0; varyingNdx < command.program.fragmentShader->getOutputs().size(); ++varyingNdx)
   1587 	{
   1588 		const tcu::TextureChannelClass	colorbufferClass = tcu::getTextureChannelClass(command.renderTarget.getColorBuffer((int)varyingNdx).raw().getFormat().type);
   1589 		const GenericVecType			colorType		 = (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER) ? (rr::GENERICVECTYPE_INT32) : ((colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER) ? (rr::GENERICVECTYPE_UINT32) : (rr::GENERICVECTYPE_FLOAT));
   1590 
   1591 		if (command.program.fragmentShader->getOutputs()[varyingNdx].type != colorType)
   1592 			return false;
   1593 	}
   1594 
   1595 	// Integer values are flatshaded
   1596 	for (size_t outputNdx = 0; outputNdx < command.program.vertexShader->getOutputs().size(); ++outputNdx)
   1597 	{
   1598 		if (!command.program.vertexShader->getOutputs()[outputNdx].flatshade &&
   1599 			(command.program.vertexShader->getOutputs()[outputNdx].type == GENERICVECTYPE_INT32 ||
   1600 			 command.program.vertexShader->getOutputs()[outputNdx].type == GENERICVECTYPE_UINT32))
   1601 			return false;
   1602 	}
   1603 	if (command.program.geometryShader)
   1604 		for (size_t outputNdx = 0; outputNdx < command.program.geometryShader->getOutputs().size(); ++outputNdx)
   1605 		{
   1606 			if (!command.program.geometryShader->getOutputs()[outputNdx].flatshade &&
   1607 				(command.program.geometryShader->getOutputs()[outputNdx].type == GENERICVECTYPE_INT32 ||
   1608 				 command.program.geometryShader->getOutputs()[outputNdx].type == GENERICVECTYPE_UINT32))
   1609 				return false;
   1610 		}
   1611 
   1612 	// Draw primitive is valid for geometry shader
   1613 	if (command.program.geometryShader)
   1614 	{
   1615 		if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_POINTS && command.primitives.getPrimitiveType() != PRIMITIVETYPE_POINTS)
   1616 			return false;
   1617 
   1618 		if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_LINES &&
   1619 			(command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINES &&
   1620 			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINE_STRIP &&
   1621 			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINE_LOOP))
   1622 			return false;
   1623 
   1624 		if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_TRIANGLES &&
   1625 			(command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLES &&
   1626 			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLE_STRIP &&
   1627 			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLE_FAN))
   1628 			return false;
   1629 
   1630 		if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_LINES_ADJACENCY &&
   1631 			(command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINES_ADJACENCY &&
   1632 			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINE_STRIP_ADJACENCY))
   1633 			return false;
   1634 
   1635 		if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_TRIANGLES_ADJACENCY &&
   1636 			(command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLES_ADJACENCY &&
   1637 			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLE_STRIP_ADJACENCY))
   1638 			return false;
   1639 	}
   1640 
   1641 	return true;
   1642 }
   1643 
   1644 } // anonymous
   1645 
   1646 RenderTarget::RenderTarget (const MultisamplePixelBufferAccess& colorMultisampleBuffer,
   1647 							const MultisamplePixelBufferAccess& depthMultisampleBuffer,
   1648 							const MultisamplePixelBufferAccess& stencilMultisampleBuffer)
   1649 	: m_numColorBuffers	(1)
   1650 	, m_depthBuffer		(MultisamplePixelBufferAccess::fromMultisampleAccess(tcu::getEffectiveDepthStencilAccess(depthMultisampleBuffer.raw(), tcu::Sampler::MODE_DEPTH)))
   1651 	, m_stencilBuffer	(MultisamplePixelBufferAccess::fromMultisampleAccess(tcu::getEffectiveDepthStencilAccess(stencilMultisampleBuffer.raw(), tcu::Sampler::MODE_STENCIL)))
   1652 {
   1653 	m_colorBuffers[0] = colorMultisampleBuffer;
   1654 }
   1655 
   1656 int RenderTarget::getNumSamples (void) const
   1657 {
   1658 	DE_ASSERT(m_numColorBuffers > 0);
   1659 	return m_colorBuffers[0].getNumSamples();
   1660 }
   1661 
   1662 DrawIndices::DrawIndices (const deUint32* ptr, int baseVertex_)
   1663 	: indices	(ptr)
   1664 	, indexType	(INDEXTYPE_UINT32)
   1665 	, baseVertex(baseVertex_)
   1666 {
   1667 }
   1668 
   1669 DrawIndices::DrawIndices (const deUint16* ptr, int baseVertex_)
   1670 	: indices	(ptr)
   1671 	, indexType	(INDEXTYPE_UINT16)
   1672 	, baseVertex(baseVertex_)
   1673 {
   1674 }
   1675 
   1676 DrawIndices::DrawIndices (const deUint8* ptr, int baseVertex_)
   1677 	: indices	(ptr)
   1678 	, indexType	(INDEXTYPE_UINT8)
   1679 	, baseVertex(baseVertex_)
   1680 {
   1681 }
   1682 
   1683 DrawIndices::DrawIndices (const void* ptr, IndexType type, int baseVertex_)
   1684 	: indices	(ptr)
   1685 	, indexType	(type)
   1686 	, baseVertex(baseVertex_)
   1687 {
   1688 }
   1689 
   1690 PrimitiveList::PrimitiveList (PrimitiveType primitiveType, int numElements, const int firstElement)
   1691 	: m_primitiveType	(primitiveType)
   1692 	, m_numElements		(numElements)
   1693 	, m_indices			(DE_NULL)
   1694 	, m_indexType		(INDEXTYPE_LAST)
   1695 	, m_baseVertex		(firstElement)
   1696 {
   1697 	DE_ASSERT(numElements >= 0 && "Invalid numElements");
   1698 	DE_ASSERT(firstElement >= 0 && "Invalid firstElement");
   1699 }
   1700 
   1701 PrimitiveList::PrimitiveList (PrimitiveType primitiveType, int numElements, const DrawIndices& indices)
   1702 	: m_primitiveType	(primitiveType)
   1703 	, m_numElements		((size_t)numElements)
   1704 	, m_indices			(indices.indices)
   1705 	, m_indexType		(indices.indexType)
   1706 	, m_baseVertex		(indices.baseVertex)
   1707 {
   1708 	DE_ASSERT(numElements >= 0 && "Invalid numElements");
   1709 }
   1710 
   1711 size_t PrimitiveList::getIndex (size_t elementNdx) const
   1712 {
   1713 	// indices == DE_NULL interpreted as command.indices = [first (=baseVertex) + 0, first + 1, first + 2...]
   1714 	if (m_indices)
   1715 	{
   1716 		int index = m_baseVertex + (int)readIndexArray(m_indexType, m_indices, elementNdx);
   1717 		DE_ASSERT(index >= 0); // do not access indices < 0
   1718 
   1719 		return (size_t)index;
   1720 	}
   1721 	else
   1722 		return (size_t)(m_baseVertex) + elementNdx;
   1723 }
   1724 
   1725 bool PrimitiveList::isRestartIndex (size_t elementNdx, deUint32 restartIndex) const
   1726 {
   1727 	// implicit index or explicit index (without base vertex) equals restart
   1728 	if (m_indices)
   1729 		return readIndexArray(m_indexType, m_indices, elementNdx) == restartIndex;
   1730 	else
   1731 		return elementNdx == (size_t)restartIndex;
   1732 }
   1733 
   1734 Renderer::Renderer (void)
   1735 {
   1736 }
   1737 
   1738 Renderer::~Renderer (void)
   1739 {
   1740 }
   1741 
   1742 void Renderer::draw (const DrawCommand& command) const
   1743 {
   1744 	drawInstanced(command, 1);
   1745 }
   1746 
   1747 void Renderer::drawInstanced (const DrawCommand& command, int numInstances) const
   1748 {
   1749 	// Do not run bad commands
   1750 	{
   1751 		const bool validCommand = isValidCommand(command, numInstances);
   1752 		if (!validCommand)
   1753 		{
   1754 			DE_ASSERT(false);
   1755 			return;
   1756 		}
   1757 	}
   1758 
   1759 	// Do not draw if nothing to draw
   1760 	{
   1761 		if (command.primitives.getNumElements() == 0 || numInstances == 0)
   1762 			return;
   1763 	}
   1764 
   1765 	// Prepare transformation
   1766 
   1767 	const size_t				numVaryings = command.program.vertexShader->getOutputs().size();
   1768 	VertexPacketAllocator		vpalloc(numVaryings);
   1769 	std::vector<VertexPacket*>	vertexPackets = vpalloc.allocArray(command.primitives.getNumElements());
   1770 	DrawContext					drawContext;
   1771 
   1772 	for (int instanceID = 0; instanceID < numInstances; ++instanceID)
   1773 	{
   1774 		// Each instance has its own primitives
   1775 		drawContext.primitiveID = 0;
   1776 
   1777 		for (size_t elementNdx = 0; elementNdx < command.primitives.getNumElements(); ++elementNdx)
   1778 		{
   1779 			int numVertexPackets = 0;
   1780 
   1781 			// collect primitive vertices until restart
   1782 
   1783 			while (elementNdx < command.primitives.getNumElements() &&
   1784 					!(command.state.restart.enabled && command.primitives.isRestartIndex(elementNdx, command.state.restart.restartIndex)))
   1785 			{
   1786 				// input
   1787 				vertexPackets[numVertexPackets]->instanceNdx	= instanceID;
   1788 				vertexPackets[numVertexPackets]->vertexNdx		= (int)command.primitives.getIndex(elementNdx);
   1789 
   1790 				// output
   1791 				vertexPackets[numVertexPackets]->pointSize		= command.state.point.pointSize;	// default value from the current state
   1792 				vertexPackets[numVertexPackets]->position		= tcu::Vec4(0, 0, 0, 0);			// no undefined values
   1793 
   1794 				++numVertexPackets;
   1795 				++elementNdx;
   1796 			}
   1797 
   1798 			// Duplicated restart shade
   1799 			if (numVertexPackets == 0)
   1800 				continue;
   1801 
   1802 			// \todo Vertex cache?
   1803 
   1804 			// Transform vertices
   1805 
   1806 			command.program.vertexShader->shadeVertices(command.vertexAttribs, &vertexPackets[0], numVertexPackets);
   1807 
   1808 			// Draw primitives
   1809 
   1810 			switch (command.primitives.getPrimitiveType())
   1811 			{
   1812 				case PRIMITIVETYPE_TRIANGLES:				{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLES>					(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
   1813 				case PRIMITIVETYPE_TRIANGLE_STRIP:			{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLE_STRIP>			(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
   1814 				case PRIMITIVETYPE_TRIANGLE_FAN:			{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLE_FAN>				(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
   1815 				case PRIMITIVETYPE_LINES:					{ drawAsPrimitives<PRIMITIVETYPE_LINES>						(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
   1816 				case PRIMITIVETYPE_LINE_STRIP:				{ drawAsPrimitives<PRIMITIVETYPE_LINE_STRIP>				(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
   1817 				case PRIMITIVETYPE_LINE_LOOP:				{ drawAsPrimitives<PRIMITIVETYPE_LINE_LOOP>					(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
   1818 				case PRIMITIVETYPE_POINTS:					{ drawAsPrimitives<PRIMITIVETYPE_POINTS>					(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
   1819 				case PRIMITIVETYPE_LINES_ADJACENCY:			{ drawAsPrimitives<PRIMITIVETYPE_LINES_ADJACENCY>			(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
   1820 				case PRIMITIVETYPE_LINE_STRIP_ADJACENCY:	{ drawAsPrimitives<PRIMITIVETYPE_LINE_STRIP_ADJACENCY>		(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
   1821 				case PRIMITIVETYPE_TRIANGLES_ADJACENCY:		{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLES_ADJACENCY>		(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
   1822 				case PRIMITIVETYPE_TRIANGLE_STRIP_ADJACENCY:{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLE_STRIP_ADJACENCY>	(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
   1823 				default:
   1824 					DE_ASSERT(DE_FALSE);
   1825 			}
   1826 		}
   1827 	}
   1828 }
   1829 
   1830 } // rr
   1831