Home | History | Annotate | Download | only in common
      1 /*-------------------------------------------------------------------------
      2  * drawElements Quality Program Tester Core
      3  * ----------------------------------------
      4  *
      5  * Copyright 2014 The Android Open Source Project
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file
     21  * \brief Compressed Texture Utilities.
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "tcuCompressedTexture.hpp"
     25 #include "tcuTextureUtil.hpp"
     26 #include "deStringUtil.hpp"
     27 #include "deFloat16.h"
     28 
     29 #include <algorithm>
     30 
     31 namespace tcu
     32 {
     33 
     34 enum { ASTC_BLOCK_SIZE_BYTES = 128/8 };
     35 
     36 template <typename T, typename Y>
     37 struct isSameType			{ enum { V = 0 }; };
     38 template <typename T>
     39 struct isSameType<T, T>		{ enum { V = 1 }; };
     40 
     41 CompressedTexture::CompressedTexture (void)
     42 	: m_format	(FORMAT_LAST)
     43 	, m_width	(0)
     44 	, m_height	(0)
     45 	, m_depth	(0)
     46 {
     47 }
     48 
     49 CompressedTexture::CompressedTexture (Format format, int width, int height, int depth)
     50 	: m_format	(FORMAT_LAST)
     51 	, m_width	(0)
     52 	, m_height	(0)
     53 	, m_depth	(0)
     54 {
     55 	setStorage(format, width, height, depth);
     56 }
     57 
     58 CompressedTexture::~CompressedTexture (void)
     59 {
     60 }
     61 
     62 static inline int divRoundUp (int a, int b)
     63 {
     64 	return a/b + ((a%b) ? 1 : 0);
     65 }
     66 
     67 bool isEtcFormat (CompressedTexture::Format fmt)
     68 {
     69 	switch (fmt)
     70 	{
     71 		case CompressedTexture::ETC1_RGB8:
     72 		case CompressedTexture::EAC_R11:
     73 		case CompressedTexture::EAC_SIGNED_R11:
     74 		case CompressedTexture::EAC_RG11:
     75 		case CompressedTexture::EAC_SIGNED_RG11:
     76 		case CompressedTexture::ETC2_RGB8:
     77 		case CompressedTexture::ETC2_SRGB8:
     78 		case CompressedTexture::ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
     79 		case CompressedTexture::ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
     80 		case CompressedTexture::ETC2_EAC_RGBA8:
     81 		case CompressedTexture::ETC2_EAC_SRGB8_ALPHA8:
     82 			return true;
     83 
     84 		default:
     85 			return false;
     86 	}
     87 }
     88 
     89 bool isASTCFormat (CompressedTexture::Format fmt)
     90 {
     91 	switch (fmt)
     92 	{
     93 		case CompressedTexture::ASTC_4x4_RGBA:
     94 		case CompressedTexture::ASTC_5x4_RGBA:
     95 		case CompressedTexture::ASTC_5x5_RGBA:
     96 		case CompressedTexture::ASTC_6x5_RGBA:
     97 		case CompressedTexture::ASTC_6x6_RGBA:
     98 		case CompressedTexture::ASTC_8x5_RGBA:
     99 		case CompressedTexture::ASTC_8x6_RGBA:
    100 		case CompressedTexture::ASTC_8x8_RGBA:
    101 		case CompressedTexture::ASTC_10x5_RGBA:
    102 		case CompressedTexture::ASTC_10x6_RGBA:
    103 		case CompressedTexture::ASTC_10x8_RGBA:
    104 		case CompressedTexture::ASTC_10x10_RGBA:
    105 		case CompressedTexture::ASTC_12x10_RGBA:
    106 		case CompressedTexture::ASTC_12x12_RGBA:
    107 		case CompressedTexture::ASTC_4x4_SRGB8_ALPHA8:
    108 		case CompressedTexture::ASTC_5x4_SRGB8_ALPHA8:
    109 		case CompressedTexture::ASTC_5x5_SRGB8_ALPHA8:
    110 		case CompressedTexture::ASTC_6x5_SRGB8_ALPHA8:
    111 		case CompressedTexture::ASTC_6x6_SRGB8_ALPHA8:
    112 		case CompressedTexture::ASTC_8x5_SRGB8_ALPHA8:
    113 		case CompressedTexture::ASTC_8x6_SRGB8_ALPHA8:
    114 		case CompressedTexture::ASTC_8x8_SRGB8_ALPHA8:
    115 		case CompressedTexture::ASTC_10x5_SRGB8_ALPHA8:
    116 		case CompressedTexture::ASTC_10x6_SRGB8_ALPHA8:
    117 		case CompressedTexture::ASTC_10x8_SRGB8_ALPHA8:
    118 		case CompressedTexture::ASTC_10x10_SRGB8_ALPHA8:
    119 		case CompressedTexture::ASTC_12x10_SRGB8_ALPHA8:
    120 		case CompressedTexture::ASTC_12x12_SRGB8_ALPHA8:
    121 			return true;
    122 
    123 		default:
    124 			return false;
    125 	}
    126 }
    127 
    128 bool isASTCSRGBFormat (CompressedTexture::Format fmt)
    129 {
    130 	switch (fmt)
    131 	{
    132 		case CompressedTexture::ASTC_4x4_SRGB8_ALPHA8:
    133 		case CompressedTexture::ASTC_5x4_SRGB8_ALPHA8:
    134 		case CompressedTexture::ASTC_5x5_SRGB8_ALPHA8:
    135 		case CompressedTexture::ASTC_6x5_SRGB8_ALPHA8:
    136 		case CompressedTexture::ASTC_6x6_SRGB8_ALPHA8:
    137 		case CompressedTexture::ASTC_8x5_SRGB8_ALPHA8:
    138 		case CompressedTexture::ASTC_8x6_SRGB8_ALPHA8:
    139 		case CompressedTexture::ASTC_8x8_SRGB8_ALPHA8:
    140 		case CompressedTexture::ASTC_10x5_SRGB8_ALPHA8:
    141 		case CompressedTexture::ASTC_10x6_SRGB8_ALPHA8:
    142 		case CompressedTexture::ASTC_10x8_SRGB8_ALPHA8:
    143 		case CompressedTexture::ASTC_10x10_SRGB8_ALPHA8:
    144 		case CompressedTexture::ASTC_12x10_SRGB8_ALPHA8:
    145 		case CompressedTexture::ASTC_12x12_SRGB8_ALPHA8:
    146 			return true;
    147 
    148 		default:
    149 			return false;
    150 	}
    151 }
    152 
    153 IVec3 getASTCBlockSize (CompressedTexture::Format fmt)
    154 {
    155 	switch (fmt)
    156 	{
    157 		case CompressedTexture::ASTC_4x4_RGBA:				return IVec3(4,  4,  1);
    158 		case CompressedTexture::ASTC_5x4_RGBA:				return IVec3(5,  4,  1);
    159 		case CompressedTexture::ASTC_5x5_RGBA:				return IVec3(5,  5,  1);
    160 		case CompressedTexture::ASTC_6x5_RGBA:				return IVec3(6,  5,  1);
    161 		case CompressedTexture::ASTC_6x6_RGBA:				return IVec3(6,  6,  1);
    162 		case CompressedTexture::ASTC_8x5_RGBA:				return IVec3(8,  5,  1);
    163 		case CompressedTexture::ASTC_8x6_RGBA:				return IVec3(8,  6,  1);
    164 		case CompressedTexture::ASTC_8x8_RGBA:				return IVec3(8,  8,  1);
    165 		case CompressedTexture::ASTC_10x5_RGBA:				return IVec3(10, 5,  1);
    166 		case CompressedTexture::ASTC_10x6_RGBA:				return IVec3(10, 6,  1);
    167 		case CompressedTexture::ASTC_10x8_RGBA:				return IVec3(10, 8,  1);
    168 		case CompressedTexture::ASTC_10x10_RGBA:			return IVec3(10, 10, 1);
    169 		case CompressedTexture::ASTC_12x10_RGBA:			return IVec3(12, 10, 1);
    170 		case CompressedTexture::ASTC_12x12_RGBA:			return IVec3(12, 12, 1);
    171 		case CompressedTexture::ASTC_4x4_SRGB8_ALPHA8:		return IVec3(4,  4,  1);
    172 		case CompressedTexture::ASTC_5x4_SRGB8_ALPHA8:		return IVec3(5,  4,  1);
    173 		case CompressedTexture::ASTC_5x5_SRGB8_ALPHA8:		return IVec3(5,  5,  1);
    174 		case CompressedTexture::ASTC_6x5_SRGB8_ALPHA8:		return IVec3(6,  5,  1);
    175 		case CompressedTexture::ASTC_6x6_SRGB8_ALPHA8:		return IVec3(6,  6,  1);
    176 		case CompressedTexture::ASTC_8x5_SRGB8_ALPHA8:		return IVec3(8,  5,  1);
    177 		case CompressedTexture::ASTC_8x6_SRGB8_ALPHA8:		return IVec3(8,  6,  1);
    178 		case CompressedTexture::ASTC_8x8_SRGB8_ALPHA8:		return IVec3(8,  8,  1);
    179 		case CompressedTexture::ASTC_10x5_SRGB8_ALPHA8:		return IVec3(10, 5,  1);
    180 		case CompressedTexture::ASTC_10x6_SRGB8_ALPHA8:		return IVec3(10, 6,  1);
    181 		case CompressedTexture::ASTC_10x8_SRGB8_ALPHA8:		return IVec3(10, 8,  1);
    182 		case CompressedTexture::ASTC_10x10_SRGB8_ALPHA8:	return IVec3(10, 10, 1);
    183 		case CompressedTexture::ASTC_12x10_SRGB8_ALPHA8:	return IVec3(12, 10, 1);
    184 		case CompressedTexture::ASTC_12x12_SRGB8_ALPHA8:	return IVec3(12, 12, 1);
    185 
    186 		default:
    187 			DE_ASSERT(false);
    188 			return IVec3();
    189 	}
    190 }
    191 
    192 CompressedTexture::Format getASTCFormatByBlockSize (int width, int height, int depth, bool isSRGB)
    193 {
    194 	if (depth > 1)
    195 		throw tcu::InternalError("3D ASTC textures not currently supported");
    196 
    197 	const tcu::IVec3 size(width, height, depth);
    198 
    199 	for (int fmtI = 0; fmtI < CompressedTexture::FORMAT_LAST; fmtI++)
    200 	{
    201 		const CompressedTexture::Format fmt = (CompressedTexture::Format)fmtI;
    202 
    203 		if (isASTCFormat(fmt) && getASTCBlockSize(fmt) == size && isASTCSRGBFormat(fmt) == isSRGB)
    204 			return fmt;
    205 	}
    206 
    207 	throw tcu::InternalError("Invalid ASTC block size " + de::toString(width) + "x" + de::toString(height) + "x" + de::toString(depth));
    208 }
    209 
    210 void CompressedTexture::setStorage (Format format, int width, int height, int depth)
    211 {
    212 	m_format	= format;
    213 	m_width		= width;
    214 	m_height	= height;
    215 	m_depth		= depth;
    216 
    217 	if (isEtcFormat(m_format))
    218 	{
    219 		DE_ASSERT(m_depth == 1);
    220 
    221 		int blockSizeMultiplier = 0; // How many 64-bit parts each compressed block contains.
    222 
    223 		switch (m_format)
    224 		{
    225 			case ETC1_RGB8:							blockSizeMultiplier = 1;	break;
    226 			case EAC_R11:							blockSizeMultiplier = 1;	break;
    227 			case EAC_SIGNED_R11:					blockSizeMultiplier = 1;	break;
    228 			case EAC_RG11:							blockSizeMultiplier = 2;	break;
    229 			case EAC_SIGNED_RG11:					blockSizeMultiplier = 2;	break;
    230 			case ETC2_RGB8:							blockSizeMultiplier = 1;	break;
    231 			case ETC2_SRGB8:						blockSizeMultiplier = 1;	break;
    232 			case ETC2_RGB8_PUNCHTHROUGH_ALPHA1:		blockSizeMultiplier = 1;	break;
    233 			case ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:	blockSizeMultiplier = 1;	break;
    234 			case ETC2_EAC_RGBA8:					blockSizeMultiplier = 2;	break;
    235 			case ETC2_EAC_SRGB8_ALPHA8:				blockSizeMultiplier = 2;	break;
    236 
    237 			default:
    238 				DE_ASSERT(false);
    239 				break;
    240 		}
    241 
    242 		m_data.resize(blockSizeMultiplier * sizeof(deUint64) * divRoundUp(m_width, 4) * divRoundUp(m_height, 4));
    243 	}
    244 	else if (isASTCFormat(m_format))
    245 	{
    246 		if (m_depth > 1)
    247 			throw tcu::InternalError("3D ASTC textures not currently supported");
    248 
    249 		const IVec3 blockSize = getASTCBlockSize(m_format);
    250 		m_data.resize(ASTC_BLOCK_SIZE_BYTES * divRoundUp(m_width, blockSize.x()) * divRoundUp(m_height, blockSize.y()) * divRoundUp(m_depth, blockSize.z()));
    251 	}
    252 	else
    253 	{
    254 		DE_ASSERT(m_format == FORMAT_LAST);
    255 		DE_ASSERT(m_width == 0 && m_height == 0 && m_depth == 0);
    256 		m_data.resize(0);
    257 	}
    258 }
    259 
    260 /*--------------------------------------------------------------------*//*!
    261  * \brief Get uncompressed texture format
    262  *//*--------------------------------------------------------------------*/
    263 TextureFormat CompressedTexture::getUncompressedFormat (void) const
    264 {
    265 	if (isEtcFormat(m_format))
    266 	{
    267 		switch (m_format)
    268 		{
    269 			case ETC1_RGB8:							return TextureFormat(TextureFormat::RGB,	TextureFormat::UNORM_INT8);
    270 			case EAC_R11:							return TextureFormat(TextureFormat::R,		TextureFormat::UNORM_INT16);
    271 			case EAC_SIGNED_R11:					return TextureFormat(TextureFormat::R,		TextureFormat::SNORM_INT16);
    272 			case EAC_RG11:							return TextureFormat(TextureFormat::RG,		TextureFormat::UNORM_INT16);
    273 			case EAC_SIGNED_RG11:					return TextureFormat(TextureFormat::RG,		TextureFormat::SNORM_INT16);
    274 			case ETC2_RGB8:							return TextureFormat(TextureFormat::RGB,	TextureFormat::UNORM_INT8);
    275 			case ETC2_SRGB8:						return TextureFormat(TextureFormat::sRGB,	TextureFormat::UNORM_INT8);
    276 			case ETC2_RGB8_PUNCHTHROUGH_ALPHA1:		return TextureFormat(TextureFormat::RGBA,	TextureFormat::UNORM_INT8);
    277 			case ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:	return TextureFormat(TextureFormat::sRGBA,	TextureFormat::UNORM_INT8);
    278 			case ETC2_EAC_RGBA8:					return TextureFormat(TextureFormat::RGBA,	TextureFormat::UNORM_INT8);
    279 			case ETC2_EAC_SRGB8_ALPHA8:				return TextureFormat(TextureFormat::sRGBA,	TextureFormat::UNORM_INT8);
    280 			default:
    281 				DE_ASSERT(false);
    282 				return TextureFormat();
    283 		}
    284 	}
    285 	else if (isASTCFormat(m_format))
    286 	{
    287 		if (isASTCSRGBFormat(m_format))
    288 			return TextureFormat(TextureFormat::sRGBA, TextureFormat::UNORM_INT8);
    289 		else
    290 			return TextureFormat(TextureFormat::RGBA, TextureFormat::HALF_FLOAT);
    291 	}
    292 	else
    293 	{
    294 		DE_ASSERT(false);
    295 		return TextureFormat();
    296 	}
    297 }
    298 
    299 // \todo [2013-08-06 nuutti] ETC and ASTC decompression codes are rather unrelated, and are already in their own "private" namespaces - should this be split to multiple files?
    300 
    301 namespace EtcDecompressInternal
    302 {
    303 
    304 enum
    305 {
    306 	ETC2_BLOCK_WIDTH					= 4,
    307 	ETC2_BLOCK_HEIGHT					= 4,
    308 	ETC2_UNCOMPRESSED_PIXEL_SIZE_A8		= 1,
    309 	ETC2_UNCOMPRESSED_PIXEL_SIZE_R11	= 2,
    310 	ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11	= 4,
    311 	ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8	= 3,
    312 	ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8	= 4,
    313 	ETC2_UNCOMPRESSED_BLOCK_SIZE_A8		= ETC2_BLOCK_WIDTH*ETC2_BLOCK_HEIGHT*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8,
    314 	ETC2_UNCOMPRESSED_BLOCK_SIZE_R11	= ETC2_BLOCK_WIDTH*ETC2_BLOCK_HEIGHT*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11,
    315 	ETC2_UNCOMPRESSED_BLOCK_SIZE_RG11	= ETC2_BLOCK_WIDTH*ETC2_BLOCK_HEIGHT*ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11,
    316 	ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8	= ETC2_BLOCK_WIDTH*ETC2_BLOCK_HEIGHT*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8,
    317 	ETC2_UNCOMPRESSED_BLOCK_SIZE_RGBA8	= ETC2_BLOCK_WIDTH*ETC2_BLOCK_HEIGHT*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8
    318 };
    319 
    320 static inline deUint64 get64BitBlock (const deUint8* src, int blockNdx)
    321 {
    322 	// Stored in big-endian form.
    323 	deUint64 block = 0;
    324 	for (int i = 0; i < 8; i++)
    325 		block = (block << 8ull) | (deUint64)(src[blockNdx*8+i]);
    326 	return block;
    327 }
    328 
    329 // Return the first 64 bits of a 128 bit block.
    330 static inline deUint64 get128BitBlockStart (const deUint8* src, int blockNdx)
    331 {
    332 	return get64BitBlock(src, 2*blockNdx);
    333 }
    334 
    335 // Return the last 64 bits of a 128 bit block.
    336 static inline deUint64 get128BitBlockEnd (const deUint8* src, int blockNdx)
    337 {
    338 	return get64BitBlock(src, 2*blockNdx + 1);
    339 }
    340 
    341 static inline deUint32 getBit (deUint64 src, int bit)
    342 {
    343 	return (src >> bit) & 1;
    344 }
    345 
    346 static inline deUint32 getBits (deUint64 src, int low, int high)
    347 {
    348 	const int numBits = (high-low) + 1;
    349 	DE_ASSERT(de::inRange(numBits, 1, 32));
    350 	return (src >> low) & ((1<<numBits)-1);
    351 }
    352 
    353 static inline deUint8 extend4To8 (deUint8 src)
    354 {
    355 	DE_ASSERT((src & ~((1<<4)-1)) == 0);
    356 	return (src << 4) | src;
    357 }
    358 
    359 static inline deUint8 extend5To8 (deUint8 src)
    360 {
    361 	DE_ASSERT((src & ~((1<<5)-1)) == 0);
    362 	return (src << 3) | (src >> 2);
    363 }
    364 
    365 static inline deUint8 extend6To8 (deUint8 src)
    366 {
    367 	DE_ASSERT((src & ~((1<<6)-1)) == 0);
    368 	return (src << 2) | (src >> 4);
    369 }
    370 
    371 static inline deUint8 extend7To8 (deUint8 src)
    372 {
    373 	DE_ASSERT((src & ~((1<<7)-1)) == 0);
    374 	return (src << 1) | (src >> 6);
    375 }
    376 
    377 static inline deInt8 extendSigned3To8 (deUint8 src)
    378 {
    379 	const bool isNeg = (src & (1<<2)) != 0;
    380 	return (deInt8)((isNeg ? ~((1<<3)-1) : 0) | src);
    381 }
    382 
    383 static inline deUint8 extend5Delta3To8 (deUint8 base5, deUint8 delta3)
    384 {
    385 	const deUint8 t = (deUint8)((deInt8)base5 + extendSigned3To8(delta3));
    386 	return extend5To8(t);
    387 }
    388 
    389 static inline deUint16 extend11To16 (deUint16 src)
    390 {
    391 	DE_ASSERT((src & ~((1<<11)-1)) == 0);
    392 	return (src << 5) | (src >> 6);
    393 }
    394 
    395 static inline deInt16 extend11To16WithSign (deInt16 src)
    396 {
    397 	if (src < 0)
    398 		return -(deInt16)extend11To16(-src);
    399 	else
    400 		return (deInt16)extend11To16(src);
    401 }
    402 
    403 static void decompressETC1Block (deUint8 dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8], deUint64 src)
    404 {
    405 	const int		diffBit		= (int)getBit(src, 33);
    406 	const int		flipBit		= (int)getBit(src, 32);
    407 	const deUint32	table[2]	= { getBits(src, 37, 39), getBits(src, 34, 36) };
    408 	deUint8			baseR[2];
    409 	deUint8			baseG[2];
    410 	deUint8			baseB[2];
    411 
    412 	if (diffBit == 0)
    413 	{
    414 		// Individual mode.
    415 		baseR[0] = extend4To8((deUint8)getBits(src, 60, 63));
    416 		baseR[1] = extend4To8((deUint8)getBits(src, 56, 59));
    417 		baseG[0] = extend4To8((deUint8)getBits(src, 52, 55));
    418 		baseG[1] = extend4To8((deUint8)getBits(src, 48, 51));
    419 		baseB[0] = extend4To8((deUint8)getBits(src, 44, 47));
    420 		baseB[1] = extend4To8((deUint8)getBits(src, 40, 43));
    421 	}
    422 	else
    423 	{
    424 		// Differential mode (diffBit == 1).
    425 		deUint8 bR = (deUint8)getBits(src, 59, 63); // 5b
    426 		deUint8 dR = (deUint8)getBits(src, 56, 58); // 3b
    427 		deUint8 bG = (deUint8)getBits(src, 51, 55);
    428 		deUint8 dG = (deUint8)getBits(src, 48, 50);
    429 		deUint8 bB = (deUint8)getBits(src, 43, 47);
    430 		deUint8 dB = (deUint8)getBits(src, 40, 42);
    431 
    432 		baseR[0] = extend5To8(bR);
    433 		baseG[0] = extend5To8(bG);
    434 		baseB[0] = extend5To8(bB);
    435 
    436 		baseR[1] = extend5Delta3To8(bR, dR);
    437 		baseG[1] = extend5Delta3To8(bG, dG);
    438 		baseB[1] = extend5Delta3To8(bB, dB);
    439 	}
    440 
    441 	static const int modifierTable[8][4] =
    442 	{
    443 	//	  00   01   10    11
    444 		{  2,   8,  -2,   -8 },
    445 		{  5,  17,  -5,  -17 },
    446 		{  9,  29,  -9,  -29 },
    447 		{ 13,  42, -13,  -42 },
    448 		{ 18,  60, -18,  -60 },
    449 		{ 24,  80, -24,  -80 },
    450 		{ 33, 106, -33, -106 },
    451 		{ 47, 183, -47, -183 }
    452 	};
    453 
    454 	// Write final pixels.
    455 	for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT*ETC2_BLOCK_WIDTH; pixelNdx++)
    456 	{
    457 		const int		x				= pixelNdx / ETC2_BLOCK_HEIGHT;
    458 		const int		y				= pixelNdx % ETC2_BLOCK_HEIGHT;
    459 		const int		dstOffset		= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
    460 		const int		subBlock		= ((flipBit ? y : x) >= 2) ? 1 : 0;
    461 		const deUint32	tableNdx		= table[subBlock];
    462 		const deUint32	modifierNdx		= (getBit(src, 16+pixelNdx) << 1) | getBit(src, pixelNdx);
    463 		const int		modifier		= modifierTable[tableNdx][modifierNdx];
    464 
    465 		dst[dstOffset+0] = (deUint8)deClamp32((int)baseR[subBlock] + modifier, 0, 255);
    466 		dst[dstOffset+1] = (deUint8)deClamp32((int)baseG[subBlock] + modifier, 0, 255);
    467 		dst[dstOffset+2] = (deUint8)deClamp32((int)baseB[subBlock] + modifier, 0, 255);
    468 	}
    469 }
    470 
    471 // if alphaMode is true, do PUNCHTHROUGH and store alpha to alphaDst; otherwise do ordinary ETC2 RGB8.
    472 static void decompressETC2Block (deUint8 dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8], deUint64 src, deUint8 alphaDst[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8], bool alphaMode)
    473 {
    474 	enum Etc2Mode
    475 	{
    476 		MODE_INDIVIDUAL = 0,
    477 		MODE_DIFFERENTIAL,
    478 		MODE_T,
    479 		MODE_H,
    480 		MODE_PLANAR,
    481 
    482 		MODE_LAST
    483 	};
    484 
    485 	const int		diffOpaqueBit	= (int)getBit(src, 33);
    486 	const deInt8	selBR			= (deInt8)getBits(src, 59, 63);	// 5 bits.
    487 	const deInt8	selBG			= (deInt8)getBits(src, 51, 55);
    488 	const deInt8	selBB			= (deInt8)getBits(src, 43, 47);
    489 	const deInt8	selDR			= extendSigned3To8((deUint8)getBits(src, 56, 58)); // 3 bits.
    490 	const deInt8	selDG			= extendSigned3To8((deUint8)getBits(src, 48, 50));
    491 	const deInt8	selDB			= extendSigned3To8((deUint8)getBits(src, 40, 42));
    492 	Etc2Mode		mode;
    493 
    494 	if (!alphaMode && diffOpaqueBit == 0)
    495 		mode = MODE_INDIVIDUAL;
    496 	else if (!de::inRange(selBR + selDR, 0, 31))
    497 		mode = MODE_T;
    498 	else if (!de::inRange(selBG + selDG, 0, 31))
    499 		mode = MODE_H;
    500 	else if (!de::inRange(selBB + selDB, 0, 31))
    501 		mode = MODE_PLANAR;
    502 	else
    503 		mode = MODE_DIFFERENTIAL;
    504 
    505 	if (mode == MODE_INDIVIDUAL || mode == MODE_DIFFERENTIAL)
    506 	{
    507 		// Individual and differential modes have some steps in common, handle them here.
    508 		static const int modifierTable[8][4] =
    509 		{
    510 		//	  00   01   10    11
    511 			{  2,   8,  -2,   -8 },
    512 			{  5,  17,  -5,  -17 },
    513 			{  9,  29,  -9,  -29 },
    514 			{ 13,  42, -13,  -42 },
    515 			{ 18,  60, -18,  -60 },
    516 			{ 24,  80, -24,  -80 },
    517 			{ 33, 106, -33, -106 },
    518 			{ 47, 183, -47, -183 }
    519 		};
    520 
    521 		const int		flipBit		= (int)getBit(src, 32);
    522 		const deUint32	table[2]	= { getBits(src, 37, 39), getBits(src, 34, 36) };
    523 		deUint8			baseR[2];
    524 		deUint8			baseG[2];
    525 		deUint8			baseB[2];
    526 
    527 		if (mode == MODE_INDIVIDUAL)
    528 		{
    529 			// Individual mode, initial values.
    530 			baseR[0] = extend4To8((deUint8)getBits(src, 60, 63));
    531 			baseR[1] = extend4To8((deUint8)getBits(src, 56, 59));
    532 			baseG[0] = extend4To8((deUint8)getBits(src, 52, 55));
    533 			baseG[1] = extend4To8((deUint8)getBits(src, 48, 51));
    534 			baseB[0] = extend4To8((deUint8)getBits(src, 44, 47));
    535 			baseB[1] = extend4To8((deUint8)getBits(src, 40, 43));
    536 		}
    537 		else
    538 		{
    539 			// Differential mode, initial values.
    540 			baseR[0] = extend5To8(selBR);
    541 			baseG[0] = extend5To8(selBG);
    542 			baseB[0] = extend5To8(selBB);
    543 
    544 			baseR[1] = extend5To8((deUint8)(selBR + selDR));
    545 			baseG[1] = extend5To8((deUint8)(selBG + selDG));
    546 			baseB[1] = extend5To8((deUint8)(selBB + selDB));
    547 		}
    548 
    549 		// Write final pixels for individual or differential mode.
    550 		for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT*ETC2_BLOCK_WIDTH; pixelNdx++)
    551 		{
    552 			const int		x				= pixelNdx / ETC2_BLOCK_HEIGHT;
    553 			const int		y				= pixelNdx % ETC2_BLOCK_HEIGHT;
    554 			const int		dstOffset		= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
    555 			const int		subBlock		= ((flipBit ? y : x) >= 2) ? 1 : 0;
    556 			const deUint32	tableNdx		= table[subBlock];
    557 			const deUint32	modifierNdx		= (getBit(src, 16+pixelNdx) << 1) | getBit(src, pixelNdx);
    558 			const int		alphaDstOffset	= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8; // Only needed for PUNCHTHROUGH version.
    559 
    560 			// If doing PUNCHTHROUGH version (alphaMode), opaque bit may affect colors.
    561 			if (alphaMode && diffOpaqueBit == 0 && modifierNdx == 2)
    562 			{
    563 				dst[dstOffset+0]			= 0;
    564 				dst[dstOffset+1]			= 0;
    565 				dst[dstOffset+2]			= 0;
    566 				alphaDst[alphaDstOffset]	= 0;
    567 			}
    568 			else
    569 			{
    570 				int modifier;
    571 
    572 				// PUNCHTHROUGH version and opaque bit may also affect modifiers.
    573 				if (alphaMode && diffOpaqueBit == 0 && (modifierNdx == 0 || modifierNdx == 2))
    574 					modifier = 0;
    575 				else
    576 					modifier = modifierTable[tableNdx][modifierNdx];
    577 
    578 				dst[dstOffset+0] = (deUint8)deClamp32((int)baseR[subBlock] + modifier, 0, 255);
    579 				dst[dstOffset+1] = (deUint8)deClamp32((int)baseG[subBlock] + modifier, 0, 255);
    580 				dst[dstOffset+2] = (deUint8)deClamp32((int)baseB[subBlock] + modifier, 0, 255);
    581 
    582 				if (alphaMode)
    583 					alphaDst[alphaDstOffset] = 255;
    584 			}
    585 		}
    586 	}
    587 	else if (mode == MODE_T || mode == MODE_H)
    588 	{
    589 		// T and H modes have some steps in common, handle them here.
    590 		static const int distTable[8] = { 3, 6, 11, 16, 23, 32, 41, 64 };
    591 
    592 		deUint8 paintR[4];
    593 		deUint8 paintG[4];
    594 		deUint8 paintB[4];
    595 
    596 		if (mode == MODE_T)
    597 		{
    598 			// T mode, calculate paint values.
    599 			const deUint8	R1a			= (deUint8)getBits(src, 59, 60);
    600 			const deUint8	R1b			= (deUint8)getBits(src, 56, 57);
    601 			const deUint8	G1			= (deUint8)getBits(src, 52, 55);
    602 			const deUint8	B1			= (deUint8)getBits(src, 48, 51);
    603 			const deUint8	R2			= (deUint8)getBits(src, 44, 47);
    604 			const deUint8	G2			= (deUint8)getBits(src, 40, 43);
    605 			const deUint8	B2			= (deUint8)getBits(src, 36, 39);
    606 			const deUint32	distNdx		= (getBits(src, 34, 35) << 1) | getBit(src, 32);
    607 			const int		dist		= distTable[distNdx];
    608 
    609 			paintR[0] = extend4To8((R1a << 2) | R1b);
    610 			paintG[0] = extend4To8(G1);
    611 			paintB[0] = extend4To8(B1);
    612 			paintR[2] = extend4To8(R2);
    613 			paintG[2] = extend4To8(G2);
    614 			paintB[2] = extend4To8(B2);
    615 			paintR[1] = (deUint8)deClamp32((int)paintR[2] + dist, 0, 255);
    616 			paintG[1] = (deUint8)deClamp32((int)paintG[2] + dist, 0, 255);
    617 			paintB[1] = (deUint8)deClamp32((int)paintB[2] + dist, 0, 255);
    618 			paintR[3] = (deUint8)deClamp32((int)paintR[2] - dist, 0, 255);
    619 			paintG[3] = (deUint8)deClamp32((int)paintG[2] - dist, 0, 255);
    620 			paintB[3] = (deUint8)deClamp32((int)paintB[2] - dist, 0, 255);
    621 		}
    622 		else
    623 		{
    624 			// H mode, calculate paint values.
    625 			const deUint8	R1		= (deUint8)getBits(src, 59, 62);
    626 			const deUint8	G1a		= (deUint8)getBits(src, 56, 58);
    627 			const deUint8	G1b		= (deUint8)getBit(src, 52);
    628 			const deUint8	B1a		= (deUint8)getBit(src, 51);
    629 			const deUint8	B1b		= (deUint8)getBits(src, 47, 49);
    630 			const deUint8	R2		= (deUint8)getBits(src, 43, 46);
    631 			const deUint8	G2		= (deUint8)getBits(src, 39, 42);
    632 			const deUint8	B2		= (deUint8)getBits(src, 35, 38);
    633 			deUint8			baseR[2];
    634 			deUint8			baseG[2];
    635 			deUint8			baseB[2];
    636 			deUint32		baseValue[2];
    637 			deUint32		distNdx;
    638 			int				dist;
    639 
    640 			baseR[0]		= extend4To8(R1);
    641 			baseG[0]		= extend4To8((G1a << 1) | G1b);
    642 			baseB[0]		= extend4To8((B1a << 3) | B1b);
    643 			baseR[1]		= extend4To8(R2);
    644 			baseG[1]		= extend4To8(G2);
    645 			baseB[1]		= extend4To8(B2);
    646 			baseValue[0]	= (((deUint32)baseR[0]) << 16) | (((deUint32)baseG[0]) << 8) | baseB[0];
    647 			baseValue[1]	= (((deUint32)baseR[1]) << 16) | (((deUint32)baseG[1]) << 8) | baseB[1];
    648 			distNdx			= (getBit(src, 34) << 2) | (getBit(src, 32) << 1) | (deUint32)(baseValue[0] >= baseValue[1]);
    649 			dist			= distTable[distNdx];
    650 
    651 			paintR[0]		= (deUint8)deClamp32((int)baseR[0] + dist, 0, 255);
    652 			paintG[0]		= (deUint8)deClamp32((int)baseG[0] + dist, 0, 255);
    653 			paintB[0]		= (deUint8)deClamp32((int)baseB[0] + dist, 0, 255);
    654 			paintR[1]		= (deUint8)deClamp32((int)baseR[0] - dist, 0, 255);
    655 			paintG[1]		= (deUint8)deClamp32((int)baseG[0] - dist, 0, 255);
    656 			paintB[1]		= (deUint8)deClamp32((int)baseB[0] - dist, 0, 255);
    657 			paintR[2]		= (deUint8)deClamp32((int)baseR[1] + dist, 0, 255);
    658 			paintG[2]		= (deUint8)deClamp32((int)baseG[1] + dist, 0, 255);
    659 			paintB[2]		= (deUint8)deClamp32((int)baseB[1] + dist, 0, 255);
    660 			paintR[3]		= (deUint8)deClamp32((int)baseR[1] - dist, 0, 255);
    661 			paintG[3]		= (deUint8)deClamp32((int)baseG[1] - dist, 0, 255);
    662 			paintB[3]		= (deUint8)deClamp32((int)baseB[1] - dist, 0, 255);
    663 		}
    664 
    665 		// Write final pixels for T or H mode.
    666 		for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT*ETC2_BLOCK_WIDTH; pixelNdx++)
    667 		{
    668 			const int		x				= pixelNdx / ETC2_BLOCK_HEIGHT;
    669 			const int		y				= pixelNdx % ETC2_BLOCK_HEIGHT;
    670 			const int		dstOffset		= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
    671 			const deUint32	paintNdx		= (getBit(src, 16+pixelNdx) << 1) | getBit(src, pixelNdx);
    672 			const int		alphaDstOffset	= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8; // Only needed for PUNCHTHROUGH version.
    673 
    674 			if (alphaMode && diffOpaqueBit == 0 && paintNdx == 2)
    675 			{
    676 				dst[dstOffset+0]			= 0;
    677 				dst[dstOffset+1]			= 0;
    678 				dst[dstOffset+2]			= 0;
    679 				alphaDst[alphaDstOffset]	= 0;
    680 			}
    681 			else
    682 			{
    683 				dst[dstOffset+0] = (deUint8)deClamp32((int)paintR[paintNdx], 0, 255);
    684 				dst[dstOffset+1] = (deUint8)deClamp32((int)paintG[paintNdx], 0, 255);
    685 				dst[dstOffset+2] = (deUint8)deClamp32((int)paintB[paintNdx], 0, 255);
    686 
    687 				if (alphaMode)
    688 					alphaDst[alphaDstOffset] = 255;
    689 			}
    690 		}
    691 	}
    692 	else
    693 	{
    694 		// Planar mode.
    695 		const deUint8 GO1	= (deUint8)getBit(src, 56);
    696 		const deUint8 GO2	= (deUint8)getBits(src, 49, 54);
    697 		const deUint8 BO1	= (deUint8)getBit(src, 48);
    698 		const deUint8 BO2	= (deUint8)getBits(src, 43, 44);
    699 		const deUint8 BO3	= (deUint8)getBits(src, 39, 41);
    700 		const deUint8 RH1	= (deUint8)getBits(src, 34, 38);
    701 		const deUint8 RH2	= (deUint8)getBit(src, 32);
    702 		const deUint8 RO	= extend6To8((deUint8)getBits(src, 57, 62));
    703 		const deUint8 GO	= extend7To8((GO1 << 6) | GO2);
    704 		const deUint8 BO	= extend6To8((BO1 << 5) | (BO2 << 3) | BO3);
    705 		const deUint8 RH	= extend6To8((RH1 << 1) | RH2);
    706 		const deUint8 GH	= extend7To8((deUint8)getBits(src, 25, 31));
    707 		const deUint8 BH	= extend6To8((deUint8)getBits(src, 19, 24));
    708 		const deUint8 RV	= extend6To8((deUint8)getBits(src, 13, 18));
    709 		const deUint8 GV	= extend7To8((deUint8)getBits(src, 6, 12));
    710 		const deUint8 BV	= extend6To8((deUint8)getBits(src, 0, 5));
    711 
    712 		// Write final pixels for planar mode.
    713 		for (int y = 0; y < 4; y++)
    714 		{
    715 			for (int x = 0; x < 4; x++)
    716 			{
    717 				const int dstOffset			= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
    718 				const int unclampedR		= (x * ((int)RH-(int)RO) + y * ((int)RV-(int)RO) + 4*(int)RO + 2) >> 2;
    719 				const int unclampedG		= (x * ((int)GH-(int)GO) + y * ((int)GV-(int)GO) + 4*(int)GO + 2) >> 2;
    720 				const int unclampedB		= (x * ((int)BH-(int)BO) + y * ((int)BV-(int)BO) + 4*(int)BO + 2) >> 2;
    721 				const int alphaDstOffset	= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8; // Only needed for PUNCHTHROUGH version.
    722 
    723 				dst[dstOffset+0] = (deUint8)deClamp32(unclampedR, 0, 255);
    724 				dst[dstOffset+1] = (deUint8)deClamp32(unclampedG, 0, 255);
    725 				dst[dstOffset+2] = (deUint8)deClamp32(unclampedB, 0, 255);
    726 
    727 				if (alphaMode)
    728 					alphaDst[alphaDstOffset] = 255;
    729 			}
    730 		}
    731 	}
    732 }
    733 
    734 static void decompressEAC8Block (deUint8 dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8], deUint64 src)
    735 {
    736 	static const int modifierTable[16][8] =
    737 	{
    738 		{-3,  -6,  -9, -15,  2,  5,  8, 14},
    739 		{-3,  -7, -10, -13,  2,  6,  9, 12},
    740 		{-2,  -5,  -8, -13,  1,  4,  7, 12},
    741 		{-2,  -4,  -6, -13,  1,  3,  5, 12},
    742 		{-3,  -6,  -8, -12,  2,  5,  7, 11},
    743 		{-3,  -7,  -9, -11,  2,  6,  8, 10},
    744 		{-4,  -7,  -8, -11,  3,  6,  7, 10},
    745 		{-3,  -5,  -8, -11,  2,  4,  7, 10},
    746 		{-2,  -6,  -8, -10,  1,  5,  7,  9},
    747 		{-2,  -5,  -8, -10,  1,  4,  7,  9},
    748 		{-2,  -4,  -8, -10,  1,  3,  7,  9},
    749 		{-2,  -5,  -7, -10,  1,  4,  6,  9},
    750 		{-3,  -4,  -7, -10,  2,  3,  6,  9},
    751 		{-1,  -2,  -3, -10,  0,  1,  2,  9},
    752 		{-4,  -6,  -8,  -9,  3,  5,  7,  8},
    753 		{-3,  -5,  -7,  -9,  2,  4,  6,  8}
    754 	};
    755 
    756 	const deUint8	baseCodeword	= (deUint8)getBits(src, 56, 63);
    757 	const deUint8	multiplier		= (deUint8)getBits(src, 52, 55);
    758 	const deUint32	tableNdx		= getBits(src, 48, 51);
    759 
    760 	for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT*ETC2_BLOCK_WIDTH; pixelNdx++)
    761 	{
    762 		const int		x				= pixelNdx / ETC2_BLOCK_HEIGHT;
    763 		const int		y				= pixelNdx % ETC2_BLOCK_HEIGHT;
    764 		const int		dstOffset		= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8;
    765 		const int		pixelBitNdx		= 45 - 3*pixelNdx;
    766 		const deUint32	modifierNdx		= (getBit(src, pixelBitNdx + 2) << 2) | (getBit(src, pixelBitNdx + 1) << 1) | getBit(src, pixelBitNdx);
    767 		const int		modifier		= modifierTable[tableNdx][modifierNdx];
    768 
    769 		dst[dstOffset] = (deUint8)deClamp32((int)baseCodeword + (int)multiplier*modifier, 0, 255);
    770 	}
    771 }
    772 
    773 static void decompressEAC11Block (deUint8 dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11], deUint64 src, bool signedMode)
    774 {
    775 	static const int modifierTable[16][8] =
    776 	{
    777 		{-3,  -6,  -9, -15,  2,  5,  8, 14},
    778 		{-3,  -7, -10, -13,  2,  6,  9, 12},
    779 		{-2,  -5,  -8, -13,  1,  4,  7, 12},
    780 		{-2,  -4,  -6, -13,  1,  3,  5, 12},
    781 		{-3,  -6,  -8, -12,  2,  5,  7, 11},
    782 		{-3,  -7,  -9, -11,  2,  6,  8, 10},
    783 		{-4,  -7,  -8, -11,  3,  6,  7, 10},
    784 		{-3,  -5,  -8, -11,  2,  4,  7, 10},
    785 		{-2,  -6,  -8, -10,  1,  5,  7,  9},
    786 		{-2,  -5,  -8, -10,  1,  4,  7,  9},
    787 		{-2,  -4,  -8, -10,  1,  3,  7,  9},
    788 		{-2,  -5,  -7, -10,  1,  4,  6,  9},
    789 		{-3,  -4,  -7, -10,  2,  3,  6,  9},
    790 		{-1,  -2,  -3, -10,  0,  1,  2,  9},
    791 		{-4,  -6,  -8,  -9,  3,  5,  7,  8},
    792 		{-3,  -5,  -7,  -9,  2,  4,  6,  8}
    793 	};
    794 
    795 	const deInt32 multiplier	= (deInt32)getBits(src, 52, 55);
    796 	const deInt32 tableNdx		= (deInt32)getBits(src, 48, 51);
    797 	deInt32 baseCodeword		= (deInt32)getBits(src, 56, 63);
    798 
    799 	if (signedMode)
    800 	{
    801 		if (baseCodeword > 127)
    802 			baseCodeword -= 256;
    803 		if (baseCodeword == -128)
    804 			baseCodeword = -127;
    805 	}
    806 
    807 	for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT*ETC2_BLOCK_WIDTH; pixelNdx++)
    808 	{
    809 		const int		x				= pixelNdx / ETC2_BLOCK_HEIGHT;
    810 		const int		y				= pixelNdx % ETC2_BLOCK_HEIGHT;
    811 		const int		dstOffset		= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11;
    812 		const int		pixelBitNdx		= 45 - 3*pixelNdx;
    813 		const deUint32	modifierNdx		= (getBit(src, pixelBitNdx + 2) << 2) | (getBit(src, pixelBitNdx + 1) << 1) | getBit(src, pixelBitNdx);
    814 		const int		modifier		= modifierTable[tableNdx][modifierNdx];
    815 
    816 		if (signedMode)
    817 		{
    818 			if (multiplier != 0)
    819 				*(deInt16*)&dst[dstOffset] = (deInt16)deClamp32(baseCodeword*8 + multiplier*modifier*8, -1023, 1023);
    820 			else
    821 				*(deInt16*)&dst[dstOffset] = (deInt16)deClamp32(baseCodeword*8 + modifier, -1023, 1023);
    822 		}
    823 		else
    824 		{
    825 			if (multiplier != 0)
    826 				*(deUint16*)&dst[dstOffset] = (deUint16)deClamp32(baseCodeword*8 + 4 + multiplier*modifier*8, 0, 2047);
    827 			else
    828 				*(deUint16*)&dst[dstOffset] = (deUint16)deClamp32(baseCodeword*8 + 4 + modifier, 0, 2047);
    829 		}
    830 	}
    831 }
    832 
    833 } // EtcDecompressInternal
    834 
    835 static void decompressETC1 (const tcu::PixelBufferAccess& dst, int width, int height, const deUint8* src)
    836 {
    837 	using namespace EtcDecompressInternal;
    838 
    839 	DE_ASSERT(dst.getWidth() == width && dst.getHeight() == height && dst.getDepth() == 1);
    840 	DE_ASSERT(dst.getFormat() == TextureFormat(TextureFormat::RGB, TextureFormat::UNORM_INT8));
    841 
    842 	const int		numBlocksX		= divRoundUp(width, 4);
    843 	const int		numBlocksY		= divRoundUp(height, 4);
    844 	deUint8* const	dstPtr			= (deUint8*)dst.getDataPtr();
    845 	const int		dstRowPitch		= dst.getRowPitch();
    846 	const int		dstPixelSize	= ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
    847 
    848 	for (int blockY = 0; blockY < numBlocksY; blockY++)
    849 	{
    850 		for (int blockX = 0; blockX < numBlocksX; blockX++)
    851 		{
    852 			const deUint64	compressedBlock = get64BitBlock(src, blockY*numBlocksX + blockX);
    853 			deUint8			uncompressedBlock[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8];
    854 
    855 			// Decompress.
    856 			decompressETC1Block(uncompressedBlock, compressedBlock);
    857 
    858 			// Write to dst.
    859 			const int baseX = blockX*ETC2_BLOCK_WIDTH;
    860 			const int baseY = blockY*ETC2_BLOCK_HEIGHT;
    861 			for (int y = 0; y < de::min((int)ETC2_BLOCK_HEIGHT, height-baseY); y++)
    862 			{
    863 				for (int x = 0; x < de::min((int)ETC2_BLOCK_WIDTH, width-baseX); x++)
    864 				{
    865 					const deUint8* const	srcPixel = &uncompressedBlock[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8];
    866 					deUint8* const			dstPixel = dstPtr + (baseY+y)*dstRowPitch + (baseX+x)*dstPixelSize;
    867 
    868 					DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8 == 3);
    869 					dstPixel[0] = srcPixel[0];
    870 					dstPixel[1] = srcPixel[1];
    871 					dstPixel[2] = srcPixel[2];
    872 				}
    873 			}
    874 		}
    875 	}
    876 }
    877 
    878 static void decompressETC2 (const tcu::PixelBufferAccess& dst, int width, int height, const deUint8* src)
    879 {
    880 	using namespace EtcDecompressInternal;
    881 
    882 	const int		numBlocksX		= divRoundUp(width, 4);
    883 	const int		numBlocksY		= divRoundUp(height, 4);
    884 	deUint8* const	dstPtr			= (deUint8*)dst.getDataPtr();
    885 	const int		dstRowPitch		= dst.getRowPitch();
    886 	const int		dstPixelSize	= ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
    887 
    888 	for (int blockY = 0; blockY < numBlocksY; blockY++)
    889 	{
    890 		for (int blockX = 0; blockX < numBlocksX; blockX++)
    891 		{
    892 			const deUint64	compressedBlock = get64BitBlock(src, blockY*numBlocksX + blockX);
    893 			deUint8			uncompressedBlock[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8];
    894 
    895 			// Decompress.
    896 			decompressETC2Block(uncompressedBlock, compressedBlock, NULL, false);
    897 
    898 			// Write to dst.
    899 			const int baseX = blockX*ETC2_BLOCK_WIDTH;
    900 			const int baseY = blockY*ETC2_BLOCK_HEIGHT;
    901 			for (int y = 0; y < de::min((int)ETC2_BLOCK_HEIGHT, height-baseY); y++)
    902 			{
    903 				for (int x = 0; x < de::min((int)ETC2_BLOCK_WIDTH, width-baseX); x++)
    904 				{
    905 					const deUint8* const	srcPixel = &uncompressedBlock[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8];
    906 					deUint8* const			dstPixel = dstPtr + (baseY+y)*dstRowPitch + (baseX+x)*dstPixelSize;
    907 
    908 					DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8 == 3);
    909 					dstPixel[0] = srcPixel[0];
    910 					dstPixel[1] = srcPixel[1];
    911 					dstPixel[2] = srcPixel[2];
    912 				}
    913 			}
    914 		}
    915 	}
    916 }
    917 
    918 static void decompressETC2_EAC_RGBA8 (const tcu::PixelBufferAccess& dst, int width, int height, const deUint8* src)
    919 {
    920 	using namespace EtcDecompressInternal;
    921 
    922 	const int		numBlocksX		= divRoundUp(width, 4);
    923 	const int		numBlocksY		= divRoundUp(height, 4);
    924 	deUint8* const	dstPtr			= (deUint8*)dst.getDataPtr();
    925 	const int		dstRowPitch		= dst.getRowPitch();
    926 	const int		dstPixelSize	= ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8;
    927 
    928 	for (int blockY = 0; blockY < numBlocksY; blockY++)
    929 	{
    930 		for (int blockX = 0; blockX < numBlocksX; blockX++)
    931 		{
    932 			const deUint64	compressedBlockAlpha	= get128BitBlockStart(src, blockY*numBlocksX + blockX);
    933 			const deUint64	compressedBlockRGB		= get128BitBlockEnd(src, blockY*numBlocksX + blockX);
    934 			deUint8			uncompressedBlockAlpha[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8];
    935 			deUint8			uncompressedBlockRGB[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8];
    936 
    937 			// Decompress.
    938 			decompressETC2Block(uncompressedBlockRGB, compressedBlockRGB, NULL, false);
    939 			decompressEAC8Block(uncompressedBlockAlpha, compressedBlockAlpha);
    940 
    941 			// Write to dst.
    942 			const int baseX = blockX*ETC2_BLOCK_WIDTH;
    943 			const int baseY = blockY*ETC2_BLOCK_HEIGHT;
    944 			for (int y = 0; y < de::min((int)ETC2_BLOCK_HEIGHT, height-baseY); y++)
    945 			{
    946 				for (int x = 0; x < de::min((int)ETC2_BLOCK_WIDTH, width-baseX); x++)
    947 				{
    948 					const deUint8* const	srcPixelRGB		= &uncompressedBlockRGB[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8];
    949 					const deUint8* const	srcPixelAlpha	= &uncompressedBlockAlpha[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8];
    950 					deUint8* const			dstPixel		= dstPtr + (baseY+y)*dstRowPitch + (baseX+x)*dstPixelSize;
    951 
    952 					DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8 == 4);
    953 					dstPixel[0] = srcPixelRGB[0];
    954 					dstPixel[1] = srcPixelRGB[1];
    955 					dstPixel[2] = srcPixelRGB[2];
    956 					dstPixel[3] = srcPixelAlpha[0];
    957 				}
    958 			}
    959 		}
    960 	}
    961 }
    962 
    963 static void decompressETC2_RGB8_PUNCHTHROUGH_ALPHA1 (const tcu::PixelBufferAccess& dst, int width, int height, const deUint8* src)
    964 {
    965 	using namespace EtcDecompressInternal;
    966 
    967 	const int		numBlocksX		= divRoundUp(width, 4);
    968 	const int		numBlocksY		= divRoundUp(height, 4);
    969 	deUint8* const	dstPtr			= (deUint8*)dst.getDataPtr();
    970 	const int		dstRowPitch		= dst.getRowPitch();
    971 	const int		dstPixelSize	= ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8;
    972 
    973 	for (int blockY = 0; blockY < numBlocksY; blockY++)
    974 	{
    975 		for (int blockX = 0; blockX < numBlocksX; blockX++)
    976 		{
    977 			const deUint64	compressedBlockRGBA	= get64BitBlock(src, blockY*numBlocksX + blockX);
    978 			deUint8			uncompressedBlockRGB[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8];
    979 			deUint8			uncompressedBlockAlpha[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8];
    980 
    981 			// Decompress.
    982 			decompressETC2Block(uncompressedBlockRGB, compressedBlockRGBA, uncompressedBlockAlpha, DE_TRUE);
    983 
    984 			// Write to dst.
    985 			const int baseX = blockX*ETC2_BLOCK_WIDTH;
    986 			const int baseY = blockY*ETC2_BLOCK_HEIGHT;
    987 			for (int y = 0; y < de::min((int)ETC2_BLOCK_HEIGHT, height-baseY); y++)
    988 			{
    989 				for (int x = 0; x < de::min((int)ETC2_BLOCK_WIDTH, width-baseX); x++)
    990 				{
    991 					const deUint8* const	srcPixel		= &uncompressedBlockRGB[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8];
    992 					const deUint8* const	srcPixelAlpha	= &uncompressedBlockAlpha[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8];
    993 					deUint8* const			dstPixel		= dstPtr + (baseY+y)*dstRowPitch + (baseX+x)*dstPixelSize;
    994 
    995 					DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8 == 4);
    996 					dstPixel[0] = srcPixel[0];
    997 					dstPixel[1] = srcPixel[1];
    998 					dstPixel[2] = srcPixel[2];
    999 					dstPixel[3] = srcPixelAlpha[0];
   1000 				}
   1001 			}
   1002 		}
   1003 	}
   1004 }
   1005 
   1006 static void decompressEAC_R11 (const tcu::PixelBufferAccess& dst, int width, int height, const deUint8* src, bool signedMode)
   1007 {
   1008 	using namespace EtcDecompressInternal;
   1009 
   1010 	const int		numBlocksX		= divRoundUp(width, 4);
   1011 	const int		numBlocksY		= divRoundUp(height, 4);
   1012 	deUint8* const	dstPtr			= (deUint8*)dst.getDataPtr();
   1013 	const int		dstRowPitch		= dst.getRowPitch();
   1014 	const int		dstPixelSize	= ETC2_UNCOMPRESSED_PIXEL_SIZE_R11;
   1015 
   1016 	for (int blockY = 0; blockY < numBlocksY; blockY++)
   1017 	{
   1018 		for (int blockX = 0; blockX < numBlocksX; blockX++)
   1019 		{
   1020 			const deUint64	compressedBlock = get64BitBlock(src, blockY*numBlocksX + blockX);
   1021 			deUint8			uncompressedBlock[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11];
   1022 
   1023 			// Decompress.
   1024 			decompressEAC11Block(uncompressedBlock, compressedBlock, signedMode);
   1025 
   1026 			// Write to dst.
   1027 			const int baseX = blockX*ETC2_BLOCK_WIDTH;
   1028 			const int baseY = blockY*ETC2_BLOCK_HEIGHT;
   1029 			for (int y = 0; y < de::min((int)ETC2_BLOCK_HEIGHT, height-baseY); y++)
   1030 			{
   1031 				for (int x = 0; x < de::min((int)ETC2_BLOCK_WIDTH, width-baseX); x++)
   1032 				{
   1033 					DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_R11 == 2);
   1034 
   1035 					if (signedMode)
   1036 					{
   1037 						const deInt16* const	srcPixel = (deInt16*)&uncompressedBlock[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
   1038 						deInt16* const			dstPixel = (deInt16*)(dstPtr + (baseY+y)*dstRowPitch + (baseX+x)*dstPixelSize);
   1039 
   1040 						dstPixel[0] = extend11To16WithSign(srcPixel[0]);
   1041 					}
   1042 					else
   1043 					{
   1044 						const deUint16* const	srcPixel = (deUint16*)&uncompressedBlock[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
   1045 						deUint16* const			dstPixel = (deUint16*)(dstPtr + (baseY+y)*dstRowPitch + (baseX+x)*dstPixelSize);
   1046 
   1047 						dstPixel[0] = extend11To16(srcPixel[0]);
   1048 					}
   1049 				}
   1050 			}
   1051 		}
   1052 	}
   1053 }
   1054 
   1055 static void decompressEAC_RG11 (const tcu::PixelBufferAccess& dst, int width, int height, const deUint8* src, bool signedMode)
   1056 {
   1057 	using namespace EtcDecompressInternal;
   1058 
   1059 	const int		numBlocksX		= divRoundUp(width, 4);
   1060 	const int		numBlocksY		= divRoundUp(height, 4);
   1061 	deUint8* const	dstPtr			= (deUint8*)dst.getDataPtr();
   1062 	const int		dstRowPitch		= dst.getRowPitch();
   1063 	const int		dstPixelSize	= ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11;
   1064 
   1065 	for (int blockY = 0; blockY < numBlocksY; blockY++)
   1066 	{
   1067 		for (int blockX = 0; blockX < numBlocksX; blockX++)
   1068 		{
   1069 			const deUint64	compressedBlockR = get128BitBlockStart(src, blockY*numBlocksX + blockX);
   1070 			const deUint64	compressedBlockG = get128BitBlockEnd(src, blockY*numBlocksX + blockX);
   1071 			deUint8			uncompressedBlockR[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11];
   1072 			deUint8			uncompressedBlockG[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11];
   1073 
   1074 			// Decompress.
   1075 			decompressEAC11Block(uncompressedBlockR, compressedBlockR, signedMode);
   1076 			decompressEAC11Block(uncompressedBlockG, compressedBlockG, signedMode);
   1077 
   1078 			// Write to dst.
   1079 			const int baseX = blockX*ETC2_BLOCK_WIDTH;
   1080 			const int baseY = blockY*ETC2_BLOCK_HEIGHT;
   1081 			for (int y = 0; y < de::min((int)ETC2_BLOCK_HEIGHT, height-baseY); y++)
   1082 			{
   1083 				for (int x = 0; x < de::min((int)ETC2_BLOCK_WIDTH, width-baseX); x++)
   1084 				{
   1085 					DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11 == 4);
   1086 
   1087 					if (signedMode)
   1088 					{
   1089 						const deInt16* const	srcPixelR	= (deInt16*)&uncompressedBlockR[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
   1090 						const deInt16* const	srcPixelG	= (deInt16*)&uncompressedBlockG[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
   1091 						deInt16* const			dstPixel	= (deInt16*)(dstPtr + (baseY+y)*dstRowPitch + (baseX+x)*dstPixelSize);
   1092 
   1093 						dstPixel[0] = extend11To16WithSign(srcPixelR[0]);
   1094 						dstPixel[1] = extend11To16WithSign(srcPixelG[0]);
   1095 					}
   1096 					else
   1097 					{
   1098 						const deUint16* const	srcPixelR	= (deUint16*)&uncompressedBlockR[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
   1099 						const deUint16* const	srcPixelG	= (deUint16*)&uncompressedBlockG[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
   1100 						deUint16* const			dstPixel	= (deUint16*)(dstPtr + (baseY+y)*dstRowPitch + (baseX+x)*dstPixelSize);
   1101 
   1102 						dstPixel[0] = extend11To16(srcPixelR[0]);
   1103 						dstPixel[1] = extend11To16(srcPixelG[0]);
   1104 					}
   1105 				}
   1106 			}
   1107 		}
   1108 	}
   1109 }
   1110 
   1111 namespace ASTCDecompressInternal
   1112 {
   1113 
   1114 enum // Upper bounds for ASTC block dimensions; presumably in texels - confirm against users of these constants.
   1115 {
   1116 	ASTC_MAX_BLOCK_WIDTH	= 12, //!< Largest supported block width.
   1117 	ASTC_MAX_BLOCK_HEIGHT	= 12 //!< Largest supported block height.
   1118 };
   1119 
   1120 static inline deUint32 getBit (deUint32 src, int ndx)
   1121 {
   1122 	DE_ASSERT(de::inBounds(ndx, 0, 32));
   1123 	return (src >> ndx) & 1;
   1124 }
   1125 
   1126 static inline deUint32 getBits (deUint32 src, int low, int high)
   1127 {
   1128 	const int numBits = (high-low) + 1;
   1129 	DE_ASSERT(de::inRange(numBits, 1, 32));
   1130 	return (src >> low) & ((1u<<numBits)-1);
   1131 }
   1132 
   1133 static inline bool isBitSet (deUint32 src, int ndx)
   1134 {
   1135 	return getBit(src, ndx) != 0;
   1136 }
   1137 
   1138 static inline deUint32 reverseBits (deUint32 src, int numBits)
   1139 {
   1140 	DE_ASSERT(de::inRange(numBits, 0, 32));
   1141 	deUint32 result = 0;
   1142 	for (int i = 0; i < numBits; i++)
   1143 		result |= ((src >> i) & 1) << (numBits-1-i);
   1144 	return result;
   1145 }
   1146 
   1147 static inline deUint32 bitReplicationScale (deUint32 src, int numSrcBits, int numDstBits)
   1148 {
   1149 	DE_ASSERT(numSrcBits <= numDstBits);
   1150 	DE_ASSERT((src & ((1<<numSrcBits)-1)) == src);
   1151 	deUint32 dst = 0;
   1152 	for (int shift = numDstBits-numSrcBits; shift > -numSrcBits; shift -= numSrcBits)
   1153 		dst |= shift >= 0 ? src << shift : src >> -shift;
   1154 	return dst;
   1155 }
   1156 
   1157 static inline deInt32 signExtend (deInt32 src, int numSrcBits)
   1158 {
   1159 	DE_ASSERT(de::inRange(numSrcBits, 2, 31));
   1160 	const bool negative = (src & (1 << (numSrcBits-1))) != 0;
   1161 	return src | (negative ? ~((1 << numSrcBits) - 1) : 0);
   1162 }
   1163 
   1164 static inline bool isFloat16InfOrNan (deFloat16 v)
   1165 {
   1166 	return getBits(v, 10, 14) == 31;
   1167 }
   1168 
   1169 // A helper for getting bits from a 128-bit block.
   1170 class Block128
   1171 {
   1172 private:
   1173 	typedef deUint64 Word;
   1174 
   1175 	enum
   1176 	{
   1177 		WORD_BYTES	= sizeof(Word),
   1178 		WORD_BITS	= 8*WORD_BYTES,
   1179 		NUM_WORDS	= 128 / WORD_BITS
   1180 	};
   1181 
   1182 	DE_STATIC_ASSERT(128 % WORD_BITS == 0);
   1183 
   1184 public:
   1185 	Block128 (const deUint8* src)
   1186 	{
   1187 		for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++)
   1188 		{
   1189 			m_words[wordNdx] = 0;
   1190 			for (int byteNdx = 0; byteNdx < WORD_BYTES; byteNdx++)
   1191 				m_words[wordNdx] |= (Word)src[wordNdx*WORD_BYTES + byteNdx] << (8*byteNdx);
   1192 		}
   1193 	}
   1194 
   1195 	deUint32 getBit (int ndx) const
   1196 	{
   1197 		DE_ASSERT(de::inBounds(ndx, 0, 128));
   1198 		return (m_words[ndx / WORD_BITS] >> (ndx % WORD_BITS)) & 1;
   1199 	}
   1200 
   1201 	deUint32 getBits (int low, int high) const
   1202 	{
   1203 		DE_ASSERT(de::inBounds(low, 0, 128));
   1204 		DE_ASSERT(de::inBounds(high, 0, 128));
   1205 		DE_ASSERT(de::inRange(high-low+1, 0, 32));
   1206 
   1207 		if (high-low+1 == 0)
   1208 			return 0;
   1209 
   1210 		const int word0Ndx = low / WORD_BITS;
   1211 		const int word1Ndx = high / WORD_BITS;
   1212 
   1213 		// \note "foo << bar << 1" done instead of "foo << (bar+1)" to avoid overflow, i.e. shift amount being too big.
   1214 
   1215 		if (word0Ndx == word1Ndx)
   1216 			return (m_words[word0Ndx] & ((((Word)1 << high%WORD_BITS << 1) - 1))) >> ((Word)low % WORD_BITS);
   1217 		else
   1218 		{
   1219 			DE_ASSERT(word1Ndx == word0Ndx + 1);
   1220 
   1221 			return (deUint32)(m_words[word0Ndx] >> (low%WORD_BITS)) |
   1222 				   (deUint32)((m_words[word1Ndx] & (((Word)1 << high%WORD_BITS << 1) - 1)) << (high-low - high%WORD_BITS));
   1223 		}
   1224 	}
   1225 
   1226 	bool isBitSet (int ndx) const
   1227 	{
   1228 		DE_ASSERT(de::inBounds(ndx, 0, 128));
   1229 		return getBit(ndx) != 0;
   1230 	}
   1231 
   1232 private:
   1233 	Word m_words[NUM_WORDS];
   1234 };
   1235 
   1236 // A helper for sequential access into a Block128.
   1237 class BitAccessStream
   1238 {
   1239 public:
   1240 	BitAccessStream (const Block128& src, int startNdxInSrc, int length, bool forward)
   1241 		: m_src				(src)
   1242 		, m_startNdxInSrc	(startNdxInSrc)
   1243 		, m_length			(length)
   1244 		, m_forward			(forward)
   1245 		, m_ndx				(0)
   1246 	{
   1247 	}
   1248 
   1249 	// Get the next num bits. Bits at positions greater than or equal to m_length are zeros.
   1250 	deUint32 getNext (int num)
   1251 	{
   1252 		if (num == 0 || m_ndx >= m_length)
   1253 			return 0;
   1254 
   1255 		const int end				= m_ndx + num;
   1256 		const int numBitsFromSrc	= de::max(0, de::min(m_length, end) - m_ndx);
   1257 		const int low				= m_ndx;
   1258 		const int high				= m_ndx + numBitsFromSrc - 1;
   1259 
   1260 		m_ndx += num;
   1261 
   1262 		return m_forward ?			   m_src.getBits(m_startNdxInSrc + low,  m_startNdxInSrc + high)
   1263 						 : reverseBits(m_src.getBits(m_startNdxInSrc - high, m_startNdxInSrc - low), numBitsFromSrc);
   1264 	}
   1265 
   1266 private:
   1267 	const Block128&		m_src;
   1268 	const int			m_startNdxInSrc;
   1269 	const int			m_length;
   1270 	const bool			m_forward;
   1271 
   1272 	int					m_ndx;
   1273 };
   1274 
   1275 enum ISEMode // Encoding used for a sequence of ISE values; selects the bit cost formula in computeNumRequiredBits().
   1276 {
   1277 	ISEMODE_TRIT = 0, //!< Trit-based: 8 shared bits per 5 values plus numBits plain bits per value.
   1278 	ISEMODE_QUINT, //!< Quint-based: 7 shared bits per 3 values plus numBits plain bits per value.
   1279 	ISEMODE_PLAIN_BIT, //!< Plain bits only: numBits bits per value.
   1280 
   1281 	ISEMODE_LAST //!< Number of modes; also used as an "unset" marker by ASTCBlockMode's constructor.
   1282 };
   1283 
   1284 struct ISEParams
   1285 {
   1286 	ISEMode		mode;
   1287 	int			numBits;
   1288 
   1289 	ISEParams (ISEMode mode_, int numBits_) : mode(mode_), numBits(numBits_) {}
   1290 };
   1291 
   1292 static inline int computeNumRequiredBits (const ISEParams& iseParams, int numValues)
   1293 {
   1294 	switch (iseParams.mode)
   1295 	{
   1296 		case ISEMODE_TRIT:			return divRoundUp(numValues*8, 5) + numValues*iseParams.numBits;
   1297 		case ISEMODE_QUINT:			return divRoundUp(numValues*7, 3) + numValues*iseParams.numBits;
   1298 		case ISEMODE_PLAIN_BIT:		return numValues*iseParams.numBits;
   1299 		default:
   1300 			DE_ASSERT(false);
   1301 			return -1;
   1302 	}
   1303 }
   1304 
   1305 struct ISEDecodedResult // One decoded ISE value. NOTE(review): the producer of this struct is outside this chunk; field meanings below are partly assumed.
   1306 {
   1307 	deUint32 m; // presumably the plain-bit part of the value - confirm against the ISE decoder.
   1308 	deUint32 tq; //!< Trit or quint value, depending on ISE mode.
   1309 	deUint32 v; // presumably the combined value - confirm against the ISE decoder.
   1310 };
   1311 
   1312 // Data from an ASTC block's "block mode" part (i.e. bits [0,10]).
   1313 struct ASTCBlockMode
   1314 {
   1315 	bool		isError;
   1316 	// \note Following fields only relevant if !isError.
   1317 	bool		isVoidExtent;
   1318 	// \note Following fields only relevant if !isVoidExtent.
   1319 	bool		isDualPlane;
   1320 	int			weightGridWidth;
   1321 	int			weightGridHeight;
   1322 	ISEParams	weightISEParams;
   1323 
   1324 	ASTCBlockMode (void)
   1325 		: isError			(true)
   1326 		, isVoidExtent		(true)
   1327 		, isDualPlane		(true)
   1328 		, weightGridWidth	(-1)
   1329 		, weightGridHeight	(-1)
   1330 		, weightISEParams	(ISEMODE_LAST, -1)
   1331 	{
   1332 	}
   1333 };
   1334 
   1335 static inline int computeNumWeights (const ASTCBlockMode& mode)
   1336 {
   1337 	return mode.weightGridWidth * mode.weightGridHeight * (mode.isDualPlane ? 2 : 1);
   1338 }
   1339 
   1340 struct ColorEndpointPair // Two four-component unsigned vectors; presumably the two color endpoints of one partition - confirm against users.
   1341 {
   1342 	UVec4 e0; //!< First endpoint.
   1343 	UVec4 e1; //!< Second endpoint.
   1344 };
   1345 
   1346 struct TexelWeightPair // Two weights for one texel; presumably one per weight plane - confirm against users.
   1347 {
   1348 	deUint32 w[2]; //!< The two weight values.
   1349 };
   1350 
   1351 static ASTCBlockMode getASTCBlockMode (deUint32 blockModeData)
   1352 {
   1353 	ASTCBlockMode blockMode;
   1354 	blockMode.isError = true; // \note Set to false later, if not error.
   1355 
   1356 	blockMode.isVoidExtent = getBits(blockModeData, 0, 8) == 0x1fc;
   1357 
   1358 	if (!blockMode.isVoidExtent)
   1359 	{
   1360 		if ((getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 6, 8) == 7) || getBits(blockModeData, 0, 3) == 0)
   1361 			return blockMode; // Invalid ("reserved").
   1362 
   1363 		deUint32 r = (deUint32)-1; // \note Set in the following branches.
   1364 
   1365 		if (getBits(blockModeData, 0, 1) == 0)
   1366 		{
   1367 			const deUint32 r0	= getBit(blockModeData, 4);
   1368 			const deUint32 r1	= getBit(blockModeData, 2);
   1369 			const deUint32 r2	= getBit(blockModeData, 3);
   1370 			const deUint32 i78	= getBits(blockModeData, 7, 8);
   1371 
   1372 			r = (r2 << 2) | (r1 << 1) | (r0 << 0);
   1373 
   1374 			if (i78 == 3)
   1375 			{
   1376 				const bool i5 = isBitSet(blockModeData, 5);
   1377 				blockMode.weightGridWidth	= i5 ? 10 : 6;
   1378 				blockMode.weightGridHeight	= i5 ? 6  : 10;
   1379 			}
   1380 			else
   1381 			{
   1382 				const deUint32 a = getBits(blockModeData, 5, 6);
   1383 				switch (i78)
   1384 				{
   1385 					case 0:		blockMode.weightGridWidth = 12;		blockMode.weightGridHeight = a + 2;									break;
   1386 					case 1:		blockMode.weightGridWidth = a + 2;	blockMode.weightGridHeight = 12;									break;
   1387 					case 2:		blockMode.weightGridWidth = a + 6;	blockMode.weightGridHeight = getBits(blockModeData, 9, 10) + 6;		break;
   1388 					default: DE_ASSERT(false);
   1389 				}
   1390 			}
   1391 		}
   1392 		else
   1393 		{
   1394 			const deUint32 r0	= getBit(blockModeData, 4);
   1395 			const deUint32 r1	= getBit(blockModeData, 0);
   1396 			const deUint32 r2	= getBit(blockModeData, 1);
   1397 			const deUint32 i23	= getBits(blockModeData, 2, 3);
   1398 			const deUint32 a	= getBits(blockModeData, 5, 6);
   1399 
   1400 			r = (r2 << 2) | (r1 << 1) | (r0 << 0);
   1401 
   1402 			if (i23 == 3)
   1403 			{
   1404 				const deUint32	b	= getBit(blockModeData, 7);
   1405 				const bool		i8	= isBitSet(blockModeData, 8);
   1406 				blockMode.weightGridWidth	= i8 ? b+2 : a+2;
   1407 				blockMode.weightGridHeight	= i8 ? a+2 : b+6;
   1408 			}
   1409 			else
   1410 			{
   1411 				const deUint32 b = getBits(blockModeData, 7, 8);
   1412 
   1413 				switch (i23)
   1414 				{
   1415 					case 0:		blockMode.weightGridWidth = b + 4;	blockMode.weightGridHeight = a + 2;	break;
   1416 					case 1:		blockMode.weightGridWidth = b + 8;	blockMode.weightGridHeight = a + 2;	break;
   1417 					case 2:		blockMode.weightGridWidth = a + 2;	blockMode.weightGridHeight = b + 8;	break;
   1418 					default: DE_ASSERT(false);
   1419 				}
   1420 			}
   1421 		}
   1422 
   1423 		const bool	zeroDH		= getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 7, 8) == 2;
   1424 		const bool	h			= zeroDH ? 0 : isBitSet(blockModeData, 9);
   1425 		blockMode.isDualPlane	= zeroDH ? 0 : isBitSet(blockModeData, 10);
   1426 
   1427 		{
   1428 			ISEMode&	m	= blockMode.weightISEParams.mode;
   1429 			int&		b	= blockMode.weightISEParams.numBits;
   1430 			m = ISEMODE_PLAIN_BIT;
   1431 			b = 0;
   1432 
   1433 			if (h)
   1434 			{
   1435 				switch (r)
   1436 				{
   1437 					case 2:							m = ISEMODE_QUINT;	b = 1;	break;
   1438 					case 3:		m = ISEMODE_TRIT;						b = 2;	break;
   1439 					case 4:												b = 4;	break;
   1440 					case 5:							m = ISEMODE_QUINT;	b = 2;	break;
   1441 					case 6:		m = ISEMODE_TRIT;						b = 3;	break;
   1442 					case 7:												b = 5;	break;
   1443 					default: DE_ASSERT(false);
   1444 				}
   1445 			}
   1446 			else
   1447 			{
   1448 				switch (r)
   1449 				{
   1450 					case 2: 											b = 1;	break;
   1451 					case 3: 	m = ISEMODE_TRIT;								break;
   1452 					case 4: 											b = 2;	break;
   1453 					case 5: 						m = ISEMODE_QUINT;			break;
   1454 					case 6: 	m = ISEMODE_TRIT;						b = 1;	break;
   1455 					case 7: 											b = 3;	break;
   1456 					default: DE_ASSERT(false);
   1457 				}
   1458 			}
   1459 		}
   1460 	}
   1461 
   1462 	blockMode.isError = false;
   1463 	return blockMode;
   1464 }
   1465 
   1466 static inline void setASTCErrorColorBlock (void* dst, int blockWidth, int blockHeight, bool isSRGB)
   1467 {
   1468 	if (isSRGB)
   1469 	{
   1470 		deUint8* const dstU = (deUint8*)dst;
   1471 
   1472 		for (int i = 0; i < blockWidth*blockHeight; i++)
   1473 		{
   1474 			dstU[4*i + 0] = 0xff;
   1475 			dstU[4*i + 1] = 0;
   1476 			dstU[4*i + 2] = 0xff;
   1477 			dstU[4*i + 3] = 0xff;
   1478 		}
   1479 	}
   1480 	else
   1481 	{
   1482 		float* const dstF = (float*)dst;
   1483 
   1484 		for (int i = 0; i < blockWidth*blockHeight; i++)
   1485 		{
   1486 			dstF[4*i + 0] = 1.0f;
   1487 			dstF[4*i + 1] = 0.0f;
   1488 			dstF[4*i + 2] = 1.0f;
   1489 			dstF[4*i + 3] = 1.0f;
   1490 		}
   1491 	}
   1492 }
   1493 
   1494 static void decodeVoidExtentBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode)
   1495 {
   1496 	const deUint32	minSExtent			= blockData.getBits(12, 24);
   1497 	const deUint32	maxSExtent			= blockData.getBits(25, 37);
   1498 	const deUint32	minTExtent			= blockData.getBits(38, 50);
   1499 	const deUint32	maxTExtent			= blockData.getBits(51, 63);
   1500 	const bool		allExtentsAllOnes	= minSExtent == 0x1fff && maxSExtent == 0x1fff && minTExtent == 0x1fff && maxTExtent == 0x1fff;
   1501 	const bool		isHDRBlock			= blockData.isBitSet(9);
   1502 
   1503 	if ((isLDRMode && isHDRBlock) || (!allExtentsAllOnes && (minSExtent >= maxSExtent || minTExtent >= maxTExtent)))
   1504 	{
   1505 		setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
   1506 		return;
   1507 	}
   1508 
   1509 	const deUint32 rgba[4] =
   1510 	{
   1511 		blockData.getBits(64,  79),
   1512 		blockData.getBits(80,  95),
   1513 		blockData.getBits(96,  111),
   1514 		blockData.getBits(112, 127)
   1515 	};
   1516 
   1517 	if (isSRGB)
   1518 	{
   1519 		deUint8* const dstU = (deUint8*)dst;
   1520 		for (int i = 0; i < blockWidth*blockHeight; i++)
   1521 		for (int c = 0; c < 4; c++)
   1522 			dstU[i*4 + c] = (rgba[c] & 0xff00) >> 8;
   1523 	}
   1524 	else
   1525 	{
   1526 		float* const dstF = (float*)dst;
   1527 
   1528 		if (isHDRBlock)
   1529 		{
   1530 			for (int c = 0; c < 4; c++)
   1531 			{
   1532 				if (isFloat16InfOrNan(rgba[c]))
   1533 					throw tcu::InternalError("Infinity or NaN color component in HDR void extent block in ASTC texture (behavior undefined by ASTC specification)");
   1534 			}
   1535 
   1536 			for (int i = 0; i < blockWidth*blockHeight; i++)
   1537 			for (int c = 0; c < 4; c++)
   1538 				dstF[i*4 + c] = deFloat16To32((deFloat16)rgba[c]);
   1539 		}
   1540 		else
   1541 		{
   1542 			for (int i = 0; i < blockWidth*blockHeight; i++)
   1543 			for (int c = 0; c < 4; c++)
   1544 				dstF[i*4 + c] = rgba[c] == 65535 ? 1.0f : (float)rgba[c] / 65536.0f;
   1545 		}
   1546 	}
   1547 
   1548 	return;
   1549 }
   1550 
   1551 static void decodeColorEndpointModes (deUint32* endpointModesDst, const Block128& blockData, int numPartitions, int extraCemBitsStart)
   1552 {
   1553 	if (numPartitions == 1)
   1554 		endpointModesDst[0] = blockData.getBits(13, 16);
   1555 	else
   1556 	{
   1557 		const deUint32 highLevelSelector = blockData.getBits(23, 24);
   1558 
   1559 		if (highLevelSelector == 0)
   1560 		{
   1561 			const deUint32 mode = blockData.getBits(25, 28);
   1562 			for (int i = 0; i < numPartitions; i++)
   1563 				endpointModesDst[i] = mode;
   1564 		}
   1565 		else
   1566 		{
   1567 			for (int partNdx = 0; partNdx < numPartitions; partNdx++)
   1568 			{
   1569 				const deUint32 cemClass		= highLevelSelector - (blockData.isBitSet(25 + partNdx) ? 0 : 1);
   1570 				const deUint32 lowBit0Ndx	= numPartitions + 2*partNdx;
   1571 				const deUint32 lowBit1Ndx	= numPartitions + 2*partNdx + 1;
   1572 				const deUint32 lowBit0		= blockData.getBit(lowBit0Ndx < 4 ? 25+lowBit0Ndx : extraCemBitsStart+lowBit0Ndx-4);
   1573 				const deUint32 lowBit1		= blockData.getBit(lowBit1Ndx < 4 ? 25+lowBit1Ndx : extraCemBitsStart+lowBit1Ndx-4);
   1574 
   1575 				endpointModesDst[partNdx] = (cemClass << 2) | (lowBit1 << 1) | lowBit0;
   1576 			}
   1577 		}
   1578 	}
   1579 }
   1580 
   1581 static inline int computeNumColorEndpointValues (deUint32 endpointMode)
   1582 {
   1583 	DE_ASSERT(endpointMode < 16);
   1584 	return (endpointMode/4 + 1) * 2;
   1585 }
   1586 
   1587 static int computeNumColorEndpointValues (const deUint32* endpointModes, int numPartitions)
   1588 {
   1589 	int result = 0;
   1590 	for (int i = 0; i < numPartitions; i++)
   1591 		result += computeNumColorEndpointValues(endpointModes[i]);
   1592 	return result;
   1593 }
   1594 
   1595 static void decodeISETritBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits)
   1596 {
   1597 	DE_ASSERT(de::inRange(numValues, 1, 5));
   1598 
   1599 	deUint32 m[5];
   1600 
   1601 	m[0]			= data.getNext(numBits);
   1602 	deUint32 T01	= data.getNext(2);
   1603 	m[1]			= data.getNext(numBits);
   1604 	deUint32 T23	= data.getNext(2);
   1605 	m[2]			= data.getNext(numBits);
   1606 	deUint32 T4		= data.getNext(1);
   1607 	m[3]			= data.getNext(numBits);
   1608 	deUint32 T56	= data.getNext(2);
   1609 	m[4]			= data.getNext(numBits);
   1610 	deUint32 T7		= data.getNext(1);
   1611 
   1612 	switch (numValues)
   1613 	{
   1614 		// \note Fall-throughs.
   1615 		case 1: T23		= 0;
   1616 		case 2: T4		= 0;
   1617 		case 3: T56		= 0;
   1618 		case 4: T7		= 0;
   1619 		case 5: break;
   1620 		default:
   1621 			DE_ASSERT(false);
   1622 	}
   1623 
   1624 	const deUint32 T = (T7 << 7) | (T56 << 5) | (T4 << 4) | (T23 << 2) | (T01 << 0);
   1625 
   1626 	static const deUint32 tritsFromT[256][5] =
   1627 	{
   1628 		{ 0,0,0,0,0 }, { 1,0,0,0,0 }, { 2,0,0,0,0 }, { 0,0,2,0,0 }, { 0,1,0,0,0 }, { 1,1,0,0,0 }, { 2,1,0,0,0 }, { 1,0,2,0,0 }, { 0,2,0,0,0 }, { 1,2,0,0,0 }, { 2,2,0,0,0 }, { 2,0,2,0,0 }, { 0,2,2,0,0 }, { 1,2,2,0,0 }, { 2,2,2,0,0 }, { 2,0,2,0,0 },
   1629 		{ 0,0,1,0,0 }, { 1,0,1,0,0 }, { 2,0,1,0,0 }, { 0,1,2,0,0 }, { 0,1,1,0,0 }, { 1,1,1,0,0 }, { 2,1,1,0,0 }, { 1,1,2,0,0 }, { 0,2,1,0,0 }, { 1,2,1,0,0 }, { 2,2,1,0,0 }, { 2,1,2,0,0 }, { 0,0,0,2,2 }, { 1,0,0,2,2 }, { 2,0,0,2,2 }, { 0,0,2,2,2 },
   1630 		{ 0,0,0,1,0 }, { 1,0,0,1,0 }, { 2,0,0,1,0 }, { 0,0,2,1,0 }, { 0,1,0,1,0 }, { 1,1,0,1,0 }, { 2,1,0,1,0 }, { 1,0,2,1,0 }, { 0,2,0,1,0 }, { 1,2,0,1,0 }, { 2,2,0,1,0 }, { 2,0,2,1,0 }, { 0,2,2,1,0 }, { 1,2,2,1,0 }, { 2,2,2,1,0 }, { 2,0,2,1,0 },
   1631 		{ 0,0,1,1,0 }, { 1,0,1,1,0 }, { 2,0,1,1,0 }, { 0,1,2,1,0 }, { 0,1,1,1,0 }, { 1,1,1,1,0 }, { 2,1,1,1,0 }, { 1,1,2,1,0 }, { 0,2,1,1,0 }, { 1,2,1,1,0 }, { 2,2,1,1,0 }, { 2,1,2,1,0 }, { 0,1,0,2,2 }, { 1,1,0,2,2 }, { 2,1,0,2,2 }, { 1,0,2,2,2 },
   1632 		{ 0,0,0,2,0 }, { 1,0,0,2,0 }, { 2,0,0,2,0 }, { 0,0,2,2,0 }, { 0,1,0,2,0 }, { 1,1,0,2,0 }, { 2,1,0,2,0 }, { 1,0,2,2,0 }, { 0,2,0,2,0 }, { 1,2,0,2,0 }, { 2,2,0,2,0 }, { 2,0,2,2,0 }, { 0,2,2,2,0 }, { 1,2,2,2,0 }, { 2,2,2,2,0 }, { 2,0,2,2,0 },
   1633 		{ 0,0,1,2,0 }, { 1,0,1,2,0 }, { 2,0,1,2,0 }, { 0,1,2,2,0 }, { 0,1,1,2,0 }, { 1,1,1,2,0 }, { 2,1,1,2,0 }, { 1,1,2,2,0 }, { 0,2,1,2,0 }, { 1,2,1,2,0 }, { 2,2,1,2,0 }, { 2,1,2,2,0 }, { 0,2,0,2,2 }, { 1,2,0,2,2 }, { 2,2,0,2,2 }, { 2,0,2,2,2 },
   1634 		{ 0,0,0,0,2 }, { 1,0,0,0,2 }, { 2,0,0,0,2 }, { 0,0,2,0,2 }, { 0,1,0,0,2 }, { 1,1,0,0,2 }, { 2,1,0,0,2 }, { 1,0,2,0,2 }, { 0,2,0,0,2 }, { 1,2,0,0,2 }, { 2,2,0,0,2 }, { 2,0,2,0,2 }, { 0,2,2,0,2 }, { 1,2,2,0,2 }, { 2,2,2,0,2 }, { 2,0,2,0,2 },
   1635 		{ 0,0,1,0,2 }, { 1,0,1,0,2 }, { 2,0,1,0,2 }, { 0,1,2,0,2 }, { 0,1,1,0,2 }, { 1,1,1,0,2 }, { 2,1,1,0,2 }, { 1,1,2,0,2 }, { 0,2,1,0,2 }, { 1,2,1,0,2 }, { 2,2,1,0,2 }, { 2,1,2,0,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,0,2,2,2 },
   1636 		{ 0,0,0,0,1 }, { 1,0,0,0,1 }, { 2,0,0,0,1 }, { 0,0,2,0,1 }, { 0,1,0,0,1 }, { 1,1,0,0,1 }, { 2,1,0,0,1 }, { 1,0,2,0,1 }, { 0,2,0,0,1 }, { 1,2,0,0,1 }, { 2,2,0,0,1 }, { 2,0,2,0,1 }, { 0,2,2,0,1 }, { 1,2,2,0,1 }, { 2,2,2,0,1 }, { 2,0,2,0,1 },
   1637 		{ 0,0,1,0,1 }, { 1,0,1,0,1 }, { 2,0,1,0,1 }, { 0,1,2,0,1 }, { 0,1,1,0,1 }, { 1,1,1,0,1 }, { 2,1,1,0,1 }, { 1,1,2,0,1 }, { 0,2,1,0,1 }, { 1,2,1,0,1 }, { 2,2,1,0,1 }, { 2,1,2,0,1 }, { 0,0,1,2,2 }, { 1,0,1,2,2 }, { 2,0,1,2,2 }, { 0,1,2,2,2 },
   1638 		{ 0,0,0,1,1 }, { 1,0,0,1,1 }, { 2,0,0,1,1 }, { 0,0,2,1,1 }, { 0,1,0,1,1 }, { 1,1,0,1,1 }, { 2,1,0,1,1 }, { 1,0,2,1,1 }, { 0,2,0,1,1 }, { 1,2,0,1,1 }, { 2,2,0,1,1 }, { 2,0,2,1,1 }, { 0,2,2,1,1 }, { 1,2,2,1,1 }, { 2,2,2,1,1 }, { 2,0,2,1,1 },
   1639 		{ 0,0,1,1,1 }, { 1,0,1,1,1 }, { 2,0,1,1,1 }, { 0,1,2,1,1 }, { 0,1,1,1,1 }, { 1,1,1,1,1 }, { 2,1,1,1,1 }, { 1,1,2,1,1 }, { 0,2,1,1,1 }, { 1,2,1,1,1 }, { 2,2,1,1,1 }, { 2,1,2,1,1 }, { 0,1,1,2,2 }, { 1,1,1,2,2 }, { 2,1,1,2,2 }, { 1,1,2,2,2 },
   1640 		{ 0,0,0,2,1 }, { 1,0,0,2,1 }, { 2,0,0,2,1 }, { 0,0,2,2,1 }, { 0,1,0,2,1 }, { 1,1,0,2,1 }, { 2,1,0,2,1 }, { 1,0,2,2,1 }, { 0,2,0,2,1 }, { 1,2,0,2,1 }, { 2,2,0,2,1 }, { 2,0,2,2,1 }, { 0,2,2,2,1 }, { 1,2,2,2,1 }, { 2,2,2,2,1 }, { 2,0,2,2,1 },
   1641 		{ 0,0,1,2,1 }, { 1,0,1,2,1 }, { 2,0,1,2,1 }, { 0,1,2,2,1 }, { 0,1,1,2,1 }, { 1,1,1,2,1 }, { 2,1,1,2,1 }, { 1,1,2,2,1 }, { 0,2,1,2,1 }, { 1,2,1,2,1 }, { 2,2,1,2,1 }, { 2,1,2,2,1 }, { 0,2,1,2,2 }, { 1,2,1,2,2 }, { 2,2,1,2,2 }, { 2,1,2,2,2 },
   1642 		{ 0,0,0,1,2 }, { 1,0,0,1,2 }, { 2,0,0,1,2 }, { 0,0,2,1,2 }, { 0,1,0,1,2 }, { 1,1,0,1,2 }, { 2,1,0,1,2 }, { 1,0,2,1,2 }, { 0,2,0,1,2 }, { 1,2,0,1,2 }, { 2,2,0,1,2 }, { 2,0,2,1,2 }, { 0,2,2,1,2 }, { 1,2,2,1,2 }, { 2,2,2,1,2 }, { 2,0,2,1,2 },
   1643 		{ 0,0,1,1,2 }, { 1,0,1,1,2 }, { 2,0,1,1,2 }, { 0,1,2,1,2 }, { 0,1,1,1,2 }, { 1,1,1,1,2 }, { 2,1,1,1,2 }, { 1,1,2,1,2 }, { 0,2,1,1,2 }, { 1,2,1,1,2 }, { 2,2,1,1,2 }, { 2,1,2,1,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,1,2,2,2 }
   1644 	};
   1645 
   1646 	const deUint32 (& trits)[5] = tritsFromT[T];
   1647 
   1648 	for (int i = 0; i < numValues; i++)
   1649 	{
   1650 		dst[i].m	= m[i];
   1651 		dst[i].tq	= trits[i];
   1652 		dst[i].v	= (trits[i] << numBits) + m[i];
   1653 	}
   1654 }
   1655 
   1656 static void decodeISEQuintBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits)
   1657 {
   1658 	DE_ASSERT(de::inRange(numValues, 1, 3));
   1659 
   1660 	deUint32 m[3];
   1661 
   1662 	m[0]			= data.getNext(numBits);
   1663 	deUint32 Q012	= data.getNext(3);
   1664 	m[1]			= data.getNext(numBits);
   1665 	deUint32 Q34	= data.getNext(2);
   1666 	m[2]			= data.getNext(numBits);
   1667 	deUint32 Q56	= data.getNext(2);
   1668 
   1669 	switch (numValues)
   1670 	{
   1671 		// \note Fall-throughs.
   1672 		case 1: Q34		= 0;
   1673 		case 2: Q56		= 0;
   1674 		case 3: break;
   1675 		default:
   1676 			DE_ASSERT(false);
   1677 	}
   1678 
   1679 	const deUint32 Q = (Q56 << 5) | (Q34 << 3) | (Q012 << 0);
   1680 
   1681 	static const deUint32 quintsFromQ[256][3] =
   1682 	{
   1683 		{ 0,0,0 }, { 1,0,0 }, { 2,0,0 }, { 3,0,0 }, { 4,0,0 }, { 0,4,0 }, { 4,4,0 }, { 4,4,4 }, { 0,1,0 }, { 1,1,0 }, { 2,1,0 }, { 3,1,0 }, { 4,1,0 }, { 1,4,0 }, { 4,4,1 }, { 4,4,4 },
   1684 		{ 0,2,0 }, { 1,2,0 }, { 2,2,0 }, { 3,2,0 }, { 4,2,0 }, { 2,4,0 }, { 4,4,2 }, { 4,4,4 }, { 0,3,0 }, { 1,3,0 }, { 2,3,0 }, { 3,3,0 }, { 4,3,0 }, { 3,4,0 }, { 4,4,3 }, { 4,4,4 },
   1685 		{ 0,0,1 }, { 1,0,1 }, { 2,0,1 }, { 3,0,1 }, { 4,0,1 }, { 0,4,1 }, { 4,0,4 }, { 0,4,4 }, { 0,1,1 }, { 1,1,1 }, { 2,1,1 }, { 3,1,1 }, { 4,1,1 }, { 1,4,1 }, { 4,1,4 }, { 1,4,4 },
   1686 		{ 0,2,1 }, { 1,2,1 }, { 2,2,1 }, { 3,2,1 }, { 4,2,1 }, { 2,4,1 }, { 4,2,4 }, { 2,4,4 }, { 0,3,1 }, { 1,3,1 }, { 2,3,1 }, { 3,3,1 }, { 4,3,1 }, { 3,4,1 }, { 4,3,4 }, { 3,4,4 },
   1687 		{ 0,0,2 }, { 1,0,2 }, { 2,0,2 }, { 3,0,2 }, { 4,0,2 }, { 0,4,2 }, { 2,0,4 }, { 3,0,4 }, { 0,1,2 }, { 1,1,2 }, { 2,1,2 }, { 3,1,2 }, { 4,1,2 }, { 1,4,2 }, { 2,1,4 }, { 3,1,4 },
   1688 		{ 0,2,2 }, { 1,2,2 }, { 2,2,2 }, { 3,2,2 }, { 4,2,2 }, { 2,4,2 }, { 2,2,4 }, { 3,2,4 }, { 0,3,2 }, { 1,3,2 }, { 2,3,2 }, { 3,3,2 }, { 4,3,2 }, { 3,4,2 }, { 2,3,4 }, { 3,3,4 },
   1689 		{ 0,0,3 }, { 1,0,3 }, { 2,0,3 }, { 3,0,3 }, { 4,0,3 }, { 0,4,3 }, { 0,0,4 }, { 1,0,4 }, { 0,1,3 }, { 1,1,3 }, { 2,1,3 }, { 3,1,3 }, { 4,1,3 }, { 1,4,3 }, { 0,1,4 }, { 1,1,4 },
   1690 		{ 0,2,3 }, { 1,2,3 }, { 2,2,3 }, { 3,2,3 }, { 4,2,3 }, { 2,4,3 }, { 0,2,4 }, { 1,2,4 }, { 0,3,3 }, { 1,3,3 }, { 2,3,3 }, { 3,3,3 }, { 4,3,3 }, { 3,4,3 }, { 0,3,4 }, { 1,3,4 }
   1691 	};
   1692 
   1693 	const deUint32 (& quints)[3] = quintsFromQ[Q];
   1694 
   1695 	for (int i = 0; i < numValues; i++)
   1696 	{
   1697 		dst[i].m	= m[i];
   1698 		dst[i].tq	= quints[i];
   1699 		dst[i].v	= (quints[i] << numBits) + m[i];
   1700 	}
   1701 }
   1702 
   1703 static inline void decodeISEBitBlock (ISEDecodedResult* dst, BitAccessStream& data, int numBits)
   1704 {
   1705 	dst[0].m = data.getNext(numBits);
   1706 	dst[0].v = dst[0].m;
   1707 }
   1708 
   1709 static void decodeISE (ISEDecodedResult* dst, int numValues, BitAccessStream& data, const ISEParams& params)
   1710 {
   1711 	if (params.mode == ISEMODE_TRIT)
   1712 	{
   1713 		const int numBlocks = divRoundUp(numValues, 5);
   1714 		for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
   1715 		{
   1716 			const int numValuesInBlock = blockNdx == numBlocks-1 ? numValues - 5*(numBlocks-1) : 5;
   1717 			decodeISETritBlock(&dst[5*blockNdx], numValuesInBlock, data, params.numBits);
   1718 		}
   1719 	}
   1720 	else if (params.mode == ISEMODE_QUINT)
   1721 	{
   1722 		const int numBlocks = divRoundUp(numValues, 3);
   1723 		for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
   1724 		{
   1725 			const int numValuesInBlock = blockNdx == numBlocks-1 ? numValues - 3*(numBlocks-1) : 3;
   1726 			decodeISEQuintBlock(&dst[3*blockNdx], numValuesInBlock, data, params.numBits);
   1727 		}
   1728 	}
   1729 	else
   1730 	{
   1731 		DE_ASSERT(params.mode == ISEMODE_PLAIN_BIT);
   1732 		for (int i = 0; i < numValues; i++)
   1733 			decodeISEBitBlock(&dst[i], data, params.numBits);
   1734 	}
   1735 }
   1736 
   1737 static ISEParams computeMaximumRangeISEParams (int numAvailableBits, int numValuesInSequence)
   1738 {
   1739 	int curBitsForTritMode		= 6;
   1740 	int curBitsForQuintMode		= 5;
   1741 	int curBitsForPlainBitMode	= 8;
   1742 
   1743 	while (true)
   1744 	{
   1745 		DE_ASSERT(curBitsForTritMode > 0 || curBitsForQuintMode > 0 || curBitsForPlainBitMode > 0);
   1746 
   1747 		const int tritRange			= curBitsForTritMode > 0		? (3 << curBitsForTritMode) - 1			: -1;
   1748 		const int quintRange		= curBitsForQuintMode > 0		? (5 << curBitsForQuintMode) - 1		: -1;
   1749 		const int plainBitRange		= curBitsForPlainBitMode > 0	? (1 << curBitsForPlainBitMode) - 1		: -1;
   1750 		const int maxRange			= de::max(de::max(tritRange, quintRange), plainBitRange);
   1751 
   1752 		if (maxRange == tritRange)
   1753 		{
   1754 			const ISEParams params(ISEMODE_TRIT, curBitsForTritMode);
   1755 			if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
   1756 				return ISEParams(ISEMODE_TRIT, curBitsForTritMode);
   1757 			curBitsForTritMode--;
   1758 		}
   1759 		else if (maxRange == quintRange)
   1760 		{
   1761 			const ISEParams params(ISEMODE_QUINT, curBitsForQuintMode);
   1762 			if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
   1763 				return ISEParams(ISEMODE_QUINT, curBitsForQuintMode);
   1764 			curBitsForQuintMode--;
   1765 		}
   1766 		else
   1767 		{
   1768 			const ISEParams params(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
   1769 			DE_ASSERT(maxRange == plainBitRange);
   1770 			if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
   1771 				return ISEParams(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
   1772 			curBitsForPlainBitMode--;
   1773 		}
   1774 	}
   1775 }
   1776 
   1777 static void unquantizeColorEndpoints (deUint32* dst, const ISEDecodedResult* iseResults, int numEndpoints, const ISEParams& iseParams)
   1778 {
   1779 	if (iseParams.mode == ISEMODE_TRIT || iseParams.mode == ISEMODE_QUINT)
   1780 	{
   1781 		const int rangeCase				= iseParams.numBits*2 - (iseParams.mode == ISEMODE_TRIT ? 2 : 1);
   1782 		DE_ASSERT(de::inRange(rangeCase, 0, 10));
   1783 		static const deUint32	Ca[11]	= { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 };
   1784 		const deUint32			C		= Ca[rangeCase];
   1785 
   1786 		for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++)
   1787 		{
   1788 			const deUint32 a = getBit(iseResults[endpointNdx].m, 0);
   1789 			const deUint32 b = getBit(iseResults[endpointNdx].m, 1);
   1790 			const deUint32 c = getBit(iseResults[endpointNdx].m, 2);
   1791 			const deUint32 d = getBit(iseResults[endpointNdx].m, 3);
   1792 			const deUint32 e = getBit(iseResults[endpointNdx].m, 4);
   1793 			const deUint32 f = getBit(iseResults[endpointNdx].m, 5);
   1794 
   1795 			const deUint32 A = a == 0 ? 0 : (1<<9)-1;
   1796 			const deUint32 B = rangeCase == 0	? 0
   1797 							 : rangeCase == 1	? 0
   1798 							 : rangeCase == 2	? (b << 8) |									(b << 4) |				(b << 2) |	(b << 1)
   1799 							 : rangeCase == 3	? (b << 8) |												(b << 3) |	(b << 2)
   1800 							 : rangeCase == 4	? (c << 8) | (b << 7) |										(c << 3) |	(b << 2) |	(c << 1) |	(b << 0)
   1801 							 : rangeCase == 5	? (c << 8) | (b << 7) |													(c << 2) |	(b << 1) |	(c << 0)
   1802 							 : rangeCase == 6	? (d << 8) | (c << 7) | (b << 6) |										(d << 2) |	(c << 1) |	(b << 0)
   1803 							 : rangeCase == 7	? (d << 8) | (c << 7) | (b << 6) |													(d << 1) |	(c << 0)
   1804 							 : rangeCase == 8	? (e << 8) | (d << 7) | (c << 6) | (b << 5) |										(e << 1) |	(d << 0)
   1805 							 : rangeCase == 9	? (e << 8) | (d << 7) | (c << 6) | (b << 5) |													(e << 0)
   1806 							 : rangeCase == 10	? (f << 8) | (e << 7) | (d << 6) | (c << 5) |	(b << 4) |										(f << 0)
   1807 							 : (deUint32)-1;
   1808 			DE_ASSERT(B != (deUint32)-1);
   1809 
   1810 			dst[endpointNdx] = (((iseResults[endpointNdx].tq*C + B) ^ A) >> 2) | (A & 0x80);
   1811 		}
   1812 	}
   1813 	else
   1814 	{
   1815 		DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);
   1816 
   1817 		for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++)
   1818 			dst[endpointNdx] = bitReplicationScale(iseResults[endpointNdx].v, iseParams.numBits, 8);
   1819 	}
   1820 }
   1821 
   1822 static inline void bitTransferSigned (deInt32& a, deInt32& b)
   1823 {
   1824 	b >>= 1;
   1825 	b |= a & 0x80;
   1826 	a >>= 1;
   1827 	a &= 0x3f;
   1828 	if (isBitSet(a, 5))
   1829 		a -= 0x40;
   1830 }
   1831 
   1832 static inline UVec4 clampedRGBA (const tcu::IVec4& rgba)
   1833 {
   1834 	return UVec4(de::clamp(rgba.x(), 0, 0xff),
   1835 				 de::clamp(rgba.y(), 0, 0xff),
   1836 				 de::clamp(rgba.z(), 0, 0xff),
   1837 				 de::clamp(rgba.w(), 0, 0xff));
   1838 }
   1839 
   1840 static inline tcu::IVec4 blueContract (int r, int g, int b, int a)
   1841 {
   1842 	return tcu::IVec4((r+b)>>1, (g+b)>>1, b, a);
   1843 }
   1844 
   1845 static inline bool isColorEndpointModeHDR (deUint32 mode)
   1846 {
   1847 	return mode == 2	||
   1848 		   mode == 3	||
   1849 		   mode == 7	||
   1850 		   mode == 11	||
   1851 		   mode == 14	||
   1852 		   mode == 15;
   1853 }
   1854 
   1855 static void decodeHDREndpointMode7 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3)
   1856 {
   1857 	const deUint32 m10		= getBit(v1, 7) | (getBit(v2, 7) << 1);
   1858 	const deUint32 m23		= getBits(v0, 6, 7);
   1859 	const deUint32 majComp	= m10 != 3	? m10
   1860 							: m23 != 3	? m23
   1861 							:			  0;
   1862 	const deUint32 mode		= m10 != 3	? m23
   1863 							: m23 != 3	? 4
   1864 							:			  5;
   1865 
   1866 	deInt32			red		= (deInt32)getBits(v0, 0, 5);
   1867 	deInt32			green	= (deInt32)getBits(v1, 0, 4);
   1868 	deInt32			blue	= (deInt32)getBits(v2, 0, 4);
   1869 	deInt32			scale	= (deInt32)getBits(v3, 0, 4);
   1870 
   1871 	{
   1872 #define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
   1873 #define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5, V6,S6) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); SHOR(V6,S6,x6); } while (false)
   1874 
   1875 		const deUint32	x0	= getBit(v1, 6);
   1876 		const deUint32	x1	= getBit(v1, 5);
   1877 		const deUint32	x2	= getBit(v2, 6);
   1878 		const deUint32	x3	= getBit(v2, 5);
   1879 		const deUint32	x4	= getBit(v3, 7);
   1880 		const deUint32	x5	= getBit(v3, 6);
   1881 		const deUint32	x6	= getBit(v3, 5);
   1882 
   1883 		deInt32&		R	= red;
   1884 		deInt32&		G	= green;
   1885 		deInt32&		B	= blue;
   1886 		deInt32&		S	= scale;
   1887 
   1888 		switch (mode)
   1889 		{
   1890 			case 0: ASSIGN_X_BITS(R,9,  R,8,  R,7,  R,10,  R,6,  S,6,   S,5); break;
   1891 			case 1: ASSIGN_X_BITS(R,8,  G,5,  R,7,  B,5,   R,6,  R,10,  R,9); break;
   1892 			case 2: ASSIGN_X_BITS(R,9,  R,8,  R,7,  R,6,   S,7,  S,6,   S,5); break;
   1893 			case 3: ASSIGN_X_BITS(R,8,  G,5,  R,7,  B,5,   R,6,  S,6,   S,5); break;
   1894 			case 4: ASSIGN_X_BITS(G,6,  G,5,  B,6,  B,5,   R,6,  R,7,   S,5); break;
   1895 			case 5: ASSIGN_X_BITS(G,6,  G,5,  B,6,  B,5,   R,6,  S,6,   S,5); break;
   1896 			default:
   1897 				DE_ASSERT(false);
   1898 		}
   1899 
   1900 #undef ASSIGN_X_BITS
   1901 #undef SHOR
   1902 	}
   1903 
   1904 	static const int shiftAmounts[] = { 1, 1, 2, 3, 4, 5 };
   1905 	DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(shiftAmounts));
   1906 
   1907 	red		<<= shiftAmounts[mode];
   1908 	green	<<= shiftAmounts[mode];
   1909 	blue	<<= shiftAmounts[mode];
   1910 	scale	<<= shiftAmounts[mode];
   1911 
   1912 	if (mode != 5)
   1913 	{
   1914 		green	= red - green;
   1915 		blue	= red - blue;
   1916 	}
   1917 
   1918 	if (majComp == 1)
   1919 		std::swap(red, green);
   1920 	else if (majComp == 2)
   1921 		std::swap(red, blue);
   1922 
   1923 	e0 = UVec4(de::clamp(red	- scale,	0, 0xfff),
   1924 			   de::clamp(green	- scale,	0, 0xfff),
   1925 			   de::clamp(blue	- scale,	0, 0xfff),
   1926 			   0x780);
   1927 
   1928 	e1 = UVec4(de::clamp(red,				0, 0xfff),
   1929 			   de::clamp(green,				0, 0xfff),
   1930 			   de::clamp(blue,				0, 0xfff),
   1931 			   0x780);
   1932 }
   1933 
   1934 static void decodeHDREndpointMode11 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5)
   1935 {
   1936 	const deUint32 major = (getBit(v5, 7) << 1) | getBit(v4, 7);
   1937 
   1938 	if (major == 3)
   1939 	{
   1940 		e0 = UVec4(v0<<4, v2<<4, getBits(v4,0,6)<<5, 0x780);
   1941 		e1 = UVec4(v1<<4, v3<<4, getBits(v5,0,6)<<5, 0x780);
   1942 	}
   1943 	else
   1944 	{
   1945 		const deUint32 mode = (getBit(v3, 7) << 2) | (getBit(v2, 7) << 1) | getBit(v1, 7);
   1946 
   1947 		deInt32 a	= (deInt32)((getBit(v1, 6) << 8) | v0);
   1948 		deInt32 c	= (deInt32)(getBits(v1, 0, 5));
   1949 		deInt32 b0	= (deInt32)(getBits(v2, 0, 5));
   1950 		deInt32 b1	= (deInt32)(getBits(v3, 0, 5));
   1951 		deInt32 d0	= (deInt32)(getBits(v4, 0, 4));
   1952 		deInt32 d1	= (deInt32)(getBits(v5, 0, 4));
   1953 
   1954 		{
   1955 #define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
   1956 #define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); } while (false)
   1957 
   1958 			const deUint32 x0 = getBit(v2, 6);
   1959 			const deUint32 x1 = getBit(v3, 6);
   1960 			const deUint32 x2 = getBit(v4, 6);
   1961 			const deUint32 x3 = getBit(v5, 6);
   1962 			const deUint32 x4 = getBit(v4, 5);
   1963 			const deUint32 x5 = getBit(v5, 5);
   1964 
   1965 			switch (mode)
   1966 			{
   1967 				case 0: ASSIGN_X_BITS(b0,6,  b1,6,   d0,6,  d1,6,  d0,5,  d1,5); break;
   1968 				case 1: ASSIGN_X_BITS(b0,6,  b1,6,   b0,7,  b1,7,  d0,5,  d1,5); break;
   1969 				case 2: ASSIGN_X_BITS(a,9,   c,6,    d0,6,  d1,6,  d0,5,  d1,5); break;
   1970 				case 3: ASSIGN_X_BITS(b0,6,  b1,6,   a,9,   c,6,   d0,5,  d1,5); break;
   1971 				case 4: ASSIGN_X_BITS(b0,6,  b1,6,   b0,7,  b1,7,  a,9,   a,10); break;
   1972 				case 5: ASSIGN_X_BITS(a,9,   a,10,   c,7,   c,6,   d0,5,  d1,5); break;
   1973 				case 6: ASSIGN_X_BITS(b0,6,  b1,6,   a,11,  c,6,   a,9,   a,10); break;
   1974 				case 7: ASSIGN_X_BITS(a,9,   a,10,   a,11,  c,6,   d0,5,  d1,5); break;
   1975 				default:
   1976 					DE_ASSERT(false);
   1977 			}
   1978 
   1979 #undef ASSIGN_X_BITS
   1980 #undef SHOR
   1981 		}
   1982 
   1983 		static const int numDBits[] = { 7, 6, 7, 6, 5, 6, 5, 6 };
   1984 		DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(numDBits));
   1985 
   1986 		d0 = signExtend(d0, numDBits[mode]);
   1987 		d1 = signExtend(d1, numDBits[mode]);
   1988 
   1989 		const int shiftAmount = (mode >> 1) ^ 3;
   1990 		a	<<= shiftAmount;
   1991 		c	<<= shiftAmount;
   1992 		b0	<<= shiftAmount;
   1993 		b1	<<= shiftAmount;
   1994 		d0	<<= shiftAmount;
   1995 		d1	<<= shiftAmount;
   1996 
   1997 		e0 = UVec4(de::clamp(a-c,			0, 0xfff),
   1998 				   de::clamp(a-b0-c-d0,		0, 0xfff),
   1999 				   de::clamp(a-b1-c-d1,		0, 0xfff),
   2000 				   0x780);
   2001 
   2002 		e1 = UVec4(de::clamp(a,				0, 0xfff),
   2003 				   de::clamp(a-b0,			0, 0xfff),
   2004 				   de::clamp(a-b1,			0, 0xfff),
   2005 				   0x780);
   2006 
   2007 		if (major == 1)
   2008 		{
   2009 			std::swap(e0.x(), e0.y());
   2010 			std::swap(e1.x(), e1.y());
   2011 		}
   2012 		else if (major == 2)
   2013 		{
   2014 			std::swap(e0.x(), e0.z());
   2015 			std::swap(e1.x(), e1.z());
   2016 		}
   2017 	}
   2018 }
   2019 
   2020 static void decodeHDREndpointMode15(UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5, deUint32 v6In, deUint32 v7In)
   2021 {
   2022 	decodeHDREndpointMode11(e0, e1, v0, v1, v2, v3, v4, v5);
   2023 
   2024 	const deUint32	mode	= (getBit(v7In, 7) << 1) | getBit(v6In, 7);
   2025 	deInt32			v6		= (deInt32)getBits(v6In, 0, 6);
   2026 	deInt32			v7		= (deInt32)getBits(v7In, 0, 6);
   2027 
   2028 	if (mode == 3)
   2029 	{
   2030 		e0.w() = v6 << 5;
   2031 		e1.w() = v7 << 5;
   2032 	}
   2033 	else
   2034 	{
   2035 		v6 |= (v7 << (mode+1)) & 0x780;
   2036 		v7 &= (0x3f >> mode);
   2037 		v7 ^= 0x20 >> mode;
   2038 		v7 -= 0x20 >> mode;
   2039 		v6 <<= 4-mode;
   2040 		v7 <<= 4-mode;
   2041 
   2042 		v7 += v6;
   2043 		v7 = de::clamp(v7, 0, 0xfff);
   2044 		e0.w() = v6;
   2045 		e1.w() = v7;
   2046 	}
   2047 }
   2048 
   2049 static void decodeColorEndpoints (ColorEndpointPair* dst, const deUint32* unquantizedEndpoints, const deUint32* endpointModes, int numPartitions)
   2050 {
   2051 	int unquantizedNdx = 0;
   2052 
   2053 	for (int partitionNdx = 0; partitionNdx < numPartitions; partitionNdx++)
   2054 	{
   2055 		const deUint32		endpointMode	= endpointModes[partitionNdx];
   2056 		const deUint32*		v				= &unquantizedEndpoints[unquantizedNdx];
   2057 		UVec4&				e0				= dst[partitionNdx].e0;
   2058 		UVec4&				e1				= dst[partitionNdx].e1;
   2059 
   2060 		unquantizedNdx += computeNumColorEndpointValues(endpointMode);
   2061 
   2062 		switch (endpointMode)
   2063 		{
   2064 			case 0:
   2065 				e0 = UVec4(v[0], v[0], v[0], 0xff);
   2066 				e1 = UVec4(v[1], v[1], v[1], 0xff);
   2067 				break;
   2068 
   2069 			case 1:
   2070 			{
   2071 				const deUint32 L0 = (v[0] >> 2) | (getBits(v[1], 6, 7) << 6);
   2072 				const deUint32 L1 = de::min(0xffu, L0 + getBits(v[1], 0, 5));
   2073 				e0 = UVec4(L0, L0, L0, 0xff);
   2074 				e1 = UVec4(L1, L1, L1, 0xff);
   2075 				break;
   2076 			}
   2077 
   2078 			case 2:
   2079 			{
   2080 				const deUint32 v1Gr		= v[1] >= v[0];
   2081 				const deUint32 y0		= v1Gr ? v[0]<<4 : (v[1]<<4) + 8;
   2082 				const deUint32 y1		= v1Gr ? v[1]<<4 : (v[0]<<4) - 8;
   2083 
   2084 				e0 = UVec4(y0, y0, y0, 0x780);
   2085 				e1 = UVec4(y1, y1, y1, 0x780);
   2086 				break;
   2087 			}
   2088 
   2089 			case 3:
   2090 			{
   2091 				const bool		m	= isBitSet(v[0], 7);
   2092 				const deUint32	y0	= m ? (getBits(v[1], 5, 7) << 9) | (getBits(v[0], 0, 6) << 2)
   2093 										: (getBits(v[1], 4, 7) << 8) | (getBits(v[0], 0, 6) << 1);
   2094 				const deUint32	d	= m ? getBits(v[1], 0, 4) << 2
   2095 										: getBits(v[1], 0, 3) << 1;
   2096 				const deUint32	y1	= de::min(0xfffu, y0+d);
   2097 
   2098 				e0 = UVec4(y0, y0, y0, 0x780);
   2099 				e1 = UVec4(y1, y1, y1, 0x780);
   2100 				break;
   2101 			}
   2102 
   2103 			case 4:
   2104 				e0 = UVec4(v[0], v[0], v[0], v[2]);
   2105 				e1 = UVec4(v[1], v[1], v[1], v[3]);
   2106 				break;
   2107 
   2108 			case 5:
   2109 			{
   2110 				deInt32 v0 = (deInt32)v[0];
   2111 				deInt32 v1 = (deInt32)v[1];
   2112 				deInt32 v2 = (deInt32)v[2];
   2113 				deInt32 v3 = (deInt32)v[3];
   2114 				bitTransferSigned(v1, v0);
   2115 				bitTransferSigned(v3, v2);
   2116 
   2117 				e0 = clampedRGBA(tcu::IVec4(v0,		v0,		v0,		v2));
   2118 				e1 = clampedRGBA(tcu::IVec4(v0+v1,	v0+v1,	v0+v1,	v2+v3));
   2119 				break;
   2120 			}
   2121 
   2122 			case 6:
   2123 				e0 = UVec4((v[0]*v[3]) >> 8,	(v[1]*v[3]) >> 8,	(v[2]*v[3]) >> 8,	0xff);
   2124 				e1 = UVec4(v[0],				v[1],				v[2],				0xff);
   2125 				break;
   2126 
   2127 			case 7:
   2128 				decodeHDREndpointMode7(e0, e1, v[0], v[1], v[2], v[3]);
   2129 				break;
   2130 
   2131 			case 8:
   2132 				if (v[1]+v[3]+v[5] >= v[0]+v[2]+v[4])
   2133 				{
   2134 					e0 = UVec4(v[0], v[2], v[4], 0xff);
   2135 					e1 = UVec4(v[1], v[3], v[5], 0xff);
   2136 				}
   2137 				else
   2138 				{
   2139 					e0 = blueContract(v[1], v[3], v[5], 0xff).asUint();
   2140 					e1 = blueContract(v[0], v[2], v[4], 0xff).asUint();
   2141 				}
   2142 				break;
   2143 
   2144 			case 9:
   2145 			{
   2146 				deInt32 v0 = (deInt32)v[0];
   2147 				deInt32 v1 = (deInt32)v[1];
   2148 				deInt32 v2 = (deInt32)v[2];
   2149 				deInt32 v3 = (deInt32)v[3];
   2150 				deInt32 v4 = (deInt32)v[4];
   2151 				deInt32 v5 = (deInt32)v[5];
   2152 				bitTransferSigned(v1, v0);
   2153 				bitTransferSigned(v3, v2);
   2154 				bitTransferSigned(v5, v4);
   2155 
   2156 				if (v1+v3+v5 >= 0)
   2157 				{
   2158 					e0 = clampedRGBA(tcu::IVec4(v0,		v2,		v4,		0xff));
   2159 					e1 = clampedRGBA(tcu::IVec4(v0+v1,	v2+v3,	v4+v5,	0xff));
   2160 				}
   2161 				else
   2162 				{
   2163 					e0 = clampedRGBA(blueContract(v0+v1,	v2+v3,	v4+v5,	0xff));
   2164 					e1 = clampedRGBA(blueContract(v0,		v2,		v4,		0xff));
   2165 				}
   2166 				break;
   2167 			}
   2168 
   2169 			case 10:
   2170 				e0 = UVec4((v[0]*v[3]) >> 8,	(v[1]*v[3]) >> 8,	(v[2]*v[3]) >> 8,	v[4]);
   2171 				e1 = UVec4(v[0],				v[1],				v[2],				v[5]);
   2172 				break;
   2173 
   2174 			case 11:
   2175 				decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
   2176 				break;
   2177 
   2178 			case 12:
   2179 				if (v[1]+v[3]+v[5] >= v[0]+v[2]+v[4])
   2180 				{
   2181 					e0 = UVec4(v[0], v[2], v[4], v[6]);
   2182 					e1 = UVec4(v[1], v[3], v[5], v[7]);
   2183 				}
   2184 				else
   2185 				{
   2186 					e0 = clampedRGBA(blueContract(v[1], v[3], v[5], v[7]));
   2187 					e1 = clampedRGBA(blueContract(v[0], v[2], v[4], v[6]));
   2188 				}
   2189 				break;
   2190 
   2191 			case 13:
   2192 			{
   2193 				deInt32 v0 = (deInt32)v[0];
   2194 				deInt32 v1 = (deInt32)v[1];
   2195 				deInt32 v2 = (deInt32)v[2];
   2196 				deInt32 v3 = (deInt32)v[3];
   2197 				deInt32 v4 = (deInt32)v[4];
   2198 				deInt32 v5 = (deInt32)v[5];
   2199 				deInt32 v6 = (deInt32)v[6];
   2200 				deInt32 v7 = (deInt32)v[7];
   2201 				bitTransferSigned(v1, v0);
   2202 				bitTransferSigned(v3, v2);
   2203 				bitTransferSigned(v5, v4);
   2204 				bitTransferSigned(v7, v6);
   2205 
   2206 				if (v1+v3+v5 >= 0)
   2207 				{
   2208 					e0 = clampedRGBA(tcu::IVec4(v0,		v2,		v4,		v6));
   2209 					e1 = clampedRGBA(tcu::IVec4(v0+v1,	v2+v3,	v4+v5,	v6+v7));
   2210 				}
   2211 				else
   2212 				{
   2213 					e0 = clampedRGBA(blueContract(v0+v1,	v2+v3,	v4+v5,	v6+v7));
   2214 					e1 = clampedRGBA(blueContract(v0,		v2,		v4,		v6));
   2215 				}
   2216 
   2217 				break;
   2218 			}
   2219 
   2220 			case 14:
   2221 				decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
   2222 				e0.w() = v[6];
   2223 				e1.w() = v[7];
   2224 				break;
   2225 
   2226 			case 15:
   2227 				decodeHDREndpointMode15(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
   2228 				break;
   2229 
   2230 			default:
   2231 				DE_ASSERT(false);
   2232 		}
   2233 	}
   2234 }
   2235 
   2236 static void computeColorEndpoints (ColorEndpointPair* dst, const Block128& blockData, const deUint32* endpointModes, int numPartitions, int numColorEndpointValues, const ISEParams& iseParams, int numBitsAvailable)
   2237 {
   2238 	const int			colorEndpointDataStart = numPartitions == 1 ? 17 : 29;
   2239 	ISEDecodedResult	colorEndpointData[18];
   2240 
   2241 	{
   2242 		BitAccessStream dataStream(blockData, colorEndpointDataStart, numBitsAvailable, true);
   2243 		decodeISE(&colorEndpointData[0], numColorEndpointValues, dataStream, iseParams);
   2244 	}
   2245 
   2246 	{
   2247 		deUint32 unquantizedEndpoints[18];
   2248 		unquantizeColorEndpoints(&unquantizedEndpoints[0], &colorEndpointData[0], numColorEndpointValues, iseParams);
   2249 		decodeColorEndpoints(dst, &unquantizedEndpoints[0], &endpointModes[0], numPartitions);
   2250 	}
   2251 }
   2252 
   2253 static void unquantizeWeights (deUint32* dst, const ISEDecodedResult* weightGrid, const ASTCBlockMode& blockMode)
   2254 {
   2255 	const int			numWeights	= computeNumWeights(blockMode);
   2256 	const ISEParams&	iseParams	= blockMode.weightISEParams;
   2257 
   2258 	if (iseParams.mode == ISEMODE_TRIT || iseParams.mode == ISEMODE_QUINT)
   2259 	{
   2260 		const int rangeCase = iseParams.numBits*2 + (iseParams.mode == ISEMODE_QUINT ? 1 : 0);
   2261 
   2262 		if (rangeCase == 0 || rangeCase == 1)
   2263 		{
   2264 			static const deUint32 map0[3]	= { 0, 32, 63 };
   2265 			static const deUint32 map1[5]	= { 0, 16, 32, 47, 63 };
   2266 			const deUint32* const map		= rangeCase == 0 ? &map0[0] : &map1[0];
   2267 			for (int i = 0; i < numWeights; i++)
   2268 			{
   2269 				DE_ASSERT(weightGrid[i].v < (rangeCase == 0 ? 3u : 5u));
   2270 				dst[i] = map[weightGrid[i].v];
   2271 			}
   2272 		}
   2273 		else
   2274 		{
   2275 			DE_ASSERT(rangeCase <= 6);
   2276 			static const deUint32	Ca[5]	= { 50, 28, 23, 13, 11 };
   2277 			const deUint32			C		= Ca[rangeCase-2];
   2278 
   2279 			for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
   2280 			{
   2281 				const deUint32 a = getBit(weightGrid[weightNdx].m, 0);
   2282 				const deUint32 b = getBit(weightGrid[weightNdx].m, 1);
   2283 				const deUint32 c = getBit(weightGrid[weightNdx].m, 2);
   2284 
   2285 				const deUint32 A = a == 0 ? 0 : (1<<7)-1;
   2286 				const deUint32 B = rangeCase == 2 ? 0
   2287 								 : rangeCase == 3 ? 0
   2288 								 : rangeCase == 4 ? (b << 6) |					(b << 2) |				(b << 0)
   2289 								 : rangeCase == 5 ? (b << 6) |								(b << 1)
   2290 								 : rangeCase == 6 ? (c << 6) | (b << 5) |					(c << 1) |	(b << 0)
   2291 								 : (deUint32)-1;
   2292 
   2293 				dst[weightNdx] = (((weightGrid[weightNdx].tq*C + B) ^ A) >> 2) | (A & 0x20);
   2294 			}
   2295 		}
   2296 	}
   2297 	else
   2298 	{
   2299 		DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);
   2300 
   2301 		for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
   2302 			dst[weightNdx] = bitReplicationScale(weightGrid[weightNdx].v, iseParams.numBits, 6);
   2303 	}
   2304 
   2305 	for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
   2306 		dst[weightNdx] += dst[weightNdx] > 32 ? 1 : 0;
   2307 }
   2308 
   2309 static void interpolateWeights (TexelWeightPair* dst, const deUint32* unquantizedWeights, int blockWidth, int blockHeight, const ASTCBlockMode& blockMode)
   2310 {
   2311 	const int		numWeightsPerTexel	= blockMode.isDualPlane ? 2 : 1;
   2312 	const deUint32	scaleX				= (1024 + blockWidth/2) / (blockWidth-1);
   2313 	const deUint32	scaleY				= (1024 + blockHeight/2) / (blockHeight-1);
   2314 
   2315 	for (int texelY = 0; texelY < blockHeight; texelY++)
   2316 	{
   2317 		for (int texelX = 0; texelX < blockWidth; texelX++)
   2318 		{
   2319 			const deUint32 gX	= (scaleX*texelX*(blockMode.weightGridWidth-1) + 32) >> 6;
   2320 			const deUint32 gY	= (scaleY*texelY*(blockMode.weightGridHeight-1) + 32) >> 6;
   2321 			const deUint32 jX	= gX >> 4;
   2322 			const deUint32 jY	= gY >> 4;
   2323 			const deUint32 fX	= gX & 0xf;
   2324 			const deUint32 fY	= gY & 0xf;
   2325 			const deUint32 w11	= (fX*fY + 8) >> 4;
   2326 			const deUint32 w10	= fY - w11;
   2327 			const deUint32 w01	= fX - w11;
   2328 			const deUint32 w00	= 16 - fX - fY + w11;
   2329 			const deUint32 v0	= jY*blockMode.weightGridWidth + jX;
   2330 
   2331 			for (int texelWeightNdx = 0; texelWeightNdx < numWeightsPerTexel; texelWeightNdx++)
   2332 			{
   2333 				const deUint32 p00	= unquantizedWeights[(v0)									* numWeightsPerTexel + texelWeightNdx];
   2334 				const deUint32 p01	= unquantizedWeights[(v0 + 1)								* numWeightsPerTexel + texelWeightNdx];
   2335 				const deUint32 p10	= unquantizedWeights[(v0 + blockMode.weightGridWidth)		* numWeightsPerTexel + texelWeightNdx];
   2336 				const deUint32 p11	= unquantizedWeights[(v0 + blockMode.weightGridWidth + 1)	* numWeightsPerTexel + texelWeightNdx];
   2337 
   2338 				dst[texelY*blockWidth + texelX].w[texelWeightNdx] = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
   2339 			}
   2340 		}
   2341 	}
   2342 }
   2343 
   2344 static void computeTexelWeights (TexelWeightPair* dst, const Block128& blockData, int blockWidth, int blockHeight, const ASTCBlockMode& blockMode)
   2345 {
   2346 	ISEDecodedResult weightGrid[64];
   2347 
   2348 	{
   2349 		BitAccessStream dataStream(blockData, 127, computeNumRequiredBits(blockMode.weightISEParams, computeNumWeights(blockMode)), false);
   2350 		decodeISE(&weightGrid[0], computeNumWeights(blockMode), dataStream, blockMode.weightISEParams);
   2351 	}
   2352 
   2353 	{
   2354 		deUint32 unquantizedWeights[64];
   2355 		unquantizeWeights(&unquantizedWeights[0], &weightGrid[0], blockMode);
   2356 		interpolateWeights(dst, &unquantizedWeights[0], blockWidth, blockHeight, blockMode);
   2357 	}
   2358 }
   2359 
   2360 static inline deUint32 hash52 (deUint32 v)
   2361 {
   2362 	deUint32 p = v;
   2363 	p ^= p >> 15;	p -= p << 17;	p += p << 7;	p += p << 4;
   2364 	p ^= p >>  5;	p += p << 16;	p ^= p >> 7;	p ^= p >> 3;
   2365 	p ^= p <<  6;	p ^= p >> 17;
   2366 	return p;
   2367 }
   2368 
   2369 static int computeTexelPartition (deUint32 seedIn, deUint32 xIn, deUint32 yIn, deUint32 zIn, int numPartitions, bool smallBlock)
   2370 {
   2371 	DE_ASSERT(zIn == 0);
   2372 	const deUint32	x		= smallBlock ? xIn << 1 : xIn;
   2373 	const deUint32	y		= smallBlock ? yIn << 1 : yIn;
   2374 	const deUint32	z		= smallBlock ? zIn << 1 : zIn;
   2375 	const deUint32	seed	= seedIn + 1024*(numPartitions-1);
   2376 	const deUint32	rnum	= hash52(seed);
   2377 	deUint8			seed1	=  rnum							& 0xf;
   2378 	deUint8			seed2	= (rnum >>  4)					& 0xf;
   2379 	deUint8			seed3	= (rnum >>  8)					& 0xf;
   2380 	deUint8			seed4	= (rnum >> 12)					& 0xf;
   2381 	deUint8			seed5	= (rnum >> 16)					& 0xf;
   2382 	deUint8			seed6	= (rnum >> 20)					& 0xf;
   2383 	deUint8			seed7	= (rnum >> 24)					& 0xf;
   2384 	deUint8			seed8	= (rnum >> 28)					& 0xf;
   2385 	deUint8			seed9	= (rnum >> 18)					& 0xf;
   2386 	deUint8			seed10	= (rnum >> 22)					& 0xf;
   2387 	deUint8			seed11	= (rnum >> 26)					& 0xf;
   2388 	deUint8			seed12	= ((rnum >> 30) | (rnum << 2))	& 0xf;
   2389 
   2390 	seed1 *= seed1;		seed5 *= seed5;		seed9  *= seed9;
   2391 	seed2 *= seed2;		seed6 *= seed6;		seed10 *= seed10;
   2392 	seed3 *= seed3;		seed7 *= seed7;		seed11 *= seed11;
   2393 	seed4 *= seed4;		seed8 *= seed8;		seed12 *= seed12;
   2394 
   2395 	const int shA = (seed & 2) != 0		? 4		: 5;
   2396 	const int shB = numPartitions == 3	? 6		: 5;
   2397 	const int sh1 = (seed & 1) != 0		? shA	: shB;
   2398 	const int sh2 = (seed & 1) != 0		? shB	: shA;
   2399 	const int sh3 = (seed & 0x10) != 0	? sh1	: sh2;
   2400 
   2401 	seed1 >>= sh1;		seed2  >>= sh2;		seed3  >>= sh1;		seed4  >>= sh2;
   2402 	seed5 >>= sh1;		seed6  >>= sh2;		seed7  >>= sh1;		seed8  >>= sh2;
   2403 	seed9 >>= sh3;		seed10 >>= sh3;		seed11 >>= sh3;		seed12 >>= sh3;
   2404 
   2405 	const int a =						0x3f & (seed1*x + seed2*y + seed11*z + (rnum >> 14));
   2406 	const int b =						0x3f & (seed3*x + seed4*y + seed12*z + (rnum >> 10));
   2407 	const int c = numPartitions >= 3 ?	0x3f & (seed5*x + seed6*y + seed9*z  + (rnum >>  6))	: 0;
   2408 	const int d = numPartitions >= 4 ?	0x3f & (seed7*x + seed8*y + seed10*z + (rnum >>  2))	: 0;
   2409 
   2410 	return a >= b && a >= c && a >= d	? 0
   2411 		 : b >= c && b >= d				? 1
   2412 		 : c >= d						? 2
   2413 		 :								  3;
   2414 }
   2415 
   2416 static void setTexelColors (void* dst, ColorEndpointPair* colorEndpoints, TexelWeightPair* texelWeights, int ccs, deUint32 partitionIndexSeed,
   2417 							int numPartitions, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode, const deUint32* colorEndpointModes)
   2418 {
   2419 	const bool	smallBlock = blockWidth*blockHeight < 31;
   2420 	bool		isHDREndpoint[4];
   2421 
   2422 	for (int i = 0; i < numPartitions; i++)
   2423 		isHDREndpoint[i] = isColorEndpointModeHDR(colorEndpointModes[i]);
   2424 
   2425 	for (int texelY = 0; texelY < blockHeight; texelY++)
   2426 	for (int texelX = 0; texelX < blockWidth; texelX++)
   2427 	{
   2428 		const int				texelNdx			= texelY*blockWidth + texelX;
   2429 		const int				colorEndpointNdx	= numPartitions == 1 ? 0 : computeTexelPartition(partitionIndexSeed, texelX, texelY, 0, numPartitions, smallBlock);
   2430 		DE_ASSERT(colorEndpointNdx < numPartitions);
   2431 		const UVec4&			e0					= colorEndpoints[colorEndpointNdx].e0;
   2432 		const UVec4&			e1					= colorEndpoints[colorEndpointNdx].e1;
   2433 		const TexelWeightPair&	weight				= texelWeights[texelNdx];
   2434 
   2435 		if (isLDRMode && isHDREndpoint[colorEndpointNdx])
   2436 		{
   2437 			if (isSRGB)
   2438 			{
   2439 				((deUint8*)dst)[texelNdx*4 + 0] = 0xff;
   2440 				((deUint8*)dst)[texelNdx*4 + 1] = 0;
   2441 				((deUint8*)dst)[texelNdx*4 + 2] = 0xff;
   2442 				((deUint8*)dst)[texelNdx*4 + 3] = 0xff;
   2443 			}
   2444 			else
   2445 			{
   2446 				((float*)dst)[texelNdx*4 + 0] = 1.0f;
   2447 				((float*)dst)[texelNdx*4 + 1] = 0;
   2448 				((float*)dst)[texelNdx*4 + 2] = 1.0f;
   2449 				((float*)dst)[texelNdx*4 + 3] = 1.0f;
   2450 			}
   2451 		}
   2452 		else
   2453 		{
   2454 			for (int channelNdx = 0; channelNdx < 4; channelNdx++)
   2455 			{
   2456 				if (!isHDREndpoint[colorEndpointNdx] || (channelNdx == 3 && colorEndpointModes[colorEndpointNdx] == 14)) // \note Alpha for mode 14 is treated the same as LDR.
   2457 				{
   2458 					const deUint32 c0	= (e0[channelNdx] << 8) | (isSRGB ? 0x80 : e0[channelNdx]);
   2459 					const deUint32 c1	= (e1[channelNdx] << 8) | (isSRGB ? 0x80 : e1[channelNdx]);
   2460 					const deUint32 w	= weight.w[ccs == channelNdx ? 1 : 0];
   2461 					const deUint32 c	= (c0*(64-w) + c1*w + 32) / 64;
   2462 
   2463 					if (isSRGB)
   2464 						((deUint8*)dst)[texelNdx*4 + channelNdx] = (c & 0xff00) >> 8;
   2465 					else
   2466 						((float*)dst)[texelNdx*4 + channelNdx] = c == 65535 ? 1.0f : (float)c / 65536.0f;
   2467 				}
   2468 				else
   2469 				{
   2470 					DE_STATIC_ASSERT((isSameType<deFloat16, deUint16>::V));
   2471 					const deUint32		c0	= e0[channelNdx] << 4;
   2472 					const deUint32		c1	= e1[channelNdx] << 4;
   2473 					const deUint32		w	= weight.w[ccs == channelNdx ? 1 : 0];
   2474 					const deUint32		c	= (c0*(64-w) + c1*w + 32) / 64;
   2475 					const deUint32		e	= getBits(c, 11, 15);
   2476 					const deUint32		m	= getBits(c, 0, 10);
   2477 					const deUint32		mt	= m < 512		? 3*m
   2478 											: m >= 1536		? 5*m - 2048
   2479 											:				  4*m - 512;
   2480 					const deFloat16		cf	= (e << 10) + (mt >> 3);
   2481 
   2482 					((float*)dst)[texelNdx*4 + channelNdx] = deFloat16To32(isFloat16InfOrNan(cf) ? 0x7bff : cf);
   2483 				}
   2484 			}
   2485 		}
   2486 	}
   2487 }
   2488 
   2489 static void decompressASTCBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDR)
   2490 {
   2491 	DE_ASSERT(isLDR || !isSRGB);
   2492 
   2493 	// Decode block mode.
   2494 
   2495 	const ASTCBlockMode blockMode = getASTCBlockMode(blockData.getBits(0, 10));
   2496 
   2497 	// Check for block mode errors.
   2498 
   2499 	if (blockMode.isError)
   2500 	{
   2501 		setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
   2502 		return;
   2503 	}
   2504 
   2505 	// Separate path for void-extent.
   2506 
   2507 	if (blockMode.isVoidExtent)
   2508 	{
   2509 		decodeVoidExtentBlock(dst, blockData, blockWidth, blockHeight, isSRGB, isLDR);
   2510 		return;
   2511 	}
   2512 
   2513 	// Compute weight grid values.
   2514 
   2515 	const int numWeights			= computeNumWeights(blockMode);
   2516 	const int numWeightDataBits		= computeNumRequiredBits(blockMode.weightISEParams, numWeights);
   2517 	const int numPartitions			= (int)blockData.getBits(11, 12) + 1;
   2518 
   2519 	// Check for errors in weight grid, partition and dual-plane parameters.
   2520 
   2521 	if (numWeights > 64								||
   2522 		numWeightDataBits > 96						||
   2523 		numWeightDataBits < 24						||
   2524 		blockMode.weightGridWidth > blockWidth		||
   2525 		blockMode.weightGridHeight > blockHeight	||
   2526 		(numPartitions == 4 && blockMode.isDualPlane))
   2527 	{
   2528 		setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
   2529 		return;
   2530 	}
   2531 
   2532 	// Compute number of bits available for color endpoint data.
   2533 
   2534 	const bool	isSingleUniqueCem			= numPartitions == 1 || blockData.getBits(23, 24) == 0;
   2535 	const int	numConfigDataBits			= (numPartitions == 1 ? 17 : isSingleUniqueCem ? 29 : 25 + 3*numPartitions) +
   2536 											  (blockMode.isDualPlane ? 2 : 0);
   2537 	const int	numBitsForColorEndpoints	= 128 - numWeightDataBits - numConfigDataBits;
   2538 	const int	extraCemBitsStart			= 127 - numWeightDataBits - (isSingleUniqueCem		? -1
   2539 																		: numPartitions == 4	? 7
   2540 																		: numPartitions == 3	? 4
   2541 																		: numPartitions == 2	? 1
   2542 																		: 0);
   2543 	// Decode color endpoint modes.
   2544 
   2545 	deUint32 colorEndpointModes[4];
   2546 	decodeColorEndpointModes(&colorEndpointModes[0], blockData, numPartitions, extraCemBitsStart);
   2547 
   2548 	const int numColorEndpointValues = computeNumColorEndpointValues(colorEndpointModes, numPartitions);
   2549 
   2550 	// Check for errors in color endpoint value count.
   2551 
   2552 	if (numColorEndpointValues > 18 || numBitsForColorEndpoints < divRoundUp(13*numColorEndpointValues, 5))
   2553 	{
   2554 		setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
   2555 		return;
   2556 	}
   2557 
   2558 	// Compute color endpoints.
   2559 
   2560 	ColorEndpointPair colorEndpoints[4];
   2561 	computeColorEndpoints(&colorEndpoints[0], blockData, &colorEndpointModes[0], numPartitions, numColorEndpointValues,
   2562 						  computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues), numBitsForColorEndpoints);
   2563 
   2564 	// Compute texel weights.
   2565 
   2566 	TexelWeightPair texelWeights[ASTC_MAX_BLOCK_WIDTH*ASTC_MAX_BLOCK_HEIGHT];
   2567 	computeTexelWeights(&texelWeights[0], blockData, blockWidth, blockHeight, blockMode);
   2568 
   2569 	// Set texel colors.
   2570 
   2571 	const int		ccs						= blockMode.isDualPlane ? (int)blockData.getBits(extraCemBitsStart-2, extraCemBitsStart-1) : -1;
   2572 	const deUint32	partitionIndexSeed		= numPartitions > 1 ? blockData.getBits(13, 22) : (deUint32)-1;
   2573 
   2574 	setTexelColors(dst, &colorEndpoints[0], &texelWeights[0], ccs, partitionIndexSeed, numPartitions, blockWidth, blockHeight, isSRGB, isLDR, &colorEndpointModes[0]);
   2575 }
   2576 
   2577 } // ASTCDecompressInternal
   2578 
   2579 static void decompressASTC (const tcu::PixelBufferAccess& dst, int width, int height, const deUint8* data, int blockWidth, int blockHeight, bool isSRGB, bool isLDR)
   2580 {
   2581 	using namespace ASTCDecompressInternal;
   2582 
   2583 	DE_ASSERT(isLDR || !isSRGB);
   2584 
   2585 	const int numBlocksX		= divRoundUp(width,  blockWidth);
   2586 	const int numBlocksY		= divRoundUp(height, blockHeight);
   2587 	union
   2588 	{
   2589 		deUint8		sRGB[ASTC_MAX_BLOCK_WIDTH*ASTC_MAX_BLOCK_HEIGHT*4];
   2590 		float		linear[ASTC_MAX_BLOCK_WIDTH*ASTC_MAX_BLOCK_HEIGHT*4];
   2591 	} decompressedBuffer;
   2592 
   2593 	for (int blockY = 0; blockY < numBlocksY; blockY++)
   2594 	for (int blockX = 0; blockX < numBlocksX; blockX++)
   2595 	{
   2596 		const int baseX = blockX * blockWidth;
   2597 		const int baseY = blockY * blockHeight;
   2598 
   2599 		const Block128 blockData(&data[(blockY*numBlocksX + blockX) * ASTC_BLOCK_SIZE_BYTES]);
   2600 		decompressASTCBlock(isSRGB ? (void*)&decompressedBuffer.sRGB[0] : (void*)&decompressedBuffer.linear[0],
   2601 							blockData, blockWidth, blockHeight, isSRGB, isLDR);
   2602 
   2603 		if (isSRGB)
   2604 		{
   2605 			for (int i = 0; i < blockHeight; i++)
   2606 			for (int j = 0; j < blockWidth; j++)
   2607 			{
   2608 				if (baseX + j < dst.getWidth() && baseY + i < dst.getHeight())
   2609 					dst.setPixel(tcu::IVec4(decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 0],
   2610 											decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 1],
   2611 											decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 2],
   2612 											decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 3]),
   2613 								 baseX + j,
   2614 								 baseY + i);
   2615 			}
   2616 		}
   2617 		else
   2618 		{
   2619 			for (int i = 0; i < blockHeight; i++)
   2620 			for (int j = 0; j < blockWidth; j++)
   2621 			{
   2622 				if (baseX + j < dst.getWidth() && baseY + i < dst.getHeight())
   2623 				{
   2624 					dst.setPixel(tcu::Vec4(decompressedBuffer.linear[(i*blockWidth + j) * 4 + 0],
   2625 										   decompressedBuffer.linear[(i*blockWidth + j) * 4 + 1],
   2626 										   decompressedBuffer.linear[(i*blockWidth + j) * 4 + 2],
   2627 										   decompressedBuffer.linear[(i*blockWidth + j) * 4 + 3]),
   2628 								 baseX + j,
   2629 								 baseY + i);
   2630 				}
   2631 			}
   2632 		}
   2633 	}
   2634 }
   2635 
   2636 /*--------------------------------------------------------------------*//*!
   2637  * \brief Decode to uncompressed pixel data
   2638  * \param dst Destination buffer
   2639  *//*--------------------------------------------------------------------*/
   2640 void CompressedTexture::decompress (const tcu::PixelBufferAccess& dst, const DecompressionParams& params) const
   2641 {
   2642 	DE_ASSERT(dst.getWidth() == m_width && dst.getHeight() == m_height && dst.getDepth() == 1);
   2643 	DE_ASSERT(dst.getFormat() == getUncompressedFormat());
   2644 
   2645 	if (isEtcFormat(m_format))
   2646 	{
   2647 		switch (m_format)
   2648 		{
   2649 			case ETC1_RGB8:							decompressETC1								(dst, m_width, m_height, &m_data[0]);			break;
   2650 			case EAC_R11:							decompressEAC_R11							(dst, m_width, m_height, &m_data[0], false);	break;
   2651 			case EAC_SIGNED_R11:					decompressEAC_R11							(dst, m_width, m_height, &m_data[0], true);		break;
   2652 			case EAC_RG11:							decompressEAC_RG11							(dst, m_width, m_height, &m_data[0], false);	break;
   2653 			case EAC_SIGNED_RG11:					decompressEAC_RG11							(dst, m_width, m_height, &m_data[0], true);		break;
   2654 			case ETC2_RGB8:							decompressETC2								(dst, m_width, m_height, &m_data[0]);			break;
   2655 			case ETC2_SRGB8:						decompressETC2								(dst, m_width, m_height, &m_data[0]);			break;
   2656 			case ETC2_RGB8_PUNCHTHROUGH_ALPHA1:		decompressETC2_RGB8_PUNCHTHROUGH_ALPHA1		(dst, m_width, m_height, &m_data[0]);			break;
   2657 			case ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:	decompressETC2_RGB8_PUNCHTHROUGH_ALPHA1		(dst, m_width, m_height, &m_data[0]);			break;
   2658 			case ETC2_EAC_RGBA8:					decompressETC2_EAC_RGBA8					(dst, m_width, m_height, &m_data[0]);			break;
   2659 			case ETC2_EAC_SRGB8_ALPHA8:				decompressETC2_EAC_RGBA8					(dst, m_width, m_height, &m_data[0]);			break;
   2660 
   2661 			default:
   2662 				DE_ASSERT(false);
   2663 				break;
   2664 		}
   2665 	}
   2666 	else if (isASTCFormat(m_format))
   2667 	{
   2668 		const tcu::IVec3	blockSize		= getASTCBlockSize(m_format);
   2669 		const bool			isSRGBFormat	= isASTCSRGBFormat(m_format);
   2670 
   2671 		if (blockSize.z() > 1)
   2672 			throw tcu::InternalError("3D ASTC textures not currently supported");
   2673 
   2674 		decompressASTC(dst, m_width, m_height, &m_data[0], blockSize.x(), blockSize.y(), isSRGBFormat, isSRGBFormat || params.isASTCModeLDR);
   2675 	}
   2676 	else
   2677 		DE_ASSERT(false);
   2678 }
   2679 
   2680 } // tcu
   2681