Home | History | Annotate | Download | only in Renderer
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "Blitter.hpp"
     16 
     17 #include "Common/Debug.hpp"
     18 #include "Reactor/Reactor.hpp"
     19 
     20 namespace sw
     21 {
     22 	Blitter blitter;
     23 
     24 	Blitter::Blitter()
     25 	{
     26 		blitCache = new RoutineCache<BlitState>(1024);
     27 	}
     28 
     29 	Blitter::~Blitter()
     30 	{
     31 		delete blitCache;
     32 	}
     33 
     34 	void Blitter::clear(void* pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
     35 	{
     36 		sw::Surface color(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format));
     37 		Blitter::Options clearOptions = static_cast<sw::Blitter::Options>((rgbaMask & 0xF) | CLEAR_OPERATION);
     38 		SliceRect sRect(dRect);
     39 		sRect.slice = 0;
     40 		blit(&color, sRect, dest, dRect, clearOptions);
     41 	}
     42 
     43 	void Blitter::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
     44 	{
     45 		Blitter::Options options = filter ? static_cast<Blitter::Options>(WRITE_RGBA | FILTER_LINEAR) : WRITE_RGBA;
     46 		blit(source, sRect, dest, dRect, options);
     47 	}
     48 
     49 	void Blitter::blit(Surface *source, const SliceRect &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options)
     50 	{
     51 		if(dest->getInternalFormat() == FORMAT_NULL)
     52 		{
     53 			return;
     54 		}
     55 
     56 		if(blitReactor(source, sourceRect, dest, destRect, options))
     57 		{
     58 			return;
     59 		}
     60 
     61 		SliceRect sRect = sourceRect;
     62 		SliceRect dRect = destRect;
     63 
     64 		bool flipX = destRect.x0 > destRect.x1;
     65 		bool flipY = destRect.y0 > destRect.y1;
     66 
     67 		if(flipX)
     68 		{
     69 			swap(dRect.x0, dRect.x1);
     70 			swap(sRect.x0, sRect.x1);
     71 		}
     72 		if(flipY)
     73 		{
     74 			swap(dRect.y0, dRect.y1);
     75 			swap(sRect.y0, sRect.y1);
     76 		}
     77 
     78 		source->lockInternal(sRect.x0, sRect.y0, sRect.slice, sw::LOCK_READONLY, sw::PUBLIC);
     79 		dest->lockInternal(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC);
     80 
     81 		float w = static_cast<float>(sRect.x1 - sRect.x0) / static_cast<float>(dRect.x1 - dRect.x0);
     82 		float h = static_cast<float>(sRect.y1 - sRect.y0) / static_cast<float>(dRect.y1 - dRect.y0);
     83 
     84 		const float xStart = (float)sRect.x0 + 0.5f * w;
     85 		float y = (float)sRect.y0 + 0.5f * h;
     86 
     87 		for(int j = dRect.y0; j < dRect.y1; j++)
     88 		{
     89 			float x = xStart;
     90 
     91 			for(int i = dRect.x0; i < dRect.x1; i++)
     92 			{
     93 				// FIXME: Support RGBA mask
     94 				dest->copyInternal(source, i, j, x, y, (options & FILTER_LINEAR) == FILTER_LINEAR);
     95 
     96 				x += w;
     97 			}
     98 
     99 			y += h;
    100 		}
    101 
    102 		source->unlockInternal();
    103 		dest->unlockInternal();
    104 	}
    105 
    106 	void Blitter::blit3D(Surface *source, Surface *dest)
    107 	{
    108 		source->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PUBLIC);
    109 		dest->lockInternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC);
    110 
    111 		float w = static_cast<float>(source->getWidth())  / static_cast<float>(dest->getWidth());
    112 		float h = static_cast<float>(source->getHeight()) / static_cast<float>(dest->getHeight());
    113 		float d = static_cast<float>(source->getDepth())  / static_cast<float>(dest->getDepth());
    114 
    115 		float z = 0.5f * d;
    116 		for(int k = 0; k < dest->getDepth(); ++k)
    117 		{
    118 			float y = 0.5f * h;
    119 			for(int j = 0; j < dest->getHeight(); ++j)
    120 			{
    121 				float x = 0.5f * w;
    122 				for(int i = 0; i < dest->getWidth(); ++i)
    123 				{
    124 					dest->copyInternal(source, i, j, k, x, y, z, true);
    125 					x += w;
    126 				}
    127 				y += h;
    128 			}
    129 			z += d;
    130 		}
    131 
    132 		source->unlockInternal();
    133 		dest->unlockInternal();
    134 	}
    135 
    136 	bool Blitter::read(Float4 &c, Pointer<Byte> element, Format format)
    137 	{
    138 		c = Float4(0.0f, 0.0f, 0.0f, 1.0f);
    139 
    140 		switch(format)
    141 		{
    142 		case FORMAT_L8:
    143 			c.xyz = Float(Int(*Pointer<Byte>(element)));
    144 			c.w = float(0xFF);
    145 			break;
    146 		case FORMAT_A8:
    147 			c.w = Float(Int(*Pointer<Byte>(element)));
    148 			break;
    149 		case FORMAT_R8I:
    150 		case FORMAT_R8I_SNORM:
    151 			c.x = Float(Int(*Pointer<SByte>(element)));
    152 			c.w = float(0x7F);
    153 			break;
    154 		case FORMAT_R8:
    155 		case FORMAT_R8UI:
    156 			c.x = Float(Int(*Pointer<Byte>(element)));
    157 			c.w = float(0xFF);
    158 			break;
    159 		case FORMAT_R16I:
    160 			c.x = Float(Int(*Pointer<Short>(element)));
    161 			c.w = float(0x7FFF);
    162 			break;
    163 		case FORMAT_R16UI:
    164 			c.x = Float(Int(*Pointer<UShort>(element)));
    165 			c.w = float(0xFFFF);
    166 			break;
    167 		case FORMAT_R32I:
    168 			c.x = Float(Int(*Pointer<Int>(element)));
    169 			c.w = float(0x7FFFFFFF);
    170 			break;
    171 		case FORMAT_R32UI:
    172 			c.x = Float(Int(*Pointer<UInt>(element)));
    173 			c.w = float(0xFFFFFFFF);
    174 			break;
    175 		case FORMAT_A8R8G8B8:
    176 			c = Float4(*Pointer<Byte4>(element)).zyxw;
    177 			break;
    178 		case FORMAT_A8B8G8R8I:
    179 		case FORMAT_A8B8G8R8I_SNORM:
    180 			c = Float4(*Pointer<SByte4>(element));
    181 			break;
    182 		case FORMAT_A8B8G8R8:
    183 		case FORMAT_A8B8G8R8UI:
    184 		case FORMAT_SRGB8_A8:
    185 			c = Float4(*Pointer<Byte4>(element));
    186 			break;
    187 		case FORMAT_X8R8G8B8:
    188 			c = Float4(*Pointer<Byte4>(element)).zyxw;
    189 			c.w = float(0xFF);
    190 			break;
    191 		case FORMAT_R8G8B8:
    192 			c.z = Float(Int(*Pointer<Byte>(element + 0)));
    193 			c.y = Float(Int(*Pointer<Byte>(element + 1)));
    194 			c.x = Float(Int(*Pointer<Byte>(element + 2)));
    195 			c.w = float(0xFF);
    196 			break;
    197 		case FORMAT_B8G8R8:
    198 			c.x = Float(Int(*Pointer<Byte>(element + 0)));
    199 			c.y = Float(Int(*Pointer<Byte>(element + 1)));
    200 			c.z = Float(Int(*Pointer<Byte>(element + 2)));
    201 			c.w = float(0xFF);
    202 			break;
    203 		case FORMAT_X8B8G8R8I:
    204 		case FORMAT_X8B8G8R8I_SNORM:
    205 			c = Float4(*Pointer<SByte4>(element));
    206 			c.w = float(0x7F);
    207 			break;
    208 		case FORMAT_X8B8G8R8:
    209 		case FORMAT_X8B8G8R8UI:
    210 		case FORMAT_SRGB8_X8:
    211 			c = Float4(*Pointer<Byte4>(element));
    212 			c.w = float(0xFF);
    213 			break;
    214 		case FORMAT_A16B16G16R16I:
    215 			c = Float4(*Pointer<Short4>(element));
    216 			break;
    217 		case FORMAT_A16B16G16R16:
    218 		case FORMAT_A16B16G16R16UI:
    219 			c = Float4(*Pointer<UShort4>(element));
    220 			break;
    221 		case FORMAT_X16B16G16R16I:
    222 			c = Float4(*Pointer<Short4>(element));
    223 			c.w = float(0x7FFF);
    224 			break;
    225 		case FORMAT_X16B16G16R16UI:
    226 			c = Float4(*Pointer<UShort4>(element));
    227 			c.w = float(0xFFFF);
    228 			break;
    229 		case FORMAT_A32B32G32R32I:
    230 			c = Float4(*Pointer<Int4>(element));
    231 			break;
    232 		case FORMAT_A32B32G32R32UI:
    233 			c = Float4(*Pointer<UInt4>(element));
    234 			break;
    235 		case FORMAT_X32B32G32R32I:
    236 			c = Float4(*Pointer<Int4>(element));
    237 			c.w = float(0x7FFFFFFF);
    238 			break;
    239 		case FORMAT_X32B32G32R32UI:
    240 			c = Float4(*Pointer<UInt4>(element));
    241 			c.w = float(0xFFFFFFFF);
    242 			break;
    243 		case FORMAT_G8R8I:
    244 		case FORMAT_G8R8I_SNORM:
    245 			c.x = Float(Int(*Pointer<SByte>(element + 0)));
    246 			c.y = Float(Int(*Pointer<SByte>(element + 1)));
    247 			c.w = float(0x7F);
    248 			break;
    249 		case FORMAT_G8R8:
    250 		case FORMAT_G8R8UI:
    251 			c.x = Float(Int(*Pointer<Byte>(element + 0)));
    252 			c.y = Float(Int(*Pointer<Byte>(element + 1)));
    253 			c.w = float(0xFF);
    254 			break;
    255 		case FORMAT_G16R16I:
    256 			c.x = Float(Int(*Pointer<Short>(element + 0)));
    257 			c.y = Float(Int(*Pointer<Short>(element + 2)));
    258 			c.w = float(0x7FFF);
    259 			break;
    260 		case FORMAT_G16R16:
    261 		case FORMAT_G16R16UI:
    262 			c.x = Float(Int(*Pointer<UShort>(element + 0)));
    263 			c.y = Float(Int(*Pointer<UShort>(element + 2)));
    264 			c.w = float(0xFFFF);
    265 			break;
    266 		case FORMAT_G32R32I:
    267 			c.x = Float(Int(*Pointer<Int>(element + 0)));
    268 			c.y = Float(Int(*Pointer<Int>(element + 4)));
    269 			c.w = float(0x7FFFFFFF);
    270 			break;
    271 		case FORMAT_G32R32UI:
    272 			c.x = Float(Int(*Pointer<UInt>(element + 0)));
    273 			c.y = Float(Int(*Pointer<UInt>(element + 4)));
    274 			c.w = float(0xFFFFFFFF);
    275 			break;
    276 		case FORMAT_A32B32G32R32F:
    277 			c = *Pointer<Float4>(element);
    278 			break;
    279 		case FORMAT_X32B32G32R32F:
    280 		case FORMAT_B32G32R32F:
    281 			c.z = *Pointer<Float>(element + 8);
    282 		case FORMAT_G32R32F:
    283 			c.x = *Pointer<Float>(element + 0);
    284 			c.y = *Pointer<Float>(element + 4);
    285 			break;
    286 		case FORMAT_R32F:
    287 			c.x = *Pointer<Float>(element);
    288 			break;
    289 		case FORMAT_R5G6B5:
    290 			c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
    291 			c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
    292 			c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
    293 			break;
    294 		case FORMAT_A2B10G10R10:
    295 			c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
    296 			c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
    297 			c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
    298 			c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
    299 			break;
    300 		case FORMAT_D16:
    301 			c.x = Float(Int((*Pointer<UShort>(element))));
    302 			break;
    303 		case FORMAT_D24S8:
    304 			c.x = Float(Int((*Pointer<UInt>(element))));
    305 			break;
    306 		case FORMAT_D32:
    307 			c.x = Float(Int((*Pointer<UInt>(element))));
    308 			break;
    309 		case FORMAT_D32F:
    310 			c.x = *Pointer<Float>(element);
    311 			break;
    312 		case FORMAT_D32F_COMPLEMENTARY:
    313 			c.x = 1.0f - *Pointer<Float>(element);
    314 			break;
    315 		case FORMAT_D32F_LOCKABLE:
    316 			c.x = *Pointer<Float>(element);
    317 			break;
    318 		case FORMAT_D32FS8_TEXTURE:
    319 			c.x = *Pointer<Float>(element);
    320 			break;
    321 		case FORMAT_D32FS8_SHADOW:
    322 			c.x = *Pointer<Float>(element);
    323 			break;
    324 		default:
    325 			return false;
    326 		}
    327 
    328 		return true;
    329 	}
    330 
    331 	bool Blitter::write(Float4 &c, Pointer<Byte> element, Format format, const Blitter::Options& options)
    332 	{
    333 		bool writeR = (options & WRITE_RED) == WRITE_RED;
    334 		bool writeG = (options & WRITE_GREEN) == WRITE_GREEN;
    335 		bool writeB = (options & WRITE_BLUE) == WRITE_BLUE;
    336 		bool writeA = (options & WRITE_ALPHA) == WRITE_ALPHA;
    337 		bool writeRGBA = writeR && writeG && writeB && writeA;
    338 
    339 		switch(format)
    340 		{
    341 		case FORMAT_L8:
    342 			*Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
    343 			break;
    344 		case FORMAT_A8:
    345 			if(writeA) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.w))); }
    346 			break;
    347 		case FORMAT_A8R8G8B8:
    348 			if(writeRGBA)
    349 			{
    350 				UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw));
    351 				Byte8 c1 = Pack(c0, c0);
    352 				*Pointer<UInt>(element) = UInt(As<Long>(c1));
    353 			}
    354 			else
    355 			{
    356 				if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
    357 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
    358 				if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
    359 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
    360 			}
    361 			break;
    362 		case FORMAT_A8B8G8R8:
    363 		case FORMAT_SRGB8_A8:
    364 			if(writeRGBA)
    365 			{
    366 				UShort4 c0 = As<UShort4>(RoundShort4(c));
    367 				Byte8 c1 = Pack(c0, c0);
    368 				*Pointer<UInt>(element) = UInt(As<Long>(c1));
    369 			}
    370 			else
    371 			{
    372 				if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
    373 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
    374 				if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
    375 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
    376 			}
    377 			break;
    378 		case FORMAT_X8R8G8B8:
    379 			if(writeRGBA)
    380 			{
    381 				UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw));
    382 				Byte8 c1 = Pack(c0, c0);
    383 				*Pointer<UInt>(element) = UInt(As<Long>(c1)) | 0xFF000000;
    384 			}
    385 			else
    386 			{
    387 				if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
    388 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
    389 				if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
    390 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
    391 			}
    392 			break;
    393 		case FORMAT_X8B8G8R8:
    394 		case FORMAT_SRGB8_X8:
    395 			if(writeRGBA)
    396 			{
    397 				UShort4 c0 = As<UShort4>(RoundShort4(c));
    398 				Byte8 c1 = Pack(c0, c0);
    399 				*Pointer<UInt>(element) = UInt(As<Long>(c1)) | 0xFF000000;
    400 			}
    401 			else
    402 			{
    403 				if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
    404 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
    405 				if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
    406 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
    407 			}
    408 			break;
    409 		case FORMAT_R8G8B8:
    410 			if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
    411 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
    412 			if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
    413 			break;
    414 		case FORMAT_B8G8R8:
    415 			if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
    416 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
    417 			if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
    418 			break;
    419 		case FORMAT_A32B32G32R32F:
    420 			if(writeRGBA)
    421 			{
    422 				*Pointer<Float4>(element) = c;
    423 			}
    424 			else
    425 			{
    426 				if(writeR) { *Pointer<Float>(element) = c.x; }
    427 				if(writeG) { *Pointer<Float>(element + 4) = c.y; }
    428 				if(writeB) { *Pointer<Float>(element + 8) = c.z; }
    429 				if(writeA) { *Pointer<Float>(element + 12) = c.w; }
    430 			}
    431 			break;
    432 		case FORMAT_X32B32G32R32F:
    433 			if(writeA) { *Pointer<Float>(element + 12) = 1.0f; }
    434 		case FORMAT_B32G32R32F:
    435 			if(writeR) { *Pointer<Float>(element) = c.x; }
    436 			if(writeG) { *Pointer<Float>(element + 4) = c.y; }
    437 			if(writeB) { *Pointer<Float>(element + 8) = c.z; }
    438 			break;
    439 		case FORMAT_G32R32F:
    440 			if(writeR && writeG)
    441 			{
    442 				*Pointer<Float2>(element) = Float2(c);
    443 			}
    444 			else
    445 			{
    446 				if(writeR) { *Pointer<Float>(element) = c.x; }
    447 				if(writeG) { *Pointer<Float>(element + 4) = c.y; }
    448 			}
    449 			break;
    450 		case FORMAT_R32F:
    451 			if(writeR) { *Pointer<Float>(element) = c.x; }
    452 			break;
    453 		case FORMAT_A8B8G8R8I:
    454 		case FORMAT_A8B8G8R8I_SNORM:
    455 			if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
    456 		case FORMAT_X8B8G8R8I:
    457 		case FORMAT_X8B8G8R8I_SNORM:
    458 			if(writeA && (format == FORMAT_X8B8G8R8I || format == FORMAT_X8B8G8R8I_SNORM))
    459 			{
    460 				*Pointer<SByte>(element + 3) = SByte(0x7F);
    461 			}
    462 			if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
    463 		case FORMAT_G8R8I:
    464 		case FORMAT_G8R8I_SNORM:
    465 			if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
    466 		case FORMAT_R8I:
    467 		case FORMAT_R8I_SNORM:
    468 			if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
    469 			break;
    470 		case FORMAT_A8B8G8R8UI:
    471 			if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
    472 		case FORMAT_X8B8G8R8UI:
    473 			if(writeA && (format == FORMAT_X8B8G8R8UI))
    474 			{
    475 				*Pointer<Byte>(element + 3) = Byte(0xFF);
    476 			}
    477 			if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
    478 		case FORMAT_G8R8UI:
    479 		case FORMAT_G8R8:
    480 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
    481 		case FORMAT_R8UI:
    482 		case FORMAT_R8:
    483 			if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
    484 			break;
    485 		case FORMAT_A16B16G16R16I:
    486 			if(writeRGBA)
    487 			{
    488 				*Pointer<Short4>(element) = Short4(RoundInt(c));
    489 			}
    490 			else
    491 			{
    492 				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
    493 				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
    494 				if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
    495 				if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
    496 			}
    497 			break;
    498 		case FORMAT_X16B16G16R16I:
    499 			if(writeRGBA)
    500 			{
    501 				*Pointer<Short4>(element) = Short4(RoundInt(c));
    502 			}
    503 			else
    504 			{
    505 				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
    506 				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
    507 				if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
    508 			}
    509 			if(writeA) { *Pointer<Short>(element + 6) = Short(0x7F); }
    510 			break;
    511 		case FORMAT_G16R16I:
    512 			if(writeR && writeG)
    513 			{
    514 				*Pointer<UInt>(element) = UInt(As<Long>(Short4(RoundInt(c))));
    515 			}
    516 			else
    517 			{
    518 				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
    519 				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
    520 			}
    521 			break;
    522 		case FORMAT_R16I:
    523 			if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
    524 			break;
    525 		case FORMAT_A16B16G16R16UI:
    526 		case FORMAT_A16B16G16R16:
    527 			if(writeRGBA)
    528 			{
    529 				*Pointer<UShort4>(element) = UShort4(RoundInt(c));
    530 			}
    531 			else
    532 			{
    533 				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
    534 				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
    535 				if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
    536 				if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
    537 			}
    538 			break;
    539 		case FORMAT_X16B16G16R16UI:
    540 			if(writeRGBA)
    541 			{
    542 				*Pointer<UShort4>(element) = UShort4(RoundInt(c));
    543 			}
    544 			else
    545 			{
    546 				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
    547 				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
    548 				if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
    549 			}
    550 			if(writeA) { *Pointer<UShort>(element + 6) = UShort(0xFF); }
    551 			break;
    552 		case FORMAT_G16R16UI:
    553 		case FORMAT_G16R16:
    554 			if(writeR && writeG)
    555 			{
    556 				*Pointer<UInt>(element) = UInt(As<Long>(UShort4(RoundInt(c))));
    557 			}
    558 			else
    559 			{
    560 				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
    561 				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
    562 			}
    563 			break;
    564 		case FORMAT_R16UI:
    565 			if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
    566 			break;
    567 		case FORMAT_A32B32G32R32I:
    568 			if(writeRGBA)
    569 			{
    570 				*Pointer<Int4>(element) = RoundInt(c);
    571 			}
    572 			else
    573 			{
    574 				if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
    575 				if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
    576 				if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
    577 				if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
    578 			}
    579 			break;
    580 		case FORMAT_X32B32G32R32I:
    581 			if(writeRGBA)
    582 			{
    583 				*Pointer<Int4>(element) = RoundInt(c);
    584 			}
    585 			else
    586 			{
    587 				if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
    588 				if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
    589 				if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
    590 			}
    591 			if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
    592 			break;
    593 		case FORMAT_G32R32I:
    594 			if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
    595 		case FORMAT_R32I:
    596 			if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
    597 			break;
    598 		case FORMAT_A32B32G32R32UI:
    599 			if(writeRGBA)
    600 			{
    601 				*Pointer<UInt4>(element) = UInt4(RoundInt(c));
    602 			}
    603 			else
    604 			{
    605 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
    606 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
    607 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
    608 				if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
    609 			}
    610 			break;
    611 		case FORMAT_X32B32G32R32UI:
    612 			if(writeRGBA)
    613 			{
    614 				*Pointer<UInt4>(element) = UInt4(RoundInt(c));
    615 			}
    616 			else
    617 			{
    618 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
    619 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
    620 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
    621 			}
    622 			if(writeA) { *Pointer<UInt4>(element + 12) = UInt4(0xFFFFFFFF); }
    623 			break;
    624 		case FORMAT_G32R32UI:
    625 			if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
    626 		case FORMAT_R32UI:
    627 			if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
    628 			break;
    629 		case FORMAT_R5G6B5:
    630 			if(writeR && writeG && writeB)
    631 			{
    632 				*Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
    633 				                                  (RoundInt(Float(c.y)) << Int(5)) |
    634 				                                  (RoundInt(Float(c.x)) << Int(11)));
    635 			}
    636 			else
    637 			{
    638 				unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
    639 				unsigned short unmask = ~mask;
    640 				*Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
    641 				                            (UShort(RoundInt(Float(c.z)) |
    642 				                                   (RoundInt(Float(c.y)) << Int(5)) |
    643 				                                   (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
    644 			}
    645 			break;
    646 		case FORMAT_A2B10G10R10:
    647 			if(writeRGBA)
    648 			{
    649 				*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) |
    650 				                              (RoundInt(Float(c.y)) << 10) |
    651 				                              (RoundInt(Float(c.z)) << 20) |
    652 				                              (RoundInt(Float(c.w)) << 30));
    653 			}
    654 			else
    655 			{
    656 				unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
    657 				                    (writeB ? 0x3FF00000 : 0x0000) |
    658 				                    (writeG ? 0x000FFC00 : 0x0000) |
    659 				                    (writeR ? 0x000003FF : 0x0000);
    660 				unsigned int unmask = ~mask;
    661 				*Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
    662 				                            (UInt(RoundInt(Float(c.x)) |
    663 				                                  (RoundInt(Float(c.y)) << 10) |
    664 				                                  (RoundInt(Float(c.z)) << 20) |
    665 				                                  (RoundInt(Float(c.w)) << 30)) & UInt(mask));
    666 			}
    667 			break;
    668 		case FORMAT_D16:
    669 			*Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
    670 			break;
    671 		case FORMAT_D24S8:
    672 			*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)));
    673 			break;
    674 		case FORMAT_D32:
    675 			*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)));
    676 			break;
    677 		case FORMAT_D32F:
    678 			*Pointer<Float>(element) = c.x;
    679 			break;
    680 		case FORMAT_D32F_COMPLEMENTARY:
    681 			*Pointer<Float>(element) = 1.0f - c.x;
    682 			break;
    683 		case FORMAT_D32F_LOCKABLE:
    684 			*Pointer<Float>(element) = c.x;
    685 			break;
    686 		case FORMAT_D32FS8_TEXTURE:
    687 			*Pointer<Float>(element) = c.x;
    688 			break;
    689 		case FORMAT_D32FS8_SHADOW:
    690 			*Pointer<Float>(element) = c.x;
    691 			break;
    692 		default:
    693 			return false;
    694 		}
    695 		return true;
    696 	}
    697 
    698 	bool Blitter::read(Int4 &c, Pointer<Byte> element, Format format)
    699 	{
    700 		c = Int4(0, 0, 0, 0xFFFFFFFF);
    701 
    702 		switch(format)
    703 		{
    704 		case FORMAT_A8B8G8R8I:
    705 			c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
    706 		case FORMAT_X8B8G8R8I:
    707 			c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
    708 		case FORMAT_G8R8I:
    709 			c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
    710 		case FORMAT_R8I:
    711 			c = Insert(c, Int(*Pointer<SByte>(element)), 0);
    712 			if(format != FORMAT_A8B8G8R8I)
    713 			{
    714 				c = Insert(c, Int(0x7F), 3); // Set alpha
    715 			}
    716 			break;
    717 		case FORMAT_A8B8G8R8UI:
    718 			c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
    719 		case FORMAT_X8B8G8R8UI:
    720 			c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
    721 		case FORMAT_G8R8UI:
    722 			c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
    723 		case FORMAT_R8UI:
    724 			c = Insert(c, Int(*Pointer<Byte>(element)), 0);
    725 			if(format != FORMAT_A8B8G8R8UI)
    726 			{
    727 				c = Insert(c, Int(0xFF), 3); // Set alpha
    728 			}
    729 			break;
    730 		case FORMAT_A16B16G16R16I:
    731 			c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
    732 		case FORMAT_X16B16G16R16I:
    733 			c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
    734 		case FORMAT_G16R16I:
    735 			c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
    736 		case FORMAT_R16I:
    737 			c = Insert(c, Int(*Pointer<Short>(element)), 0);
    738 			if(format != FORMAT_A16B16G16R16I)
    739 			{
    740 				c = Insert(c, Int(0x7FFF), 3); // Set alpha
    741 			}
    742 			break;
    743 		case FORMAT_A16B16G16R16UI:
    744 			c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
    745 		case FORMAT_X16B16G16R16UI:
    746 			c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
    747 		case FORMAT_G16R16UI:
    748 			c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
    749 		case FORMAT_R16UI:
    750 			c = Insert(c, Int(*Pointer<UShort>(element)), 0);
    751 			if(format != FORMAT_A16B16G16R16UI)
    752 			{
    753 				c = Insert(c, Int(0xFFFF), 3); // Set alpha
    754 			}
    755 			break;
    756 		case FORMAT_A32B32G32R32I:
    757 			c = *Pointer<Int4>(element);
    758 			break;
    759 		case FORMAT_X32B32G32R32I:
    760 			c = Insert(c, *Pointer<Int>(element + 8), 2);
    761 		case FORMAT_G32R32I:
    762 			c = Insert(c, *Pointer<Int>(element + 4), 1);
    763 		case FORMAT_R32I:
    764 			c = Insert(c, *Pointer<Int>(element), 0);
    765 			c = Insert(c, Int(0x7FFFFFFF), 3); // Set alpha
    766 			break;
    767 		case FORMAT_A32B32G32R32UI:
    768 			c = *Pointer<UInt4>(element);
    769 			break;
    770 		case FORMAT_X32B32G32R32UI:
    771 			c = Insert(c, Int(*Pointer<UInt>(element + 8)), 2);
    772 		case FORMAT_G32R32UI:
    773 			c = Insert(c, Int(*Pointer<UInt>(element + 4)), 1);
    774 		case FORMAT_R32UI:
    775 			c = Insert(c, Int(*Pointer<UInt>(element)), 0);
    776 			c = Insert(c, Int(UInt(0xFFFFFFFFU)), 3); // Set alpha
    777 			break;
    778 		default:
    779 			return false;
    780 		}
    781 
    782 		return true;
    783 	}
    784 
    785 	bool Blitter::write(Int4 &c, Pointer<Byte> element, Format format, const Blitter::Options& options)
    786 	{
    787 		bool writeR = (options & WRITE_RED) == WRITE_RED;
    788 		bool writeG = (options & WRITE_GREEN) == WRITE_GREEN;
    789 		bool writeB = (options & WRITE_BLUE) == WRITE_BLUE;
    790 		bool writeA = (options & WRITE_ALPHA) == WRITE_ALPHA;
    791 		bool writeRGBA = writeR && writeG && writeB && writeA;
    792 
    793 		switch(format)
    794 		{
    795 		case FORMAT_A8B8G8R8I:
    796 			if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
    797 		case FORMAT_X8B8G8R8I:
    798 			if(writeA && (format != FORMAT_A8B8G8R8I))
    799 			{
    800 				*Pointer<SByte>(element + 3) = SByte(0x7F);
    801 			}
    802 			if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
    803 		case FORMAT_G8R8I:
    804 			if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
    805 		case FORMAT_R8I:
    806 			if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
    807 			break;
    808 		case FORMAT_A8B8G8R8UI:
    809 			if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
    810 		case FORMAT_X8B8G8R8UI:
    811 			if(writeA && (format != FORMAT_A8B8G8R8UI))
    812 			{
    813 				*Pointer<Byte>(element + 3) = Byte(0xFF);
    814 			}
    815 			if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
    816 		case FORMAT_G8R8UI:
    817 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
    818 		case FORMAT_R8UI:
    819 			if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
    820 			break;
    821 		case FORMAT_A16B16G16R16I:
    822 			if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
    823 		case FORMAT_X16B16G16R16I:
    824 			if(writeA && (format != FORMAT_A16B16G16R16I))
    825 			{
    826 				*Pointer<Short>(element + 6) = Short(0x7FFF);
    827 			}
    828 			if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
    829 		case FORMAT_G16R16I:
    830 			if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
    831 		case FORMAT_R16I:
    832 			if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
    833 			break;
    834 		case FORMAT_A16B16G16R16UI:
    835 			if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
    836 		case FORMAT_X16B16G16R16UI:
    837 			if(writeA && (format != FORMAT_A16B16G16R16UI))
    838 			{
    839 				*Pointer<UShort>(element + 6) = UShort(0xFFFF);
    840 			}
    841 			if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
    842 		case FORMAT_G16R16UI:
    843 			if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
    844 		case FORMAT_R16UI:
    845 			if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
    846 			break;
    847 		case FORMAT_A32B32G32R32I:
    848 			if(writeRGBA)
    849 			{
    850 				*Pointer<Int4>(element) = c;
    851 			}
    852 			else
    853 			{
    854 				if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
    855 				if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
    856 				if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
    857 				if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
    858 			}
    859 			break;
    860 		case FORMAT_X32B32G32R32I:
    861 			if(writeRGBA)
    862 			{
    863 				*Pointer<Int4>(element) = c;
    864 			}
    865 			else
    866 			{
    867 				if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
    868 				if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
    869 				if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
    870 			}
    871 			if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
    872 			break;
    873 		case FORMAT_G32R32I:
    874 			if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
    875 			if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
    876 			break;
    877 		case FORMAT_R32I:
    878 			if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
    879 			break;
    880 		case FORMAT_A32B32G32R32UI:
    881 			if(writeRGBA)
    882 			{
    883 				*Pointer<UInt4>(element) = As<UInt4>(c);
    884 			}
    885 			else
    886 			{
    887 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
    888 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
    889 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
    890 				if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
    891 			}
    892 			break;
    893 		case FORMAT_X32B32G32R32UI:
    894 			if(writeRGBA)
    895 			{
    896 				*Pointer<UInt4>(element) = As<UInt4>(c);
    897 			}
    898 			else
    899 			{
    900 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
    901 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
    902 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
    903 			}
    904 			if(writeA) { *Pointer<UInt>(element + 3) = UInt(0xFFFFFFFF); }
    905 			break;
    906 		case FORMAT_G32R32UI:
    907 			if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
    908 			if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
    909 			break;
    910 		case FORMAT_R32UI:
    911 			if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
    912 			break;
    913 		default:
    914 			return false;
    915 		}
    916 
    917 		return true;
    918 	}
    919 
    920 	bool Blitter::GetScale(float4& scale, Format format)
    921 	{
    922 		switch(format)
    923 		{
    924 		case FORMAT_L8:
    925 		case FORMAT_A8:
    926 		case FORMAT_A8R8G8B8:
    927 		case FORMAT_X8R8G8B8:
    928 		case FORMAT_R8:
    929 		case FORMAT_G8R8:
    930 		case FORMAT_R8G8B8:
    931 		case FORMAT_B8G8R8:
    932 		case FORMAT_X8B8G8R8:
    933 		case FORMAT_A8B8G8R8:
    934 		case FORMAT_SRGB8_X8:
    935 		case FORMAT_SRGB8_A8:
    936 			scale = vector(0xFF, 0xFF, 0xFF, 0xFF);
    937 			break;
    938 		case FORMAT_R8I_SNORM:
    939 		case FORMAT_G8R8I_SNORM:
    940 		case FORMAT_X8B8G8R8I_SNORM:
    941 		case FORMAT_A8B8G8R8I_SNORM:
    942 			scale = vector(0x7F, 0x7F, 0x7F, 0x7F);
    943 			break;
    944 		case FORMAT_A16B16G16R16:
    945 			scale = vector(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
    946 			break;
    947 		case FORMAT_R8I:
    948 		case FORMAT_R8UI:
    949 		case FORMAT_G8R8I:
    950 		case FORMAT_G8R8UI:
    951 		case FORMAT_X8B8G8R8I:
    952 		case FORMAT_X8B8G8R8UI:
    953 		case FORMAT_A8B8G8R8I:
    954 		case FORMAT_A8B8G8R8UI:
    955 		case FORMAT_R16I:
    956 		case FORMAT_R16UI:
    957 		case FORMAT_G16R16:
    958 		case FORMAT_G16R16I:
    959 		case FORMAT_G16R16UI:
    960 		case FORMAT_X16B16G16R16I:
    961 		case FORMAT_X16B16G16R16UI:
    962 		case FORMAT_A16B16G16R16I:
    963 		case FORMAT_A16B16G16R16UI:
    964 		case FORMAT_R32I:
    965 		case FORMAT_R32UI:
    966 		case FORMAT_G32R32I:
    967 		case FORMAT_G32R32UI:
    968 		case FORMAT_X32B32G32R32I:
    969 		case FORMAT_X32B32G32R32UI:
    970 		case FORMAT_A32B32G32R32I:
    971 		case FORMAT_A32B32G32R32UI:
    972 		case FORMAT_A32B32G32R32F:
    973 		case FORMAT_X32B32G32R32F:
    974 		case FORMAT_B32G32R32F:
    975 		case FORMAT_G32R32F:
    976 		case FORMAT_R32F:
    977 			scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
    978 			break;
    979 		case FORMAT_R5G6B5:
    980 			scale = vector(0x1F, 0x3F, 0x1F, 1.0f);
    981 			break;
    982 		case FORMAT_A2B10G10R10:
    983 			scale = vector(0x3FF, 0x3FF, 0x3FF, 0x03);
    984 			break;
    985 		case FORMAT_D16:
    986 			scale = vector(0xFFFF, 0.0f, 0.0f, 0.0f);
    987 			break;
    988 		case FORMAT_D24S8:
    989 			scale = vector(0xFFFFFF, 0.0f, 0.0f, 0.0f);
    990 			break;
    991 		case FORMAT_D32:
    992 			scale = vector(0xFFFFFFFF, 0.0f, 0.0f, 0.0f);
    993 			break;
    994 		case FORMAT_D32F:
    995 		case FORMAT_D32F_COMPLEMENTARY:
    996 		case FORMAT_D32F_LOCKABLE:
    997 		case FORMAT_D32FS8_TEXTURE:
    998 		case FORMAT_D32FS8_SHADOW:
    999 			scale = vector(1.0f, 0.0f, 0.0f, 0.0f);
   1000 			break;
   1001 		default:
   1002 			return false;
   1003 		}
   1004 
   1005 		return true;
   1006 	}
   1007 
   1008 	bool Blitter::ApplyScaleAndClamp(Float4& value, const BlitState& state)
   1009 	{
   1010 		float4 scale, unscale;
   1011 		if(Surface::isNonNormalizedInteger(state.sourceFormat) &&
   1012 		   !Surface::isNonNormalizedInteger(state.destFormat) &&
   1013 		   (state.options & CLEAR_OPERATION))
   1014 		{
   1015 			// If we're clearing a buffer from an int or uint color into a normalized color,
   1016 			// then the whole range of the int or uint color must be scaled between 0 and 1.
   1017 			switch(state.sourceFormat)
   1018 			{
   1019 			case FORMAT_A32B32G32R32I:
   1020 				unscale = vector(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF);
   1021 				break;
   1022 			case FORMAT_A32B32G32R32UI:
   1023 				unscale = vector(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
   1024 				break;
   1025 			default:
   1026 				return false;
   1027 			}
   1028 		}
   1029 		else if(!GetScale(unscale, state.sourceFormat))
   1030 		{
   1031 			return false;
   1032 		}
   1033 
   1034 		if(!GetScale(scale, state.destFormat))
   1035 		{
   1036 			return false;
   1037 		}
   1038 
   1039 		if(unscale != scale)
   1040 		{
   1041 			value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
   1042 		}
   1043 
   1044 		if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat))
   1045 		{
   1046 			value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
   1047 
   1048 			value = Max(value, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -scale.x,
   1049 			                          Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -scale.y,
   1050 			                          Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -scale.z,
   1051 			                          Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -scale.w));
   1052 		}
   1053 
   1054 		return true;
   1055 	}
   1056 
   1057 	Routine *Blitter::generate(BlitState &state)
   1058 	{
   1059 		Function<Void(Pointer<Byte>)> function;
   1060 		{
   1061 			Pointer<Byte> blit(function.Arg<0>());
   1062 
   1063 			Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source));
   1064 			Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest));
   1065 			Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB));
   1066 			Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB));
   1067 
   1068 			Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0));
   1069 			Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0));
   1070 			Float w = *Pointer<Float>(blit + OFFSET(BlitData,w));
   1071 			Float h = *Pointer<Float>(blit + OFFSET(BlitData,h));
   1072 
   1073 			Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d));
   1074 			Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d));
   1075 			Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d));
   1076 			Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d));
   1077 
   1078 			Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth));
   1079 			Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight));
   1080 
   1081 			bool intSrc = Surface::isNonNormalizedInteger(state.sourceFormat);
   1082 			bool intDst = Surface::isNonNormalizedInteger(state.destFormat);
   1083 			bool intBoth = intSrc && intDst;
   1084 
   1085 			bool hasConstantColorI = false;
   1086 			Int4 constantColorI;
   1087 			bool hasConstantColorF = false;
   1088 			Float4 constantColorF;
   1089 			if(state.options & CLEAR_OPERATION)
   1090 			{
   1091 				if(intBoth) // Integer types
   1092 				{
   1093 					if(!read(constantColorI, source, state.sourceFormat))
   1094 					{
   1095 						return nullptr;
   1096 					}
   1097 					hasConstantColorI = true;
   1098 				}
   1099 				else
   1100 				{
   1101 					if(!read(constantColorF, source, state.sourceFormat))
   1102 					{
   1103 						return nullptr;
   1104 					}
   1105 					hasConstantColorF = true;
   1106 
   1107 					if(!ApplyScaleAndClamp(constantColorF, state))
   1108 					{
   1109 						return nullptr;
   1110 					}
   1111 				}
   1112 			}
   1113 
   1114 			Float y = y0;
   1115 
   1116 			For(Int j = y0d, j < y1d, j++)
   1117 			{
   1118 				Float x = x0;
   1119 				Pointer<Byte> destLine = dest + j * dPitchB;
   1120 
   1121 				For(Int i = x0d, i < x1d, i++)
   1122 				{
   1123 					Pointer<Byte> d = destLine + i * Surface::bytes(state.destFormat);
   1124 					if(hasConstantColorI)
   1125 					{
   1126 						if(!write(constantColorI, d, state.destFormat, state.options))
   1127 						{
   1128 							return nullptr;
   1129 						}
   1130 					}
   1131 					else if(hasConstantColorF)
   1132 					{
   1133 						if(!write(constantColorF, d, state.destFormat, state.options))
   1134 						{
   1135 							return nullptr;
   1136 						}
   1137 					}
   1138 					else if(intBoth) // Integer types do not support filtering
   1139 					{
   1140 						Int4 color; // When both formats are true integer types, we don't go to float to avoid losing precision
   1141 						Pointer<Byte> s = source + Int(y) * sPitchB + Int(x) * Surface::bytes(state.sourceFormat);
   1142 						if(!read(color, s, state.sourceFormat))
   1143 						{
   1144 							return nullptr;
   1145 						}
   1146 
   1147 						if(!write(color, d, state.destFormat, state.options))
   1148 						{
   1149 							return nullptr;
   1150 						}
   1151 					}
   1152 					else
   1153 					{
   1154 						Float4 color;
   1155 
   1156 						if(!(state.options & FILTER_LINEAR) || intSrc)
   1157 						{
   1158 							Int X = Int(x);
   1159 							Int Y = Int(y);
   1160 
   1161 							Pointer<Byte> s = source + Y * sPitchB + X * Surface::bytes(state.sourceFormat);
   1162 
   1163 							if(!read(color, s, state.sourceFormat))
   1164 							{
   1165 								return nullptr;
   1166 							}
   1167 						}
   1168 						else   // Bilinear filtering
   1169 						{
   1170 							Float x0 = x - 0.5f;
   1171 							Float y0 = y - 0.5f;
   1172 
   1173 							Int X0 = Max(Int(x0), 0);
   1174 							Int Y0 = Max(Int(y0), 0);
   1175 
   1176 							Int X1 = IfThenElse(X0 + 1 >= sWidth, X0, X0 + 1);
   1177 							Int Y1 = IfThenElse(Y0 + 1 >= sHeight, Y0, Y0 + 1);
   1178 
   1179 							Pointer<Byte> s00 = source + Y0 * sPitchB + X0 * Surface::bytes(state.sourceFormat);
   1180 							Pointer<Byte> s01 = source + Y0 * sPitchB + X1 * Surface::bytes(state.sourceFormat);
   1181 							Pointer<Byte> s10 = source + Y1 * sPitchB + X0 * Surface::bytes(state.sourceFormat);
   1182 							Pointer<Byte> s11 = source + Y1 * sPitchB + X1 * Surface::bytes(state.sourceFormat);
   1183 
   1184 							Float4 c00; if(!read(c00, s00, state.sourceFormat)) return nullptr;
   1185 							Float4 c01; if(!read(c01, s01, state.sourceFormat)) return nullptr;
   1186 							Float4 c10; if(!read(c10, s10, state.sourceFormat)) return nullptr;
   1187 							Float4 c11; if(!read(c11, s11, state.sourceFormat)) return nullptr;
   1188 
   1189 							Float4 fx = Float4(x0 - Float(X0));
   1190 							Float4 fy = Float4(y0 - Float(Y0));
   1191 
   1192 							color = c00 * (Float4(1.0f) - fx) * (Float4(1.0f) - fy) +
   1193 							        c01 * fx * (Float4(1.0f) - fy) +
   1194 							        c10 * (Float4(1.0f) - fx) * fy +
   1195 							        c11 * fx * fy;
   1196 						}
   1197 
   1198 						if(!ApplyScaleAndClamp(color, state) || !write(color, d, state.destFormat, state.options))
   1199 						{
   1200 							return nullptr;
   1201 						}
   1202 					}
   1203 
   1204 					if(!hasConstantColorI && !hasConstantColorF) { x += w; }
   1205 				}
   1206 
   1207 				if(!hasConstantColorI && !hasConstantColorF) { y += h; }
   1208 			}
   1209 		}
   1210 
   1211 		return function(L"BlitRoutine");
   1212 	}
   1213 
   1214 	bool Blitter::blitReactor(Surface *source, const SliceRect &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options)
   1215 	{
   1216 		ASSERT(!(options & CLEAR_OPERATION) || ((source->getWidth() == 1) && (source->getHeight() == 1) && (source->getDepth() == 1)));
   1217 
   1218 		Rect dRect = destRect;
   1219 		Rect sRect = sourceRect;
   1220 		if(destRect.x0 > destRect.x1)
   1221 		{
   1222 			swap(dRect.x0, dRect.x1);
   1223 			swap(sRect.x0, sRect.x1);
   1224 		}
   1225 		if(destRect.y0 > destRect.y1)
   1226 		{
   1227 			swap(dRect.y0, dRect.y1);
   1228 			swap(sRect.y0, sRect.y1);
   1229 		}
   1230 
   1231 		BlitState state;
   1232 
   1233 		bool useSourceInternal = !source->isExternalDirty();
   1234 		bool useDestInternal = !dest->isExternalDirty();
   1235 
   1236 		state.sourceFormat = source->getFormat(useSourceInternal);
   1237 		state.destFormat = dest->getFormat(useDestInternal);
   1238 		state.options = options;
   1239 
   1240 		criticalSection.lock();
   1241 		Routine *blitRoutine = blitCache->query(state);
   1242 
   1243 		if(!blitRoutine)
   1244 		{
   1245 			blitRoutine = generate(state);
   1246 
   1247 			if(!blitRoutine)
   1248 			{
   1249 				criticalSection.unlock();
   1250 				return false;
   1251 			}
   1252 
   1253 			blitCache->add(state, blitRoutine);
   1254 		}
   1255 
   1256 		criticalSection.unlock();
   1257 
   1258 		void (*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry();
   1259 
   1260 		BlitData data;
   1261 
   1262 		bool isRGBA = ((options & WRITE_RGBA) == WRITE_RGBA);
   1263 		bool isEntireDest = dest->isEntire(destRect);
   1264 
   1265 		data.source = source->lock(0, 0, sourceRect.slice, sw::LOCK_READONLY, sw::PUBLIC, useSourceInternal);
   1266 		data.dest = dest->lock(0, 0, destRect.slice, isRGBA ? (isEntireDest ? sw::LOCK_DISCARD : sw::LOCK_WRITEONLY) : sw::LOCK_READWRITE, sw::PUBLIC, useDestInternal);
   1267 		data.sPitchB = source->getPitchB(useSourceInternal);
   1268 		data.dPitchB = dest->getPitchB(useDestInternal);
   1269 
   1270 		data.w = 1.0f / (dRect.x1 - dRect.x0) * (sRect.x1 - sRect.x0);
   1271 		data.h = 1.0f / (dRect.y1 - dRect.y0) * (sRect.y1 - sRect.y0);
   1272 		data.x0 = (float)sRect.x0 + 0.5f * data.w;
   1273 		data.y0 = (float)sRect.y0 + 0.5f * data.h;
   1274 
   1275 		data.x0d = dRect.x0;
   1276 		data.x1d = dRect.x1;
   1277 		data.y0d = dRect.y0;
   1278 		data.y1d = dRect.y1;
   1279 
   1280 		data.sWidth = source->getWidth();
   1281 		data.sHeight = source->getHeight();
   1282 
   1283 		blitFunction(&data);
   1284 
   1285 		source->unlock(useSourceInternal);
   1286 		dest->unlock(useDestInternal);
   1287 
   1288 		return true;
   1289 	}
   1290 }
   1291