Home | History | Annotate | Download | only in Shader
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #ifndef sw_ShaderCore_hpp
     16 #define sw_ShaderCore_hpp
     17 
     18 #include "Debug.hpp"
     19 #include "Shader.hpp"
     20 #include "Reactor/Reactor.hpp"
     21 
     22 namespace sw
     23 {
     24 	class Vector4s
     25 	{
     26 	public:
     27 		Vector4s();
     28 		Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w);
     29 		Vector4s(const Vector4s &rhs);
     30 
     31 		Short4 &operator[](int i);
     32 		Vector4s &operator=(const Vector4s &rhs);
     33 
     34 		Short4 x;
     35 		Short4 y;
     36 		Short4 z;
     37 		Short4 w;
     38 	};
     39 
     40 	class Vector4f
     41 	{
     42 	public:
     43 		Vector4f();
     44 		Vector4f(float x, float y, float z, float w);
     45 		Vector4f(const Vector4f &rhs);
     46 
     47 		Float4 &operator[](int i);
     48 		Vector4f &operator=(const Vector4f &rhs);
     49 
     50 		Float4 x;
     51 		Float4 y;
     52 		Float4 z;
     53 		Float4 w;
     54 	};
     55 
	// Free math helpers that take RValue<Float4> operands and return a Float4.
	// These are declarations only; the definitions are not part of this header.
	// NOTE(review): 'pp' presumably requests a partial-precision variant and the
	// 'abs' flag presumably applies an absolute value to the input first - confirm
	// both against the definitions.
	Float4 exponential2(RValue<Float4> x, bool pp = false);
	Float4 logarithm2(RValue<Float4> x, bool abs, bool pp = false);
	Float4 exponential(RValue<Float4> x, bool pp = false);
	Float4 logarithm(RValue<Float4> x, bool abs, bool pp = false);
	Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false);
	Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false);
	Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false);
	Float4 modulo(RValue<Float4> x, RValue<Float4> y);
	Float4 sine_pi(RValue<Float4> x, bool pp = false);     // limited to [-pi, pi] range
	Float4 cosine_pi(RValue<Float4> x, bool pp = false);   // limited to [-pi, pi] range
	Float4 sine(RValue<Float4> x, bool pp = false);
	Float4 cosine(RValue<Float4> x, bool pp = false);
	Float4 tangent(RValue<Float4> x, bool pp = false);
	Float4 arccos(RValue<Float4> x, bool pp = false);
	Float4 arcsin(RValue<Float4> x, bool pp = false);
	Float4 arctan(RValue<Float4> x, bool pp = false);
	// Two-operand overload taking y before x.
	Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false);
	Float4 sineh(RValue<Float4> x, bool pp = false);
	Float4 cosineh(RValue<Float4> x, bool pp = false);
	Float4 tangenth(RValue<Float4> x, bool pp = false);
	Float4 arccosh(RValue<Float4> x, bool pp = false);  // Limited to x >= 1
	Float4 arcsinh(RValue<Float4> x, bool pp = false);
	Float4 arctanh(RValue<Float4> x, bool pp = false);  // Limited to ]-1, 1[ range
     79 
	// Helpers that combine two Vector4f operands into a single Float4 (declarations only).
	// NOTE(review): the numeric suffix presumably gives the number of components
	// (x, xy.., xyzw) included in the dot product - confirm against the definitions.
	Float4 dot2(const Vector4f &v0, const Vector4f &v1);
	Float4 dot3(const Vector4f &v0, const Vector4f &v1);
	Float4 dot4(const Vector4f &v0, const Vector4f &v1);
     83 
	// Transpose helpers (declarations only). Every variant receives four rows by
	// non-const reference, so results are presumably written back into the same rows.
	// NOTE(review): the AxB suffix presumably names the matrix shape handled, and
	// transpose4xN presumably selects among the 4x1..4x4 variants from the value of N -
	// confirm against the definitions.
	void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
	void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
	void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
	void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
	void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
	void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
	void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
     91 
     92 	class Register
     93 	{
     94 	public:
     95 		Register(const Reference<Float4> &x, const Reference<Float4> &y, const Reference<Float4> &z, const Reference<Float4> &w) : x(x), y(y), z(z), w(w)
     96 		{
     97 		}
     98 
     99 		Reference<Float4> &operator[](int i)
    100 		{
    101 			switch(i)
    102 			{
    103 			default:
    104 			case 0: return x;
    105 			case 1: return y;
    106 			case 2: return z;
    107 			case 3: return w;
    108 			}
    109 		}
    110 
    111 		Register &operator=(const Register &rhs)
    112 		{
    113 			x = rhs.x;
    114 			y = rhs.y;
    115 			z = rhs.z;
    116 			w = rhs.w;
    117 
    118 			return *this;
    119 		}
    120 
    121 		Register &operator=(const Vector4f &rhs)
    122 		{
    123 			x = rhs.x;
    124 			y = rhs.y;
    125 			z = rhs.z;
    126 			w = rhs.w;
    127 
    128 			return *this;
    129 		}
    130 
    131 		operator Vector4f()
    132 		{
    133 			Vector4f v;
    134 
    135 			v.x = x;
    136 			v.y = y;
    137 			v.z = z;
    138 			v.w = w;
    139 
    140 			return v;
    141 		}
    142 
    143 		Reference<Float4> x;
    144 		Reference<Float4> y;
    145 		Reference<Float4> z;
    146 		Reference<Float4> w;
    147 	};
    148 
    149 	template<int S, bool D = false>
    150 	class RegisterArray
    151 	{
    152 	public:
    153 		RegisterArray(bool dynamic = D) : dynamic(dynamic)
    154 		{
    155 			if(dynamic)
    156 			{
    157 				x = new Array<Float4>(S);
    158 				y = new Array<Float4>(S);
    159 				z = new Array<Float4>(S);
    160 				w = new Array<Float4>(S);
    161 			}
    162 			else
    163 			{
    164 				x = new Array<Float4>[S];
    165 				y = new Array<Float4>[S];
    166 				z = new Array<Float4>[S];
    167 				w = new Array<Float4>[S];
    168 			}
    169 		}
    170 
    171 		~RegisterArray()
    172 		{
    173 			if(dynamic)
    174 			{
    175 				delete x;
    176 				delete y;
    177 				delete z;
    178 				delete w;
    179 			}
    180 			else
    181 			{
    182 				delete[] x;
    183 				delete[] y;
    184 				delete[] z;
    185 				delete[] w;
    186 			}
    187 		}
    188 
    189 		Register operator[](int i)
    190 		{
    191 			if(dynamic)
    192 			{
    193 				return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
    194 			}
    195 			else
    196 			{
    197 				return Register(x[i][0], y[i][0], z[i][0], w[i][0]);
    198 			}
    199 		}
    200 
    201 		Register operator[](RValue<Int> i)
    202 		{
    203 			ASSERT(dynamic);
    204 
    205 			return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
    206 		}
    207 
    208 	private:
    209 		const bool dynamic;
    210 		Array<Float4> *x;
    211 		Array<Float4> *y;
    212 		Array<Float4> *z;
    213 		Array<Float4> *w;
    214 	};
    215 
    216 	class ShaderCore
    217 	{
    218 		typedef Shader::Control Control;
    219 
    220 	public:
    221 		void mov(Vector4f &dst, const Vector4f &src, bool integerDestination = false);
    222 		void neg(Vector4f &dst, const Vector4f &src);
    223 		void ineg(Vector4f &dst, const Vector4f &src);
    224 		void f2b(Vector4f &dst, const Vector4f &src);
    225 		void b2f(Vector4f &dst, const Vector4f &src);
    226 		void f2i(Vector4f &dst, const Vector4f &src);
    227 		void i2f(Vector4f &dst, const Vector4f &src);
    228 		void f2u(Vector4f &dst, const Vector4f &src);
    229 		void u2f(Vector4f &dst, const Vector4f &src);
    230 		void i2b(Vector4f &dst, const Vector4f &src);
    231 		void b2i(Vector4f &dst, const Vector4f &src);
    232 		void add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    233 		void iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    234 		void sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    235 		void isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    236 		void mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
    237 		void imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
    238 		void mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    239 		void imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    240 		void rcpx(Vector4f &dst, const Vector4f &src, bool pp = false);
    241 		void div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    242 		void idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    243 		void udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    244 		void mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    245 		void imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    246 		void umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    247 		void shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    248 		void ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    249 		void ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    250 		void rsqx(Vector4f &dst, const Vector4f &src, bool pp = false);
    251 		void sqrt(Vector4f &dst, const Vector4f &src, bool pp = false);
    252 		void rsq(Vector4f &dst, const Vector4f &src, bool pp = false);
    253 		void len2(Float4 &dst, const Vector4f &src, bool pp = false);
    254 		void len3(Float4 &dst, const Vector4f &src, bool pp = false);
    255 		void len4(Float4 &dst, const Vector4f &src, bool pp = false);
    256 		void dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
    257 		void dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
    258 		void dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
    259 		void dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
    260 		void dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    261 		void dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    262 		void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
    263 		void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    264 		void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    265 		void det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    266 		void det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
    267 		void det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3);
    268 		void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    269 		void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    270 		void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    271 		void max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    272 		void imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    273 		void umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    274 		void slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    275 		void step(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    276 		void exp2x(Vector4f &dst, const Vector4f &src, bool pp = false);
    277 		void exp2(Vector4f &dst, const Vector4f &src, bool pp = false);
    278 		void exp(Vector4f &dst, const Vector4f &src, bool pp = false);
    279 		void log2x(Vector4f &dst, const Vector4f &src, bool pp = false);
    280 		void log2(Vector4f &dst, const Vector4f &src, bool pp = false);
    281 		void log(Vector4f &dst, const Vector4f &src, bool pp = false);
    282 		void lit(Vector4f &dst, const Vector4f &src);
    283 		void att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    284 		void lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
    285 		void smooth(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
    286 		void packHalf2x16(Vector4f &dst, const Vector4f &src);
    287 		void unpackHalf2x16(Vector4f &dst, const Vector4f &src);
    288 		void packSnorm2x16(Vector4f &dst, const Vector4f &src);
    289 		void packUnorm2x16(Vector4f &dst, const Vector4f &src);
    290 		void unpackSnorm2x16(Vector4f &dst, const Vector4f &src);
    291 		void unpackUnorm2x16(Vector4f &dst, const Vector4f &src);
    292 		void frc(Vector4f &dst, const Vector4f &src);
    293 		void trunc(Vector4f &dst, const Vector4f &src);
    294 		void floor(Vector4f &dst, const Vector4f &src);
    295 		void round(Vector4f &dst, const Vector4f &src);
    296 		void roundEven(Vector4f &dst, const Vector4f &src);
    297 		void ceil(Vector4f &dst, const Vector4f &src);
    298 		void powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
    299 		void pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
    300 		void crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    301 		void forward1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
    302 		void forward2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
    303 		void forward3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
    304 		void forward4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
    305 		void reflect1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    306 		void reflect2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    307 		void reflect3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    308 		void reflect4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    309 		void refract1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
    310 		void refract2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
    311 		void refract3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
    312 		void refract4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
    313 		void sgn(Vector4f &dst, const Vector4f &src);
    314 		void isgn(Vector4f &dst, const Vector4f &src);
    315 		void abs(Vector4f &dst, const Vector4f &src);
    316 		void iabs(Vector4f &dst, const Vector4f &src);
    317 		void nrm2(Vector4f &dst, const Vector4f &src, bool pp = false);
    318 		void nrm3(Vector4f &dst, const Vector4f &src, bool pp = false);
    319 		void nrm4(Vector4f &dst, const Vector4f &src, bool pp = false);
    320 		void sincos(Vector4f &dst, const Vector4f &src, bool pp = false);
    321 		void cos(Vector4f &dst, const Vector4f &src, bool pp = false);
    322 		void sin(Vector4f &dst, const Vector4f &src, bool pp = false);
    323 		void tan(Vector4f &dst, const Vector4f &src, bool pp = false);
    324 		void acos(Vector4f &dst, const Vector4f &src, bool pp = false);
    325 		void asin(Vector4f &dst, const Vector4f &src, bool pp = false);
    326 		void atan(Vector4f &dst, const Vector4f &src, bool pp = false);
    327 		void atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
    328 		void cosh(Vector4f &dst, const Vector4f &src, bool pp = false);
    329 		void sinh(Vector4f &dst, const Vector4f &src, bool pp = false);
    330 		void tanh(Vector4f &dst, const Vector4f &src, bool pp = false);
    331 		void acosh(Vector4f &dst, const Vector4f &src, bool pp = false);
    332 		void asinh(Vector4f &dst, const Vector4f &src, bool pp = false);
    333 		void atanh(Vector4f &dst, const Vector4f &src, bool pp = false);
    334 		void expp(Vector4f &dst, const Vector4f &src, unsigned short version);
    335 		void logp(Vector4f &dst, const Vector4f &src, unsigned short version);
    336 		void cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
    337 		void cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
    338 		void icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
    339 		void ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
    340 		void select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
    341 		void extract(Float4 &dst, const Vector4f &src0, const Float4 &src1);
    342 		void insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index);
    343 		void all(Float4 &dst, const Vector4f &src);
    344 		void any(Float4 &dst, const Vector4f &src);
    345 		void bitwise_not(Vector4f &dst, const Vector4f &src);
    346 		void bitwise_or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    347 		void bitwise_xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    348 		void bitwise_and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    349 		void equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    350 		void notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
    351 
    352 	private:
    353 		void sgn(Float4 &dst, const Float4 &src);
    354 		void isgn(Float4 &dst, const Float4 &src);
    355 		void cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
    356 		void cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
    357 		void select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2);
    358 		void floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits);
    359 		void halfToFloatBits(Float4& dst, const Float4& halfBits);
    360 	};
    361 }
    362 
    363 #endif   // sw_ShaderCore_hpp
    364