Home | History | Annotate | Download | only in libpixelflinger
      1 /* libs/pixelflinger/fixed.cpp
      2 **
      3 ** Copyright 2006, The Android Open Source Project
      4 **
      5 ** Licensed under the Apache License, Version 2.0 (the "License");
      6 ** you may not use this file except in compliance with the License.
      7 ** You may obtain a copy of the License at
      8 **
      9 **     http://www.apache.org/licenses/LICENSE-2.0
     10 **
     11 ** Unless required by applicable law or agreed to in writing, software
     12 ** distributed under the License is distributed on an "AS IS" BASIS,
     13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 ** See the License for the specific language governing permissions and
     15 ** limitations under the License.
     16 */
     17 
     18 #include <stdio.h>
     19 
     20 #include <private/pixelflinger/ggl_context.h>
     21 #include <private/pixelflinger/ggl_fixed.h>
     22 
     23 
     24 // ------------------------------------------------------------------------
     25 
     26 int32_t gglRecipQNormalized(int32_t x, int* exponent)
     27 {
     28     const int32_t s = x>>31;
     29     uint32_t a = s ? -x : x;
     30 
     31     // the result will overflow, so just set it to the biggest/inf value
     32     if (ggl_unlikely(a <= 2LU)) {
     33         *exponent = 0;
     34         return s ? FIXED_MIN : FIXED_MAX;
     35     }
     36 
     37     // Newton-Raphson iteration:
     38     // x = r*(2 - a*r)
     39 
     40     const int32_t lz = gglClz(a);
     41     a <<= lz;  // 0.32
     42     uint32_t r = a;
     43     // note: if a == 0x80000000, this means x was a power-of-2, in this
     44     // case we don't need to compute anything. We get the reciprocal for
     45     // (almost) free.
     46     if (a != 0x80000000) {
     47         r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a
     48         // 0.32 + 2.30 = 2.62 -> 2.30
     49         // 2.30 + 2.30 = 4.60 -> 2.30
     50         r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
     51         r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
     52     }
     53 
     54     // shift right 1-bit to make room for the sign bit
     55     *exponent = 30-lz-1;
     56     r >>= 1;
     57     return s ? -r : r;
     58 }
     59 
     60 int32_t gglRecipQ(GGLfixed x, int q)
     61 {
     62     int shift;
     63     x = gglRecipQNormalized(x, &shift);
     64     shift += 16-q;
     65     if (shift > 0)
     66         x += 1L << (shift-1);   // rounding
     67     x >>= shift;
     68     return x;
     69 }
     70 
     71 // ------------------------------------------------------------------------
     72 
     73 GGLfixed gglFastDivx(GGLfixed n, GGLfixed d)
     74 {
     75     if ((d>>24) && ((d>>24)+1)) {
     76         n >>= 8;
     77         d >>= 8;
     78     }
     79     return gglMulx(n, gglRecip(d));
     80 }
     81 
     82 // ------------------------------------------------------------------------
     83 
     84 static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = {
     85     // 1/sqrt(x) with x = 1-N/16, N=[8...1]
     86     0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865
     87 };
     88 
     89 GGLfixed gglSqrtRecipx(GGLfixed x)
     90 {
     91     if (x == 0)         return FIXED_MAX;
     92     if (x == FIXED_ONE) return x;
     93     const GGLfixed a = x;
     94     const int32_t lz = gglClz(x);
     95     x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7];
     96     const int32_t exp = lz - 16;
     97     if (exp <= 0)   x >>= -exp>>1;
     98     else            x <<= (exp>>1) + (exp & 1);
     99     if (exp & 1) {
    100         x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1;
    101     }
    102     // 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x)
    103     x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
    104     x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
    105     return x;
    106 }
    107 
    108 GGLfixed gglSqrtx(GGLfixed a)
    109 {
    110     // Compute a full precision square-root (24 bits accuracy)
    111     GGLfixed r = 0;
    112     GGLfixed bit = 0x800000;
    113     int32_t bshift = 15;
    114     do {
    115         GGLfixed temp = bit + (r<<1);
    116         if (bshift >= 8)    temp <<= (bshift-8);
    117         else                temp >>= (8-bshift);
    118         if (a >= temp) {
    119             r += bit;
    120             a -= temp;
    121         }
    122         bshift--;
    123     } while (bit>>=1);
    124     return r;
    125 }
    126 
    127 // ------------------------------------------------------------------------
    128 
    129 static const GGLfixed ggl_log_approx_tab[] = {
    130     // -ln(x)/ln(2) with x = N/16, N=[8...16]
    131     0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000
    132 };
    133 
    134 static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0]
    135 	0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000
    136 };
    137 
    138 GGLfixed gglPowx(GGLfixed x, GGLfixed y)
    139 {
    140     // prerequisite: 0 <= x <= 1, and y >=0
    141 
    142     // pow(x,y) = 2^(y*log2(x))
    143     // =  2^(y*log2(x*(2^exp)*(2^-exp))))
    144     // =  2^(y*(log2(X)-exp))
    145     // =  2^(log2(X)*y - y*exp)
    146     // =  2^( - (-log2(X)*y + y*exp) )
    147 
    148     int32_t exp = gglClz(x) - 16;
    149     GGLfixed f = x << exp;
    150     x = (f & 0x0FFF)<<4;
    151     f = (f >> 12) & 0x7;
    152     GGLfixed p = gglMulAddx(
    153             ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x,
    154             ggl_log_approx_tab[f]);
    155     p = gglMulAddx(p, y, y*exp);
    156     exp = gglFixedToIntFloor(p);
    157     if (exp < 31) {
    158         p = gglFracx(p);
    159         x = (p & 0x1FFF)<<3;
    160         p >>= 13;
    161         p = gglMulAddx(
    162                 ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x,
    163                 ggl_alog_approx_tab[p]);
    164         p >>= exp;
    165     } else {
    166         p = 0;
    167     }
    168     return p;
    169         // ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f;
    170 }
    171 
    172 // ------------------------------------------------------------------------
    173 
    174 int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i)
    175 {
    176     //int32_t r =int32_t((int64_t(n)<<i)/d);
    177     const int32_t ds = n^d;
    178     if (n<0) n = -n;
    179     if (d<0) d = -d;
    180     int nd = gglClz(d) - gglClz(n);
    181     i += nd + 1;
    182     if (nd > 0) d <<= nd;
    183     else        n <<= -nd;
    184     uint32_t q = 0;
    185 
    186     int j = i & 7;
    187     i >>= 3;
    188 
    189     // gcc deals with the code below pretty well.
    190     // we get 3.75 cycles per bit in the main loop
    191     // and 8 cycles per bit in the termination loop
    192     if (ggl_likely(i)) {
    193         n -= d;
    194         do {
    195             q <<= 8;
    196             if (n>=0)   q |= 128;
    197             else        n += d;
    198             n = n*2 - d;
    199             if (n>=0)   q |= 64;
    200             else        n += d;
    201             n = n*2 - d;
    202             if (n>=0)   q |= 32;
    203             else        n += d;
    204             n = n*2 - d;
    205             if (n>=0)   q |= 16;
    206             else        n += d;
    207             n = n*2 - d;
    208             if (n>=0)   q |= 8;
    209             else        n += d;
    210             n = n*2 - d;
    211             if (n>=0)   q |= 4;
    212             else        n += d;
    213             n = n*2 - d;
    214             if (n>=0)   q |= 2;
    215             else        n += d;
    216             n = n*2 - d;
    217             if (n>=0)   q |= 1;
    218             else        n += d;
    219 
    220             if (--i == 0)
    221                 goto finish;
    222 
    223             n = n*2 - d;
    224         } while(true);
    225         do {
    226             q <<= 1;
    227             n = n*2 - d;
    228             if (n>=0)   q |= 1;
    229             else        n += d;
    230         finish: ;
    231         } while (j--);
    232         return (ds<0) ? -q : q;
    233     }
    234 
    235     n -= d;
    236     if (n>=0)   q |= 1;
    237     else        n += d;
    238     j--;
    239     goto finish;
    240 }
    241 
    242 // ------------------------------------------------------------------------
    243 
    244 // assumes that the int32_t values of a, b, and c are all positive
    245 // use when both a and b are larger than c
    246 
    247 template <typename T>
    248 static inline void swap(T& a, T& b) {
    249     T t(a);
    250     a = b;
    251     b = t;
    252 }
    253 
    254 static __attribute__((noinline))
    255 int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c)
    256 {
    257 	// first we compute a*b as a 64-bit integer
    258     // (GCC generates umull with the code below)
    259     uint64_t ab = uint64_t(a)*b;
    260     uint32_t hi = ab>>32;
    261     uint32_t lo = ab;
    262     uint32_t result;
    263 
    264 	// now perform the division
    265 	if (hi >= c) {
    266 	overflow:
    267 		result = 0x7fffffff;  // basic overflow
    268 	} else if (hi == 0) {
    269 		result = lo/c;  // note: c can't be 0
    270 		if ((result >> 31) != 0)  // result must fit in 31 bits
    271 			goto overflow;
    272 	} else {
    273 		uint32_t r = hi;
    274 		int bits = 31;
    275 	    result = 0;
    276 		do {
    277 			r = (r << 1) | (lo >> 31);
    278 			lo <<= 1;
    279 			result <<= 1;
    280 			if (r >= c) {
    281 				r -= c;
    282 				result |= 1;
    283 			}
    284 		} while (bits--);
    285 	}
    286 	return int32_t(result);
    287 }
    288 
    289 // assumes a >= 0 and c >= b >= 0
    290 static inline
    291 int32_t quick_muldiv(int32_t a, int32_t b, int32_t c)
    292 {
    293     int32_t r = 0, q = 0, i;
    294     int leading = gglClz(a);
    295     i = 32 - leading;
    296     a <<= leading;
    297     do {
    298         r <<= 1;
    299         if (a < 0)
    300             r += b;
    301         a <<= 1;
    302         q <<= 1;
    303         if (r >= c) {
    304             r -= c;
    305             q++;
    306         }
    307         asm(""::); // gcc generates better code this way
    308         if (r >= c) {
    309             r -= c;
    310             q++;
    311         }
    312     }
    313     while (--i);
    314     return q;
    315 }
    316 
    317 // this function computes a*b/c with 64-bit intermediate accuracy
    318 // overflows (e.g. division by 0) are handled and return INT_MAX
    319 
    320 int32_t gglMulDivi(int32_t a, int32_t b, int32_t c)
    321 {
    322 	int32_t result;
    323 	int32_t sign = a^b^c;
    324 
    325 	if (a < 0) a = -a;
    326 	if (b < 0) b = -b;
    327 	if (c < 0) c = -c;
    328 
    329     if (a < b) {
    330         swap(a, b);
    331     }
    332 
    333 	if (b <= c) result = quick_muldiv(a, b, c);
    334 	else        result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c);
    335 
    336 	if (sign < 0)
    337 		result = -result;
    338 
    339     return result;
    340 }
    341