1 /* libs/pixelflinger/fixed.cpp 2 ** 3 ** Copyright 2006, The Android Open Source Project 4 ** 5 ** Licensed under the Apache License, Version 2.0 (the "License"); 6 ** you may not use this file except in compliance with the License. 7 ** You may obtain a copy of the License at 8 ** 9 ** http://www.apache.org/licenses/LICENSE-2.0 10 ** 11 ** Unless required by applicable law or agreed to in writing, software 12 ** distributed under the License is distributed on an "AS IS" BASIS, 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 ** See the License for the specific language governing permissions and 15 ** limitations under the License. 16 */ 17 18 #include <stdio.h> 19 20 #include <private/pixelflinger/ggl_context.h> 21 #include <private/pixelflinger/ggl_fixed.h> 22 23 24 // ------------------------------------------------------------------------ 25 26 int32_t gglRecipQNormalized(int32_t x, int* exponent) 27 { 28 const int32_t s = x>>31; 29 uint32_t a = s ? -x : x; 30 31 // the result will overflow, so just set it to the biggest/inf value 32 if (ggl_unlikely(a <= 2LU)) { 33 *exponent = 0; 34 return s ? FIXED_MIN : FIXED_MAX; 35 } 36 37 // Newton-Raphson iteration: 38 // x = r*(2 - a*r) 39 40 const int32_t lz = gglClz(a); 41 a <<= lz; // 0.32 42 uint32_t r = a; 43 // note: if a == 0x80000000, this means x was a power-of-2, in this 44 // case we don't need to compute anything. We get the reciprocal for 45 // (almost) free. 46 if (a != 0x80000000) { 47 r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a 48 // 0.32 + 2.30 = 2.62 -> 2.30 49 // 2.30 + 2.30 = 4.60 -> 2.30 50 r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30; 51 r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30; 52 } 53 54 // shift right 1-bit to make room for the sign bit 55 *exponent = 30-lz-1; 56 r >>= 1; 57 return s ? -r : r; 58 } 59 60 int32_t gglRecipQ(GGLfixed x, int q) 61 { 62 int shift; 63 x = gglRecipQNormalized(x, &shift); 64 shift += 16-q; 65 if (shift > 0) 66 x += 1L << (shift-1); // rounding 67 x >>= shift; 68 return x; 69 } 70 71 // ------------------------------------------------------------------------ 72 73 GGLfixed gglFastDivx(GGLfixed n, GGLfixed d) 74 { 75 if ((d>>24) && ((d>>24)+1)) { 76 n >>= 8; 77 d >>= 8; 78 } 79 return gglMulx(n, gglRecip(d)); 80 } 81 82 // ------------------------------------------------------------------------ 83 84 static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = { 85 // 1/sqrt(x) with x = 1-N/16, N=[8...1] 86 0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865 87 }; 88 89 GGLfixed gglSqrtRecipx(GGLfixed x) 90 { 91 if (x == 0) return FIXED_MAX; 92 if (x == FIXED_ONE) return x; 93 const GGLfixed a = x; 94 const int32_t lz = gglClz(x); 95 x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7]; 96 const int32_t exp = lz - 16; 97 if (exp <= 0) x >>= -exp>>1; 98 else x <<= (exp>>1) + (exp & 1); 99 if (exp & 1) { 100 x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1; 101 } 102 // 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x) 103 x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x))); 104 x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x))); 105 return x; 106 } 107 108 GGLfixed gglSqrtx(GGLfixed a) 109 { 110 // Compute a full precision square-root (24 bits accuracy) 111 GGLfixed r = 0; 112 GGLfixed bit = 0x800000; 113 int32_t bshift = 15; 114 do { 115 GGLfixed temp = bit + (r<<1); 116 if (bshift >= 8) temp <<= (bshift-8); 117 else temp >>= (8-bshift); 118 if (a >= temp) { 119 r += bit; 120 a -= temp; 121 } 122 bshift--; 123 } while (bit>>=1); 124 return r; 125 } 126 127 // ------------------------------------------------------------------------ 128 129 static const GGLfixed ggl_log_approx_tab[] = { 130 // -ln(x)/ln(2) with x = N/16, N=[8...16] 131 0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000 132 }; 133 134 static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0] 135 0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000 136 }; 137 138 GGLfixed gglPowx(GGLfixed x, GGLfixed y) 139 { 140 // prerequisite: 0 <= x <= 1, and y >=0 141 142 // pow(x,y) = 2^(y*log2(x)) 143 // = 2^(y*log2(x*(2^exp)*(2^-exp)))) 144 // = 2^(y*(log2(X)-exp)) 145 // = 2^(log2(X)*y - y*exp) 146 // = 2^( - (-log2(X)*y + y*exp) ) 147 148 int32_t exp = gglClz(x) - 16; 149 GGLfixed f = x << exp; 150 x = (f & 0x0FFF)<<4; 151 f = (f >> 12) & 0x7; 152 GGLfixed p = gglMulAddx( 153 ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x, 154 ggl_log_approx_tab[f]); 155 p = gglMulAddx(p, y, y*exp); 156 exp = gglFixedToIntFloor(p); 157 if (exp < 31) { 158 p = gglFracx(p); 159 x = (p & 0x1FFF)<<3; 160 p >>= 13; 161 p = gglMulAddx( 162 ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x, 163 ggl_alog_approx_tab[p]); 164 p >>= exp; 165 } else { 166 p = 0; 167 } 168 return p; 169 // ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f; 170 } 171 172 // ------------------------------------------------------------------------ 173 174 int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i) 175 { 176 //int32_t r =int32_t((int64_t(n)<<i)/d); 177 const int32_t ds = n^d; 178 if (n<0) n = -n; 179 if (d<0) d = -d; 180 int nd = gglClz(d) - gglClz(n); 181 i += nd + 1; 182 if (nd > 0) d <<= nd; 183 else n <<= -nd; 184 uint32_t q = 0; 185 186 int j = i & 7; 187 i >>= 3; 188 189 // gcc deals with the code below pretty well. 190 // we get 3.75 cycles per bit in the main loop 191 // and 8 cycles per bit in the termination loop 192 if (ggl_likely(i)) { 193 n -= d; 194 do { 195 q <<= 8; 196 if (n>=0) q |= 128; 197 else n += d; 198 n = n*2 - d; 199 if (n>=0) q |= 64; 200 else n += d; 201 n = n*2 - d; 202 if (n>=0) q |= 32; 203 else n += d; 204 n = n*2 - d; 205 if (n>=0) q |= 16; 206 else n += d; 207 n = n*2 - d; 208 if (n>=0) q |= 8; 209 else n += d; 210 n = n*2 - d; 211 if (n>=0) q |= 4; 212 else n += d; 213 n = n*2 - d; 214 if (n>=0) q |= 2; 215 else n += d; 216 n = n*2 - d; 217 if (n>=0) q |= 1; 218 else n += d; 219 220 if (--i == 0) 221 goto finish; 222 223 n = n*2 - d; 224 } while(true); 225 do { 226 q <<= 1; 227 n = n*2 - d; 228 if (n>=0) q |= 1; 229 else n += d; 230 finish: ; 231 } while (j--); 232 return (ds<0) ? -q : q; 233 } 234 235 n -= d; 236 if (n>=0) q |= 1; 237 else n += d; 238 j--; 239 goto finish; 240 } 241 242 // ------------------------------------------------------------------------ 243 244 // assumes that the int32_t values of a, b, and c are all positive 245 // use when both a and b are larger than c 246 247 template <typename T> 248 static inline void swap(T& a, T& b) { 249 T t(a); 250 a = b; 251 b = t; 252 } 253 254 static __attribute__((noinline)) 255 int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c) 256 { 257 // first we compute a*b as a 64-bit integer 258 // (GCC generates umull with the code below) 259 uint64_t ab = uint64_t(a)*b; 260 uint32_t hi = ab>>32; 261 uint32_t lo = ab; 262 uint32_t result; 263 264 // now perform the division 265 if (hi >= c) { 266 overflow: 267 result = 0x7fffffff; // basic overflow 268 } else if (hi == 0) { 269 result = lo/c; // note: c can't be 0 270 if ((result >> 31) != 0) // result must fit in 31 bits 271 goto overflow; 272 } else { 273 uint32_t r = hi; 274 int bits = 31; 275 result = 0; 276 do { 277 r = (r << 1) | (lo >> 31); 278 lo <<= 1; 279 result <<= 1; 280 if (r >= c) { 281 r -= c; 282 result |= 1; 283 } 284 } while (bits--); 285 } 286 return int32_t(result); 287 } 288 289 // assumes a >= 0 and c >= b >= 0 290 static inline 291 int32_t quick_muldiv(int32_t a, int32_t b, int32_t c) 292 { 293 int32_t r = 0, q = 0, i; 294 int leading = gglClz(a); 295 i = 32 - leading; 296 a <<= leading; 297 do { 298 r <<= 1; 299 if (a < 0) 300 r += b; 301 a <<= 1; 302 q <<= 1; 303 if (r >= c) { 304 r -= c; 305 q++; 306 } 307 asm(""::); // gcc generates better code this way 308 if (r >= c) { 309 r -= c; 310 q++; 311 } 312 } 313 while (--i); 314 return q; 315 } 316 317 // this function computes a*b/c with 64-bit intermediate accuracy 318 // overflows (e.g. division by 0) are handled and return INT_MAX 319 320 int32_t gglMulDivi(int32_t a, int32_t b, int32_t c) 321 { 322 int32_t result; 323 int32_t sign = a^b^c; 324 325 if (a < 0) a = -a; 326 if (b < 0) b = -b; 327 if (c < 0) c = -c; 328 329 if (a < b) { 330 swap(a, b); 331 } 332 333 if (b <= c) result = quick_muldiv(a, b, c); 334 else result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c); 335 336 if (sign < 0) 337 result = -result; 338 339 return result; 340 } 341