1 /* libs/pixelflinger/fixed.cpp 2 ** 3 ** Copyright 2006, The Android Open Source Project 4 ** 5 ** Licensed under the Apache License, Version 2.0 (the "License"); 6 ** you may not use this file except in compliance with the License. 7 ** You may obtain a copy of the License at 8 ** 9 ** http://www.apache.org/licenses/LICENSE-2.0 10 ** 11 ** Unless required by applicable law or agreed to in writing, software 12 ** distributed under the License is distributed on an "AS IS" BASIS, 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 ** See the License for the specific language governing permissions and 15 ** limitations under the License. 16 */ 17 18 #include <stdio.h> 19 20 #include <private/pixelflinger/ggl_context.h> 21 #include <private/pixelflinger/ggl_fixed.h> 22 23 24 // ------------------------------------------------------------------------ 25 26 int32_t gglRecipQNormalized(int32_t x, int* exponent) 27 { 28 const int32_t s = x>>31; 29 uint32_t a = s ? -x : x; 30 31 // the result will overflow, so just set it to the biggest/inf value 32 if (ggl_unlikely(a <= 2LU)) { 33 *exponent = 0; 34 return s ? FIXED_MIN : FIXED_MAX; 35 } 36 37 // Newton-Raphson iteration: 38 // x = r*(2 - a*r) 39 40 const int32_t lz = gglClz(a); 41 a <<= lz; // 0.32 42 uint32_t r = a; 43 // note: if a == 0x80000000, this means x was a power-of-2, in this 44 // case we don't need to compute anything. We get the reciprocal for 45 // (almost) free. 46 if (a != 0x80000000) { 47 r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a 48 // 0.32 + 2.30 = 2.62 -> 2.30 49 // 2.30 + 2.30 = 4.60 -> 2.30 50 r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30; 51 r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30; 52 } 53 54 // shift right 1-bit to make room for the sign bit 55 *exponent = 30-lz-1; 56 r >>= 1; 57 return s ? -r : r; 58 } 59 60 int32_t gglRecipQ(GGLfixed x, int q) 61 { 62 int shift; 63 x = gglRecipQNormalized(x, &shift); 64 shift += 16-q; 65 x += 1L << (shift-1); // rounding 66 x >>= shift; 67 return x; 68 } 69 70 // ------------------------------------------------------------------------ 71 72 GGLfixed gglFastDivx(GGLfixed n, GGLfixed d) 73 { 74 if ((d>>24) && ((d>>24)+1)) { 75 n >>= 8; 76 d >>= 8; 77 } 78 return gglMulx(n, gglRecip(d)); 79 } 80 81 // ------------------------------------------------------------------------ 82 83 static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = { 84 // 1/sqrt(x) with x = 1-N/16, N=[8...1] 85 0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865 86 }; 87 88 GGLfixed gglSqrtRecipx(GGLfixed x) 89 { 90 if (x == 0) return FIXED_MAX; 91 if (x == FIXED_ONE) return x; 92 const GGLfixed a = x; 93 const int32_t lz = gglClz(x); 94 x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7]; 95 const int32_t exp = lz - 16; 96 if (exp <= 0) x >>= -exp>>1; 97 else x <<= (exp>>1) + (exp & 1); 98 if (exp & 1) { 99 x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1; 100 } 101 // 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x) 102 x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x))); 103 x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x))); 104 return x; 105 } 106 107 GGLfixed gglSqrtx(GGLfixed a) 108 { 109 // Compute a full precision square-root (24 bits accuracy) 110 GGLfixed r = 0; 111 GGLfixed bit = 0x800000; 112 int32_t bshift = 15; 113 do { 114 GGLfixed temp = bit + (r<<1); 115 if (bshift >= 8) temp <<= (bshift-8); 116 else temp >>= (8-bshift); 117 if (a >= temp) { 118 r += bit; 119 a -= temp; 120 } 121 bshift--; 122 } while (bit>>=1); 123 return r; 124 } 125 126 // ------------------------------------------------------------------------ 127 128 static const GGLfixed ggl_log_approx_tab[] = { 129 // -ln(x)/ln(2) with x = N/16, N=[8...16] 130 0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000 131 }; 132 133 static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0] 134 0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000 135 }; 136 137 GGLfixed gglPowx(GGLfixed x, GGLfixed y) 138 { 139 // prerequisite: 0 <= x <= 1, and y >=0 140 141 // pow(x,y) = 2^(y*log2(x)) 142 // = 2^(y*log2(x*(2^exp)*(2^-exp)))) 143 // = 2^(y*(log2(X)-exp)) 144 // = 2^(log2(X)*y - y*exp) 145 // = 2^( - (-log2(X)*y + y*exp) ) 146 147 int32_t exp = gglClz(x) - 16; 148 GGLfixed f = x << exp; 149 x = (f & 0x0FFF)<<4; 150 f = (f >> 12) & 0x7; 151 GGLfixed p = gglMulAddx( 152 ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x, 153 ggl_log_approx_tab[f]); 154 p = gglMulAddx(p, y, y*exp); 155 exp = gglFixedToIntFloor(p); 156 if (exp < 31) { 157 p = gglFracx(p); 158 x = (p & 0x1FFF)<<3; 159 p >>= 13; 160 p = gglMulAddx( 161 ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x, 162 ggl_alog_approx_tab[p]); 163 p >>= exp; 164 } else { 165 p = 0; 166 } 167 return p; 168 // ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f; 169 } 170 171 // ------------------------------------------------------------------------ 172 173 int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i) 174 { 175 //int32_t r =int32_t((int64_t(n)<<i)/d); 176 const int32_t ds = n^d; 177 if (n<0) n = -n; 178 if (d<0) d = -d; 179 int nd = gglClz(d) - gglClz(n); 180 i += nd + 1; 181 if (nd > 0) d <<= nd; 182 else n <<= -nd; 183 uint32_t q = 0; 184 185 int j = i & 7; 186 i >>= 3; 187 188 // gcc deals with the code below pretty well. 189 // we get 3.75 cycles per bit in the main loop 190 // and 8 cycles per bit in the termination loop 191 if (ggl_likely(i)) { 192 n -= d; 193 do { 194 q <<= 8; 195 if (n>=0) q |= 128; 196 else n += d; 197 n = n*2 - d; 198 if (n>=0) q |= 64; 199 else n += d; 200 n = n*2 - d; 201 if (n>=0) q |= 32; 202 else n += d; 203 n = n*2 - d; 204 if (n>=0) q |= 16; 205 else n += d; 206 n = n*2 - d; 207 if (n>=0) q |= 8; 208 else n += d; 209 n = n*2 - d; 210 if (n>=0) q |= 4; 211 else n += d; 212 n = n*2 - d; 213 if (n>=0) q |= 2; 214 else n += d; 215 n = n*2 - d; 216 if (n>=0) q |= 1; 217 else n += d; 218 219 if (--i == 0) 220 goto finish; 221 222 n = n*2 - d; 223 } while(true); 224 do { 225 q <<= 1; 226 n = n*2 - d; 227 if (n>=0) q |= 1; 228 else n += d; 229 finish: ; 230 } while (j--); 231 return (ds<0) ? -q : q; 232 } 233 234 n -= d; 235 if (n>=0) q |= 1; 236 else n += d; 237 j--; 238 goto finish; 239 } 240 241 // ------------------------------------------------------------------------ 242 243 // assumes that the int32_t values of a, b, and c are all positive 244 // use when both a and b are larger than c 245 246 template <typename T> 247 static inline void swap(T& a, T& b) { 248 T t(a); 249 a = b; 250 b = t; 251 } 252 253 static __attribute__((noinline)) 254 int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c) 255 { 256 // first we compute a*b as a 64-bit integer 257 // (GCC generates umull with the code below) 258 uint64_t ab = uint64_t(a)*b; 259 uint32_t hi = ab>>32; 260 uint32_t lo = ab; 261 uint32_t result; 262 263 // now perform the division 264 if (hi >= c) { 265 overflow: 266 result = 0x7fffffff; // basic overflow 267 } else if (hi == 0) { 268 result = lo/c; // note: c can't be 0 269 if ((result >> 31) != 0) // result must fit in 31 bits 270 goto overflow; 271 } else { 272 uint32_t r = hi; 273 int bits = 31; 274 result = 0; 275 do { 276 r = (r << 1) | (lo >> 31); 277 lo <<= 1; 278 result <<= 1; 279 if (r >= c) { 280 r -= c; 281 result |= 1; 282 } 283 } while (bits--); 284 } 285 return int32_t(result); 286 } 287 288 // assumes a >= 0 and c >= b >= 0 289 static inline 290 int32_t quick_muldiv(int32_t a, int32_t b, int32_t c) 291 { 292 int32_t r = 0, q = 0, i; 293 int leading = gglClz(a); 294 i = 32 - leading; 295 a <<= leading; 296 do { 297 r <<= 1; 298 if (a < 0) 299 r += b; 300 a <<= 1; 301 q <<= 1; 302 if (r >= c) { 303 r -= c; 304 q++; 305 } 306 asm(""::); // gcc generates better code this way 307 if (r >= c) { 308 r -= c; 309 q++; 310 } 311 } 312 while (--i); 313 return q; 314 } 315 316 // this function computes a*b/c with 64-bit intermediate accuracy 317 // overflows (e.g. division by 0) are handled and return INT_MAX 318 319 int32_t gglMulDivi(int32_t a, int32_t b, int32_t c) 320 { 321 int32_t result; 322 int32_t sign = a^b^c; 323 324 if (a < 0) a = -a; 325 if (b < 0) b = -b; 326 if (c < 0) c = -c; 327 328 if (a < b) { 329 swap(a, b); 330 } 331 332 if (b <= c) result = quick_muldiv(a, b, c); 333 else result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c); 334 335 if (sign < 0) 336 result = -result; 337 338 return result; 339 } 340