/*
 * Copyright (C) 2005 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_GGL_FIXED_H
#define ANDROID_GGL_FIXED_H

#include <math.h>
#include <pixelflinger/pixelflinger.h>

// ----------------------------------------------------------------------------

// Function attribute shorthands (GCC/Clang extensions).
// CONST: result depends only on the arguments (enables CSE of calls).
#define CONST           __attribute__((const))
#define ALWAYS_INLINE   __attribute__((always_inline))

// Signed 16.16 fixed-point parameters.
// NOTE(review): GGLfixed is presumably a 32-bit signed integer typedef from
// pixelflinger.h -- the constants below assume exactly 32 bits.
const GGLfixed FIXED_BITS = 16;                 // number of fraction bits
const GGLfixed FIXED_EPSILON = 1;               // smallest representable step
const GGLfixed FIXED_ONE = 1L<<FIXED_BITS;      // 1.0 in 16.16
const GGLfixed FIXED_HALF = 1L<<(FIXED_BITS-1); // 0.5 in 16.16
const GGLfixed FIXED_MIN = 0x80000000L;         // most negative 16.16 value
const GGLfixed FIXED_MAX = 0x7FFFFFFFL;         // most positive 16.16 value

// Basic int <-> fixed conversions; all force-inlined.
inline GGLfixed gglIntToFixed(GGLfixed i) ALWAYS_INLINE ;
inline GGLfixed gglFixedToIntRound(GGLfixed f) ALWAYS_INLINE ;
inline GGLfixed gglFixedToIntFloor(GGLfixed f) ALWAYS_INLINE ;
inline GGLfixed gglFixedToIntCeil(GGLfixed f) ALWAYS_INLINE ;
inline GGLfixed gglFracx(GGLfixed v) ALWAYS_INLINE ;
inline GGLfixed gglFloorx(GGLfixed v) ALWAYS_INLINE ;
inline GGLfixed gglCeilx(GGLfixed v) ALWAYS_INLINE ;
inline GGLfixed gglCenterx(GGLfixed v) ALWAYS_INLINE ;
inline GGLfixed gglRoundx(GGLfixed v) ALWAYS_INLINE ;

// Integer -> 16.16 (overflows silently for |i| >= 32768).
GGLfixed gglIntToFixed(GGLfixed i) {
    return i<<FIXED_BITS;
}
// 16.16 -> integer, rounding to nearest (half away from zero for positives).
GGLfixed gglFixedToIntRound(GGLfixed f) {
    return (f + FIXED_HALF)>>FIXED_BITS;
}
// 16.16 -> integer, rounding toward negative infinity (arithmetic shift).
GGLfixed gglFixedToIntFloor(GGLfixed f) {
    return f>>FIXED_BITS;
}
GGLfixed gglFixedToIntCeil(GGLfixed f) { 55 return (f + ((1<<FIXED_BITS) - 1))>>FIXED_BITS; 56 } 57 58 GGLfixed gglFracx(GGLfixed v) { 59 return v & ((1<<FIXED_BITS)-1); 60 } 61 GGLfixed gglFloorx(GGLfixed v) { 62 return gglFixedToIntFloor(v)<<FIXED_BITS; 63 } 64 GGLfixed gglCeilx(GGLfixed v) { 65 return gglFixedToIntCeil(v)<<FIXED_BITS; 66 } 67 GGLfixed gglCenterx(GGLfixed v) { 68 return gglFloorx(v + FIXED_HALF) | FIXED_HALF; 69 } 70 GGLfixed gglRoundx(GGLfixed v) { 71 return gglFixedToIntRound(v)<<FIXED_BITS; 72 } 73 74 // conversion from (unsigned) int, short, byte to fixed... 75 #define GGL_B_TO_X(_x) GGLfixed( ((int32_t(_x)+1)>>1)<<10 ) 76 #define GGL_S_TO_X(_x) GGLfixed( ((int32_t(_x)+1)>>1)<<2 ) 77 #define GGL_I_TO_X(_x) GGLfixed( ((int32_t(_x)>>1)+1)>>14 ) 78 #define GGL_UB_TO_X(_x) GGLfixed( uint32_t(_x) + \ 79 (uint32_t(_x)<<8) + \ 80 (uint32_t(_x)>>7) ) 81 #define GGL_US_TO_X(_x) GGLfixed( (_x) + ((_x)>>15) ) 82 #define GGL_UI_TO_X(_x) GGLfixed( (((_x)>>1)+1)>>15 ) 83 84 // ---------------------------------------------------------------------------- 85 86 GGLfixed gglPowx(GGLfixed x, GGLfixed y) CONST; 87 GGLfixed gglSqrtx(GGLfixed a) CONST; 88 GGLfixed gglSqrtRecipx(GGLfixed x) CONST; 89 GGLfixed gglFastDivx(GGLfixed n, GGLfixed d) CONST; 90 int32_t gglMulDivi(int32_t a, int32_t b, int32_t c); 91 92 int32_t gglRecipQNormalized(int32_t x, int* exponent); 93 int32_t gglRecipQ(GGLfixed x, int q) CONST; 94 95 inline GGLfixed gglRecip(GGLfixed x) CONST; 96 inline GGLfixed gglRecip(GGLfixed x) { 97 return gglRecipQ(x, 16); 98 } 99 100 inline GGLfixed gglRecip28(GGLfixed x) CONST; 101 int32_t gglRecip28(GGLfixed x) { 102 return gglRecipQ(x, 28); 103 } 104 105 // ---------------------------------------------------------------------------- 106 107 #if defined(__arm__) && !defined(__thumb__) 108 109 // inline ARM implementations 110 inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST; 111 inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) { 
112 GGLfixed result, t; 113 if (__builtin_constant_p(shift)) { 114 asm("smull %[lo], %[hi], %[x], %[y] \n" 115 "movs %[lo], %[lo], lsr %[rshift] \n" 116 "adc %[lo], %[lo], %[hi], lsl %[lshift] \n" 117 : [lo]"=r"(result), [hi]"=r"(t), [x]"=r"(x) 118 : "%[x]"(x), [y]"r"(y), [lshift] "I"(32-shift), [rshift] "I"(shift) 119 : "cc" 120 ); 121 } else { 122 asm("smull %[lo], %[hi], %[x], %[y] \n" 123 "movs %[lo], %[lo], lsr %[rshift] \n" 124 "adc %[lo], %[lo], %[hi], lsl %[lshift] \n" 125 : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x) 126 : "%[x]"(x), [y]"r"(y), [lshift] "r"(32-shift), [rshift] "r"(shift) 127 : "cc" 128 ); 129 } 130 return result; 131 } 132 133 inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST; 134 inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) { 135 GGLfixed result, t; 136 if (__builtin_constant_p(shift)) { 137 asm("smull %[lo], %[hi], %[x], %[y] \n" 138 "add %[lo], %[a], %[lo], lsr %[rshift] \n" 139 "add %[lo], %[lo], %[hi], lsl %[lshift] \n" 140 : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x) 141 : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift) 142 ); 143 } else { 144 asm("smull %[lo], %[hi], %[x], %[y] \n" 145 "add %[lo], %[a], %[lo], lsr %[rshift] \n" 146 "add %[lo], %[lo], %[hi], lsl %[lshift] \n" 147 : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x) 148 : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift) 149 ); 150 } 151 return result; 152 } 153 154 inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST; 155 inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) { 156 GGLfixed result, t; 157 if (__builtin_constant_p(shift)) { 158 asm("smull %[lo], %[hi], %[x], %[y] \n" 159 "rsb %[lo], %[a], %[lo], lsr %[rshift] \n" 160 "add %[lo], %[lo], %[hi], lsl %[lshift] \n" 161 : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x) 162 : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift) 163 
); 164 } else { 165 asm("smull %[lo], %[hi], %[x], %[y] \n" 166 "rsb %[lo], %[a], %[lo], lsr %[rshift] \n" 167 "add %[lo], %[lo], %[hi], lsl %[lshift] \n" 168 : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x) 169 : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift) 170 ); 171 } 172 return result; 173 } 174 175 inline int64_t gglMulii(int32_t x, int32_t y) CONST; 176 inline int64_t gglMulii(int32_t x, int32_t y) 177 { 178 // 64-bits result: r0=low, r1=high 179 union { 180 struct { 181 int32_t lo; 182 int32_t hi; 183 } s; 184 int64_t res; 185 }; 186 asm("smull %0, %1, %2, %3 \n" 187 : "=r"(s.lo), "=&r"(s.hi) 188 : "%r"(x), "r"(y) 189 : 190 ); 191 return res; 192 } 193 #elif defined(__mips__) && __mips_isa_rev < 6 194 195 /*inline MIPS implementations*/ 196 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST; 197 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) { 198 GGLfixed result,tmp,tmp1,tmp2; 199 200 if (__builtin_constant_p(shift)) { 201 if (shift == 0) { 202 asm ("mult %[a], %[b] \t\n" 203 "mflo %[res] \t\n" 204 : [res]"=&r"(result),[tmp]"=&r"(tmp) 205 : [a]"r"(a),[b]"r"(b) 206 : "%hi","%lo" 207 ); 208 } else if (shift == 32) 209 { 210 asm ("mult %[a], %[b] \t\n" 211 "li %[tmp],1\t\n" 212 "sll %[tmp],%[tmp],0x1f\t\n" 213 "mflo %[res] \t\n" 214 "addu %[tmp1],%[tmp],%[res] \t\n" 215 "sltu %[tmp1],%[tmp1],%[tmp]\t\n" /*obit*/ 216 "sra %[tmp],%[tmp],0x1f \t\n" 217 "mfhi %[res] \t\n" 218 "addu %[res],%[res],%[tmp]\t\n" 219 "addu %[res],%[res],%[tmp1]\t\n" 220 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1) 221 : [a]"r"(a),[b]"r"(b),[shift]"I"(shift) 222 : "%hi","%lo" 223 ); 224 } else if ((shift >0) && (shift < 32)) 225 { 226 asm ("mult %[a], %[b] \t\n" 227 "li %[tmp],1 \t\n" 228 "sll %[tmp],%[tmp],%[shiftm1] \t\n" 229 "mflo %[res] \t\n" 230 "addu %[tmp1],%[tmp],%[res] \t\n" 231 "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/ 232 "addu %[res],%[res],%[tmp] \t\n" 233 "mfhi %[tmp] \t\n" 234 "addu %[tmp],%[tmp],%[tmp1] 
\t\n" 235 "sll %[tmp],%[tmp],%[lshift] \t\n" 236 "srl %[res],%[res],%[rshift] \t\n" 237 "or %[res],%[res],%[tmp] \t\n" 238 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) 239 : [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1) 240 : "%hi","%lo" 241 ); 242 } else { 243 asm ("mult %[a], %[b] \t\n" 244 "li %[tmp],1 \t\n" 245 "sll %[tmp],%[tmp],%[shiftm1] \t\n" 246 "mflo %[res] \t\n" 247 "addu %[tmp1],%[tmp],%[res] \t\n" 248 "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/ 249 "sra %[tmp2],%[tmp],0x1f \t\n" 250 "addu %[res],%[res],%[tmp] \t\n" 251 "mfhi %[tmp] \t\n" 252 "addu %[tmp],%[tmp],%[tmp2] \t\n" 253 "addu %[tmp],%[tmp],%[tmp1] \t\n" /*tmp=hi*/ 254 "srl %[tmp2],%[res],%[rshift] \t\n" 255 "srav %[res], %[tmp],%[rshift]\t\n" 256 "sll %[tmp],%[tmp],1 \t\n" 257 "sll %[tmp],%[tmp],%[norbits] \t\n" 258 "or %[tmp],%[tmp],%[tmp2] \t\n" 259 "movz %[res],%[tmp],%[bit5] \t\n" 260 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) 261 : [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20) 262 : "%hi","%lo" 263 ); 264 } 265 } else { 266 asm ("mult %[a], %[b] \t\n" 267 "li %[tmp],1 \t\n" 268 "sll %[tmp],%[tmp],%[shiftm1] \t\n" 269 "mflo %[res] \t\n" 270 "addu %[tmp1],%[tmp],%[res] \t\n" 271 "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/ 272 "sra %[tmp2],%[tmp],0x1f \t\n" 273 "addu %[res],%[res],%[tmp] \t\n" 274 "mfhi %[tmp] \t\n" 275 "addu %[tmp],%[tmp],%[tmp2] \t\n" 276 "addu %[tmp],%[tmp],%[tmp1] \t\n" /*tmp=hi*/ 277 "srl %[tmp2],%[res],%[rshift] \t\n" 278 "srav %[res], %[tmp],%[rshift]\t\n" 279 "sll %[tmp],%[tmp],1 \t\n" 280 "sll %[tmp],%[tmp],%[norbits] \t\n" 281 "or %[tmp],%[tmp],%[tmp2] \t\n" 282 "movz %[res],%[tmp],%[bit5] \t\n" 283 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) 284 : [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20) 285 : "%hi","%lo" 286 
); 287 } 288 289 return result; 290 } 291 292 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST; 293 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) { 294 GGLfixed result,t,tmp1,tmp2; 295 296 if (__builtin_constant_p(shift)) { 297 if (shift == 0) { 298 asm ("mult %[a], %[b] \t\n" 299 "mflo %[lo] \t\n" 300 "addu %[lo],%[lo],%[c] \t\n" 301 : [lo]"=&r"(result) 302 : [a]"r"(a),[b]"r"(b),[c]"r"(c) 303 : "%hi","%lo" 304 ); 305 } else if (shift == 32) { 306 asm ("mult %[a], %[b] \t\n" 307 "mfhi %[lo] \t\n" 308 "addu %[lo],%[lo],%[c] \t\n" 309 : [lo]"=&r"(result) 310 : [a]"r"(a),[b]"r"(b),[c]"r"(c) 311 : "%hi","%lo" 312 ); 313 } else if ((shift>0) && (shift<32)) { 314 asm ("mult %[a], %[b] \t\n" 315 "mflo %[res] \t\n" 316 "mfhi %[t] \t\n" 317 "srl %[res],%[res],%[rshift] \t\n" 318 "sll %[t],%[t],%[lshift] \t\n" 319 "or %[res],%[res],%[t] \t\n" 320 "addu %[res],%[res],%[c] \t\n" 321 : [res]"=&r"(result),[t]"=&r"(t) 322 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift) 323 : "%hi","%lo" 324 ); 325 } else { 326 asm ("mult %[a], %[b] \t\n" 327 "nor %[tmp1],$zero,%[shift]\t\n" 328 "mflo %[res] \t\n" 329 "mfhi %[t] \t\n" 330 "srl %[res],%[res],%[shift] \t\n" 331 "sll %[tmp2],%[t],1 \t\n" 332 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n" 333 "or %[tmp1],%[tmp2],%[res] \t\n" 334 "srav %[res],%[t],%[shift] \t\n" 335 "andi %[tmp2],%[shift],0x20\t\n" 336 "movz %[res],%[tmp1],%[tmp2]\t\n" 337 "addu %[res],%[res],%[c] \t\n" 338 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) 339 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift) 340 : "%hi","%lo" 341 ); 342 } 343 } else { 344 asm ("mult %[a], %[b] \t\n" 345 "nor %[tmp1],$zero,%[shift]\t\n" 346 "mflo %[res] \t\n" 347 "mfhi %[t] \t\n" 348 "srl %[res],%[res],%[shift] \t\n" 349 "sll %[tmp2],%[t],1 \t\n" 350 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n" 351 "or %[tmp1],%[tmp2],%[res] \t\n" 352 "srav %[res],%[t],%[shift] \t\n" 353 "andi %[tmp2],%[shift],0x20\t\n" 
354 "movz %[res],%[tmp1],%[tmp2]\t\n" 355 "addu %[res],%[res],%[c] \t\n" 356 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) 357 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift) 358 : "%hi","%lo" 359 ); 360 } 361 return result; 362 } 363 364 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST; 365 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) { 366 GGLfixed result,t,tmp1,tmp2; 367 368 if (__builtin_constant_p(shift)) { 369 if (shift == 0) { 370 asm ("mult %[a], %[b] \t\n" 371 "mflo %[lo] \t\n" 372 "subu %[lo],%[lo],%[c] \t\n" 373 : [lo]"=&r"(result) 374 : [a]"r"(a),[b]"r"(b),[c]"r"(c) 375 : "%hi","%lo" 376 ); 377 } else if (shift == 32) { 378 asm ("mult %[a], %[b] \t\n" 379 "mfhi %[lo] \t\n" 380 "subu %[lo],%[lo],%[c] \t\n" 381 : [lo]"=&r"(result) 382 : [a]"r"(a),[b]"r"(b),[c]"r"(c) 383 : "%hi","%lo" 384 ); 385 } else if ((shift>0) && (shift<32)) { 386 asm ("mult %[a], %[b] \t\n" 387 "mflo %[res] \t\n" 388 "mfhi %[t] \t\n" 389 "srl %[res],%[res],%[rshift] \t\n" 390 "sll %[t],%[t],%[lshift] \t\n" 391 "or %[res],%[res],%[t] \t\n" 392 "subu %[res],%[res],%[c] \t\n" 393 : [res]"=&r"(result),[t]"=&r"(t) 394 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift) 395 : "%hi","%lo" 396 ); 397 } else { 398 asm ("mult %[a], %[b] \t\n" 399 "nor %[tmp1],$zero,%[shift]\t\n" 400 "mflo %[res] \t\n" 401 "mfhi %[t] \t\n" 402 "srl %[res],%[res],%[shift] \t\n" 403 "sll %[tmp2],%[t],1 \t\n" 404 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n" 405 "or %[tmp1],%[tmp2],%[res] \t\n" 406 "srav %[res],%[t],%[shift] \t\n" 407 "andi %[tmp2],%[shift],0x20\t\n" 408 "movz %[res],%[tmp1],%[tmp2]\t\n" 409 "subu %[res],%[res],%[c] \t\n" 410 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) 411 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift) 412 : "%hi","%lo" 413 ); 414 } 415 } else { 416 asm ("mult %[a], %[b] \t\n" 417 "nor %[tmp1],$zero,%[shift]\t\n" 418 "mflo %[res] \t\n" 419 "mfhi %[t] \t\n" 
420 "srl %[res],%[res],%[shift] \t\n" 421 "sll %[tmp2],%[t],1 \t\n" 422 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n" 423 "or %[tmp1],%[tmp2],%[res] \t\n" 424 "srav %[res],%[t],%[shift] \t\n" 425 "andi %[tmp2],%[shift],0x20\t\n" 426 "movz %[res],%[tmp1],%[tmp2]\t\n" 427 "subu %[res],%[res],%[c] \t\n" 428 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) 429 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift) 430 : "%hi","%lo" 431 ); 432 } 433 return result; 434 } 435 436 inline int64_t gglMulii(int32_t x, int32_t y) CONST; 437 inline int64_t gglMulii(int32_t x, int32_t y) { 438 union { 439 struct { 440 #if defined(__MIPSEL__) 441 int32_t lo; 442 int32_t hi; 443 #elif defined(__MIPSEB__) 444 int32_t hi; 445 int32_t lo; 446 #endif 447 } s; 448 int64_t res; 449 }u; 450 asm("mult %2, %3 \t\n" 451 "mfhi %1 \t\n" 452 "mflo %0 \t\n" 453 : "=r"(u.s.lo), "=&r"(u.s.hi) 454 : "%r"(x), "r"(y) 455 : "%hi","%lo" 456 ); 457 return u.res; 458 } 459 460 #elif defined(__aarch64__) 461 462 // inline AArch64 implementations 463 464 inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST; 465 inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) 466 { 467 GGLfixed result; 468 GGLfixed round; 469 470 asm("mov %x[round], #1 \n" 471 "lsl %x[round], %x[round], %x[shift] \n" 472 "lsr %x[round], %x[round], #1 \n" 473 "smaddl %x[result], %w[x], %w[y],%x[round] \n" 474 "lsr %x[result], %x[result], %x[shift] \n" 475 : [round]"=&r"(round), [result]"=&r"(result) \ 476 : [x]"r"(x), [y]"r"(y), [shift] "r"(shift) \ 477 : 478 ); 479 return result; 480 } 481 inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST; 482 inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) 483 { 484 GGLfixed result; 485 asm("smull %x[result], %w[x], %w[y] \n" 486 "lsr %x[result], %x[result], %x[shift] \n" 487 "add %w[result], %w[result], %w[a] \n" 488 : [result]"=&r"(result) \ 489 : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \ 490 : 491 ); 492 
return result; 493 } 494 495 inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST; 496 inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) 497 { 498 499 GGLfixed result; 500 int rshift; 501 502 asm("smull %x[result], %w[x], %w[y] \n" 503 "lsr %x[result], %x[result], %x[shift] \n" 504 "sub %w[result], %w[result], %w[a] \n" 505 : [result]"=&r"(result) \ 506 : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \ 507 : 508 ); 509 return result; 510 } 511 inline int64_t gglMulii(int32_t x, int32_t y) CONST; 512 inline int64_t gglMulii(int32_t x, int32_t y) 513 { 514 int64_t res; 515 asm("smull %x0, %w1, %w2 \n" 516 : "=r"(res) 517 : "%r"(x), "r"(y) 518 : 519 ); 520 return res; 521 } 522 523 #elif defined(__mips__) && __mips_isa_rev == 6 524 525 /*inline MIPS implementations*/ 526 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST; 527 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) { 528 GGLfixed result,tmp,tmp1,tmp2; 529 530 if (__builtin_constant_p(shift)) { 531 if (shift == 0) { 532 asm ("mul %[res], %[a], %[b] \t\n" 533 : [res]"=&r"(result) 534 : [a]"r"(a),[b]"r"(b) 535 ); 536 } else if (shift == 32) 537 { 538 asm ("mul %[res], %[a], %[b] \t\n" 539 "li %[tmp],1\t\n" 540 "sll %[tmp],%[tmp],0x1f\t\n" 541 "addu %[tmp1],%[tmp],%[res] \t\n" 542 "muh %[res], %[a], %[b] \t\n" 543 "sltu %[tmp1],%[tmp1],%[tmp]\t\n" /*obit*/ 544 "sra %[tmp],%[tmp],0x1f \t\n" 545 "addu %[res],%[res],%[tmp]\t\n" 546 "addu %[res],%[res],%[tmp1]\t\n" 547 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1) 548 : [a]"r"(a),[b]"r"(b),[shift]"I"(shift) 549 ); 550 } else if ((shift >0) && (shift < 32)) 551 { 552 asm ("mul %[res], %[a], %[b] \t\n" 553 "li %[tmp],1 \t\n" 554 "sll %[tmp],%[tmp],%[shiftm1] \t\n" 555 "addu %[tmp1],%[tmp],%[res] \t\n" 556 "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/ 557 "addu %[res],%[res],%[tmp] \t\n" 558 "muh %[tmp], %[a], %[b] \t\n" 559 "addu %[tmp],%[tmp],%[tmp1] \t\n" 560 "sll 
%[tmp],%[tmp],%[lshift] \t\n" 561 "srl %[res],%[res],%[rshift] \t\n" 562 "or %[res],%[res],%[tmp] \t\n" 563 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) 564 : [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1) 565 ); 566 } else { 567 asm ("mul %[res], %[a], %[b] \t\n" 568 "li %[tmp],1 \t\n" 569 "sll %[tmp],%[tmp],%[shiftm1] \t\n" 570 "addu %[tmp1],%[tmp],%[res] \t\n" 571 "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/ 572 "sra %[tmp2],%[tmp],0x1f \t\n" 573 "addu %[res],%[res],%[tmp] \t\n" 574 "muh %[tmp], %[a], %[b] \t\n" 575 "addu %[tmp],%[tmp],%[tmp2] \t\n" 576 "addu %[tmp],%[tmp],%[tmp1] \t\n" /*tmp=hi*/ 577 "srl %[tmp2],%[res],%[rshift] \t\n" 578 "srav %[res], %[tmp],%[rshift]\t\n" 579 "sll %[tmp],%[tmp],1 \t\n" 580 "sll %[tmp],%[tmp],%[norbits] \t\n" 581 "or %[tmp],%[tmp],%[tmp2] \t\n" 582 "seleqz %[tmp],%[tmp],%[bit5] \t\n" 583 "selnez %[res],%[res],%[bit5] \t\n" 584 "or %[res],%[res],%[tmp] \t\n" 585 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) 586 : [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20) 587 ); 588 } 589 } else { 590 asm ("mul %[res], %[a], %[b] \t\n" 591 "li %[tmp],1 \t\n" 592 "sll %[tmp],%[tmp],%[shiftm1] \t\n" 593 "addu %[tmp1],%[tmp],%[res] \t\n" 594 "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/ 595 "sra %[tmp2],%[tmp],0x1f \t\n" 596 "addu %[res],%[res],%[tmp] \t\n" 597 "muh %[tmp], %[a], %[b] \t\n" 598 "addu %[tmp],%[tmp],%[tmp2] \t\n" 599 "addu %[tmp],%[tmp],%[tmp1] \t\n" /*tmp=hi*/ 600 "srl %[tmp2],%[res],%[rshift] \t\n" 601 "srav %[res], %[tmp],%[rshift]\t\n" 602 "sll %[tmp],%[tmp],1 \t\n" 603 "sll %[tmp],%[tmp],%[norbits] \t\n" 604 "or %[tmp],%[tmp],%[tmp2] \t\n" 605 "seleqz %[tmp],%[tmp],%[bit5] \t\n" 606 "selnez %[res],%[res],%[bit5] \t\n" 607 "or %[res],%[res],%[tmp] \t\n" 608 : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) 609 : 
[a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20) 610 ); 611 } 612 return result; 613 } 614 615 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST; 616 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) { 617 GGLfixed result,t,tmp1,tmp2; 618 619 if (__builtin_constant_p(shift)) { 620 if (shift == 0) { 621 asm ("mul %[lo], %[a], %[b] \t\n" 622 "addu %[lo],%[lo],%[c] \t\n" 623 : [lo]"=&r"(result) 624 : [a]"r"(a),[b]"r"(b),[c]"r"(c) 625 ); 626 } else if (shift == 32) { 627 asm ("muh %[lo], %[a], %[b] \t\n" 628 "addu %[lo],%[lo],%[c] \t\n" 629 : [lo]"=&r"(result) 630 : [a]"r"(a),[b]"r"(b),[c]"r"(c) 631 ); 632 } else if ((shift>0) && (shift<32)) { 633 asm ("mul %[res], %[a], %[b] \t\n" 634 "muh %[t], %[a], %[b] \t\n" 635 "srl %[res],%[res],%[rshift] \t\n" 636 "sll %[t],%[t],%[lshift] \t\n" 637 "or %[res],%[res],%[t] \t\n" 638 "addu %[res],%[res],%[c] \t\n" 639 : [res]"=&r"(result),[t]"=&r"(t) 640 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift) 641 ); 642 } else { 643 asm ("mul %[res], %[a], %[b] \t\n" 644 "muh %[t], %[a], %[b] \t\n" 645 "nor %[tmp1],$zero,%[shift]\t\n" 646 "srl %[res],%[res],%[shift] \t\n" 647 "sll %[tmp2],%[t],1 \t\n" 648 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n" 649 "or %[tmp1],%[tmp2],%[res] \t\n" 650 "srav %[res],%[t],%[shift] \t\n" 651 "andi %[tmp2],%[shift],0x20\t\n" 652 "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n" 653 "selnez %[res],%[res],%[tmp2]\t\n" 654 "or %[res],%[res],%[tmp1]\t\n" 655 "addu %[res],%[res],%[c] \t\n" 656 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) 657 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift) 658 ); 659 } 660 } else { 661 asm ("mul %[res], %[a], %[b] \t\n" 662 "muh %[t], %[a], %[b] \t\n" 663 "nor %[tmp1],$zero,%[shift]\t\n" 664 "srl %[res],%[res],%[shift] \t\n" 665 "sll %[tmp2],%[t],1 \t\n" 666 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n" 667 "or %[tmp1],%[tmp2],%[res] \t\n" 668 
"srav %[res],%[t],%[shift] \t\n" 669 "andi %[tmp2],%[shift],0x20\t\n" 670 "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n" 671 "selnez %[res],%[res],%[tmp2]\t\n" 672 "or %[res],%[res],%[tmp1]\t\n" 673 "addu %[res],%[res],%[c] \t\n" 674 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) 675 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift) 676 ); 677 } 678 return result; 679 } 680 681 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST; 682 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) { 683 GGLfixed result,t,tmp1,tmp2; 684 685 if (__builtin_constant_p(shift)) { 686 if (shift == 0) { 687 asm ("mul %[lo], %[a], %[b] \t\n" 688 "subu %[lo],%[lo],%[c] \t\n" 689 : [lo]"=&r"(result) 690 : [a]"r"(a),[b]"r"(b),[c]"r"(c) 691 ); 692 } else if (shift == 32) { 693 asm ("muh %[lo], %[a], %[b] \t\n" 694 "subu %[lo],%[lo],%[c] \t\n" 695 : [lo]"=&r"(result) 696 : [a]"r"(a),[b]"r"(b),[c]"r"(c) 697 ); 698 } else if ((shift>0) && (shift<32)) { 699 asm ("mul %[res], %[a], %[b] \t\n" 700 "muh %[t], %[a], %[b] \t\n" 701 "srl %[res],%[res],%[rshift] \t\n" 702 "sll %[t],%[t],%[lshift] \t\n" 703 "or %[res],%[res],%[t] \t\n" 704 "subu %[res],%[res],%[c] \t\n" 705 : [res]"=&r"(result),[t]"=&r"(t) 706 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift) 707 ); 708 } else { 709 asm ("mul %[res], %[a], %[b] \t\n" 710 "muh %[t], %[a], %[b] \t\n" 711 "nor %[tmp1],$zero,%[shift]\t\n" 712 "srl %[res],%[res],%[shift] \t\n" 713 "sll %[tmp2],%[t],1 \t\n" 714 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n" 715 "or %[tmp1],%[tmp2],%[res] \t\n" 716 "srav %[res],%[t],%[shift] \t\n" 717 "andi %[tmp2],%[shift],0x20\t\n" 718 "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n" 719 "selnez %[res],%[res],%[tmp2]\t\n" 720 "or %[res],%[res],%[tmp1]\t\n" 721 "subu %[res],%[res],%[c] \t\n" 722 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) 723 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift) 724 ); 725 } 726 } else { 727 asm ("mul %[res], 
%[a], %[b] \t\n" 728 "muh %[t], %[a], %[b] \t\n" 729 "nor %[tmp1],$zero,%[shift]\t\n" 730 "srl %[res],%[res],%[shift] \t\n" 731 "sll %[tmp2],%[t],1 \t\n" 732 "sllv %[tmp2],%[tmp2],%[tmp1] \t\n" 733 "or %[tmp1],%[tmp2],%[res] \t\n" 734 "srav %[res],%[t],%[shift] \t\n" 735 "andi %[tmp2],%[shift],0x20\t\n" 736 "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n" 737 "selnez %[res],%[res],%[tmp2]\t\n" 738 "or %[res],%[res],%[tmp1]\t\n" 739 "subu %[res],%[res],%[c] \t\n" 740 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) 741 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift) 742 ); 743 } 744 return result; 745 } 746 747 inline int64_t gglMulii(int32_t x, int32_t y) CONST; 748 inline int64_t gglMulii(int32_t x, int32_t y) { 749 union { 750 struct { 751 #if defined(__MIPSEL__) 752 int32_t lo; 753 int32_t hi; 754 #elif defined(__MIPSEB__) 755 int32_t hi; 756 int32_t lo; 757 #endif 758 } s; 759 int64_t res; 760 }u; 761 asm("mul %0, %2, %3 \t\n" 762 "muh %1, %2, %3 \t\n" 763 : "=r"(u.s.lo), "=&r"(u.s.hi) 764 : "%r"(x), "r"(y) 765 ); 766 return u.res; 767 } 768 769 #else // ---------------------------------------------------------------------- 770 771 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST; 772 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) { 773 return GGLfixed((int64_t(a)*b + (1<<(shift-1)))>>shift); 774 } 775 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST; 776 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) { 777 return GGLfixed((int64_t(a)*b)>>shift) + c; 778 } 779 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST; 780 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) { 781 return GGLfixed((int64_t(a)*b)>>shift) - c; 782 } 783 inline int64_t gglMulii(int32_t a, int32_t b) CONST; 784 inline int64_t gglMulii(int32_t a, int32_t b) { 785 return int64_t(a)*b; 786 } 787 788 #endif 789 790 // 
------------------------------------------------------------------------ 791 792 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) CONST; 793 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) { 794 return gglMulx(a, b, 16); 795 } 796 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) CONST; 797 inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) { 798 return gglMulAddx(a, b, c, 16); 799 } 800 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) CONST; 801 inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) { 802 return gglMulSubx(a, b, c, 16); 803 } 804 805 // ------------------------------------------------------------------------ 806 807 inline int32_t gglClz(int32_t x) CONST; 808 inline int32_t gglClz(int32_t x) 809 { 810 #if (defined(__arm__) && !defined(__thumb__)) || defined(__mips__) || defined(__aarch64__) 811 return __builtin_clz(x); 812 #else 813 if (!x) return 32; 814 int32_t exp = 31; 815 if (x & 0xFFFF0000) { exp -=16; x >>= 16; } 816 if (x & 0x0000ff00) { exp -= 8; x >>= 8; } 817 if (x & 0x000000f0) { exp -= 4; x >>= 4; } 818 if (x & 0x0000000c) { exp -= 2; x >>= 2; } 819 if (x & 0x00000002) { exp -= 1; } 820 return exp; 821 #endif 822 } 823 824 // ------------------------------------------------------------------------ 825 826 int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i) CONST; 827 828 inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) CONST; 829 inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) { 830 return gglDivQ(n, d, 16); 831 } 832 833 inline int32_t gglDivx(GGLfixed n, GGLfixed d) CONST; 834 inline int32_t gglDivx(GGLfixed n, GGLfixed d) { 835 return gglDivQ(n, d, 16); 836 } 837 838 // ------------------------------------------------------------------------ 839 840 inline GGLfixed gglRecipFast(GGLfixed x) CONST; 841 inline GGLfixed gglRecipFast(GGLfixed x) 842 { 843 // This is a really bad approximation of 1/x, but it's also 844 // very fast. x must be strictly positive. 
845 // if x between [0.5, 1[ , then 1/x = 3-2*x 846 // (we use 2.30 fixed-point) 847 const int32_t lz = gglClz(x); 848 return (0xC0000000 - (x << (lz - 1))) >> (30-lz); 849 } 850 851 // ------------------------------------------------------------------------ 852 853 inline GGLfixed gglClampx(GGLfixed c) CONST; 854 inline GGLfixed gglClampx(GGLfixed c) 855 { 856 #if defined(__thumb__) 857 // clamp without branches 858 c &= ~(c>>31); c = FIXED_ONE - c; 859 c &= ~(c>>31); c = FIXED_ONE - c; 860 #else 861 #if defined(__arm__) 862 // I don't know why gcc thinks its smarter than me! The code below 863 // clamps to zero in one instruction, but gcc won't generate it and 864 // replace it by a cmp + movlt (it's quite amazing actually). 865 asm("bic %0, %1, %1, asr #31\n" : "=r"(c) : "r"(c)); 866 #elif defined(__aarch64__) 867 asm("bic %w0, %w1, %w1, asr #31\n" : "=r"(c) : "r"(c)); 868 #else 869 c &= ~(c>>31); 870 #endif 871 if (c>FIXED_ONE) 872 c = FIXED_ONE; 873 #endif 874 return c; 875 } 876 877 // ------------------------------------------------------------------------ 878 879 #endif // ANDROID_GGL_FIXED_H 880