/*
 * Copyright (C) 2005 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_GGL_FIXED_H
#define ANDROID_GGL_FIXED_H

#include <math.h>
#include <pixelflinger/pixelflinger.h>

// ----------------------------------------------------------------------------

#define CONST           __attribute__((const))
#define ALWAYS_INLINE   __attribute__((always_inline))

const GGLfixed FIXED_BITS = 16;
const GGLfixed FIXED_EPSILON  = 1;
const GGLfixed FIXED_ONE  = 1L<<FIXED_BITS;
const GGLfixed FIXED_HALF = 1L<<(FIXED_BITS-1);
const GGLfixed FIXED_MIN  = 0x80000000L;
const GGLfixed FIXED_MAX  = 0x7FFFFFFFL;
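
// GGLfixed is a signed 16.16 fixed-point value (16 integer bits, 16 fraction
// bits). For reference:
//   FIXED_ONE     == 0x00010000   // 1.0
//   FIXED_HALF    == 0x00008000   // 0.5
//   FIXED_EPSILON == 0x00000001   // 1/65536, the smallest representable step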

inline GGLfixed gglIntToFixed(GGLfixed i)       ALWAYS_INLINE ;
inline GGLfixed gglFixedToIntRound(GGLfixed f)  ALWAYS_INLINE ;
inline GGLfixed gglFixedToIntFloor(GGLfixed f)  ALWAYS_INLINE ;
inline GGLfixed gglFixedToIntCeil(GGLfixed f)   ALWAYS_INLINE ;
inline GGLfixed gglFracx(GGLfixed v)            ALWAYS_INLINE ;
inline GGLfixed gglFloorx(GGLfixed v)           ALWAYS_INLINE ;
inline GGLfixed gglCeilx(GGLfixed v)            ALWAYS_INLINE ;
inline GGLfixed gglCenterx(GGLfixed v)          ALWAYS_INLINE ;
inline GGLfixed gglRoundx(GGLfixed v)           ALWAYS_INLINE ;

GGLfixed gglIntToFixed(GGLfixed i) {
    return i<<FIXED_BITS;
}
GGLfixed gglFixedToIntRound(GGLfixed f) {
    return (f + FIXED_HALF)>>FIXED_BITS;
}
GGLfixed gglFixedToIntFloor(GGLfixed f) {
    return f>>FIXED_BITS;
}
GGLfixed gglFixedToIntCeil(GGLfixed f) {
    return (f + ((1<<FIXED_BITS) - 1))>>FIXED_BITS;
}

GGLfixed gglFracx(GGLfixed v) {
    return v & ((1<<FIXED_BITS)-1);
}
GGLfixed gglFloorx(GGLfixed v) {
    return gglFixedToIntFloor(v)<<FIXED_BITS;
}
GGLfixed gglCeilx(GGLfixed v) {
    return gglFixedToIntCeil(v)<<FIXED_BITS;
}
GGLfixed gglCenterx(GGLfixed v) {
    return gglFloorx(v + FIXED_HALF) | FIXED_HALF;
}
GGLfixed gglRoundx(GGLfixed v) {
    return gglFixedToIntRound(v)<<FIXED_BITS;
}
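
// Worked examples for the helpers above (the values follow directly from the
// definitions, using 16.16 operands):
//   gglIntToFixed(3)               == 0x00030000  // 3.0
//   gglFixedToIntRound(0x00018000) == 2           // round(1.5)
//   gglFixedToIntFloor(0x00018000) == 1           // floor(1.5)
//   gglFixedToIntCeil(0x00011000)  == 2           // ceil(1.0625)
//   gglFracx(0x00018000)           == 0x00008000  // frac(1.5) == 0.5
//   gglCenterx(0x00014000)         == 0x00018000  // 1.25 snaps to the 1.5 center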

// conversion from (unsigned) int, short, byte to fixed...
#define GGL_B_TO_X(_x)      GGLfixed( ((int32_t(_x)+1)>>1)<<10 )
#define GGL_S_TO_X(_x)      GGLfixed( ((int32_t(_x)+1)>>1)<<2 )
#define GGL_I_TO_X(_x)      GGLfixed( ((int32_t(_x)>>1)+1)>>14 )
#define GGL_UB_TO_X(_x)     GGLfixed(   uint32_t(_x) +      \
                                        (uint32_t(_x)<<8) + \
                                        (uint32_t(_x)>>7) )
#define GGL_US_TO_X(_x)     GGLfixed( (_x) + ((_x)>>15) )
#define GGL_UI_TO_X(_x)     GGLfixed( (((_x)>>1)+1)>>15 )
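
// These macros scale an integer component so that its maximum value maps to
// FIXED_ONE. For example, with an unsigned byte:
//   GGL_UB_TO_X(0x00) == 0x00000000                             // 0.0
//   GGL_UB_TO_X(0xFF) == 0xFF + 0xFF00 + 0x01 == 0x00010000     // 1.0
// and with an unsigned short, GGL_US_TO_X(0xFFFF) == 0x00010000 as well.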

// ----------------------------------------------------------------------------

GGLfixed gglPowx(GGLfixed x, GGLfixed y) CONST;
GGLfixed gglSqrtx(GGLfixed a) CONST;
GGLfixed gglSqrtRecipx(GGLfixed x) CONST;
GGLfixed gglFastDivx(GGLfixed n, GGLfixed d) CONST;
int32_t gglMulDivi(int32_t a, int32_t b, int32_t c);

int32_t gglRecipQNormalized(int32_t x, int* exponent);
int32_t gglRecipQ(GGLfixed x, int q) CONST;

inline GGLfixed gglRecip(GGLfixed x) CONST;
inline GGLfixed gglRecip(GGLfixed x) {
    return gglRecipQ(x, 16);
}

inline GGLfixed gglRecip28(GGLfixed x) CONST;
inline GGLfixed gglRecip28(GGLfixed x) {
    return gglRecipQ(x, 28);
}
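
// gglRecipQ(x, q) is implemented elsewhere in pixelflinger; it returns an
// approximation of 1/x with q fraction bits. Assuming that convention:
//   gglRecip(2*FIXED_ONE)   should be close to FIXED_HALF     // Q16 result
//   gglRecip28(2*FIXED_ONE) should be close to 1 << 27        // 0.5 in Q28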

// ----------------------------------------------------------------------------

#if defined(__arm__) && !defined(__thumb__)

// inline ARM implementations
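//
// Each of these computes a rounded, shifted 64-bit product, equivalent (for
// 0 < shift < 32) to the portable fallback at the end of this file:
//   gglMulx(x, y, shift) == (int64_t(x)*y + (1 << (shift-1))) >> shift
// smull produces the full 64-bit product in two registers; "movs ... lsr"
// moves the rounding bit into the carry flag, and "adc" folds it in while
// recombining the high and low halves.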
inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST;
inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) {
    GGLfixed result, t;
    if (__builtin_constant_p(shift)) {
    asm("smull  %[lo], %[hi], %[x], %[y]            \n"
        "movs   %[lo], %[lo], lsr %[rshift]         \n"
        "adc    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
        : [lo]"=r"(result), [hi]"=r"(t), [x]"=r"(x)
        : "%[x]"(x), [y]"r"(y), [lshift] "I"(32-shift), [rshift] "I"(shift)
        : "cc"
        );
    } else {
    asm("smull  %[lo], %[hi], %[x], %[y]            \n"
        "movs   %[lo], %[lo], lsr %[rshift]         \n"
        "adc    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
        : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
        : "%[x]"(x), [y]"r"(y), [lshift] "r"(32-shift), [rshift] "r"(shift)
        : "cc"
        );
    }
    return result;
}

inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) {
    GGLfixed result, t;
    if (__builtin_constant_p(shift)) {
    asm("smull  %[lo], %[hi], %[x], %[y]            \n"
        "add    %[lo], %[a],  %[lo], lsr %[rshift]  \n"
        "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
        : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
        : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift)
        );
    } else {
    asm("smull  %[lo], %[hi], %[x], %[y]            \n"
        "add    %[lo], %[a],  %[lo], lsr %[rshift]  \n"
        "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
        : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
        : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift)
        );
    }
    return result;
}

inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) {
    GGLfixed result, t;
    if (__builtin_constant_p(shift)) {
    asm("smull  %[lo], %[hi], %[x], %[y]            \n"
        "rsb    %[lo], %[a],  %[lo], lsr %[rshift]  \n"
        "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
        : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
        : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift)
        );
    } else {
    asm("smull  %[lo], %[hi], %[x], %[y]            \n"
        "rsb    %[lo], %[a],  %[lo], lsr %[rshift]  \n"
        "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
        : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
        : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift)
        );
    }
    return result;
}

inline int64_t gglMulii(int32_t x, int32_t y) CONST;
inline int64_t gglMulii(int32_t x, int32_t y)
{
    // 64-bits result: r0=low, r1=high
    union {
        struct {
            int32_t lo;
            int32_t hi;
        } s;
        int64_t res;
    };
    asm("smull %0, %1, %2, %3   \n"
        : "=r"(s.lo), "=&r"(s.hi)
        : "%r"(x), "r"(y)
        :
        );
    return res;
}
#elif defined(__mips__) && __mips_isa_rev < 6

/*inline MIPS implementations*/
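/*
 * As on ARM, these compute (int64_t(a)*b + (1 << (shift-1))) >> shift.  The
 * 64-bit product lives in the HI/LO register pair (mult/mfhi/mflo), so the
 * rounding constant 1 << (shift-1) is built with li/sll, added to the low
 * word, and the carry out ("obit") is propagated into the high word before
 * the two halves are recombined with the requested shift.
 */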
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
    GGLfixed result,tmp,tmp1,tmp2;

    if (__builtin_constant_p(shift)) {
        if (shift == 0) {
            asm ("mult %[a], %[b] \t\n"
              "mflo  %[res]   \t\n"
            : [res]"=&r"(result),[tmp]"=&r"(tmp)
            : [a]"r"(a),[b]"r"(b)
            : "%hi","%lo"
            );
        } else if (shift == 32)
        {
            asm ("mult %[a], %[b] \t\n"
            "li  %[tmp],1\t\n"
            "sll  %[tmp],%[tmp],0x1f\t\n"
            "mflo %[res]   \t\n"
            "addu %[tmp1],%[tmp],%[res] \t\n"
            "sltu %[tmp1],%[tmp1],%[tmp]\t\n"   /*obit*/
            "sra %[tmp],%[tmp],0x1f \t\n"
            "mfhi  %[res]   \t\n"
            "addu %[res],%[res],%[tmp]\t\n"
            "addu %[res],%[res],%[tmp1]\t\n"
            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1)
            : [a]"r"(a),[b]"r"(b),[shift]"I"(shift)
            : "%hi","%lo"
            );
        } else if ((shift >0) && (shift < 32))
        {
            asm ("mult %[a], %[b] \t\n"
            "li  %[tmp],1 \t\n"
            "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
            "mflo  %[res]   \t\n"
            "addu %[tmp1],%[tmp],%[res] \t\n"
            "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
            "addu  %[res],%[res],%[tmp] \t\n"
            "mfhi  %[tmp]   \t\n"
            "addu  %[tmp],%[tmp],%[tmp1] \t\n"
            "sll   %[tmp],%[tmp],%[lshift] \t\n"
            "srl   %[res],%[res],%[rshift]    \t\n"
            "or    %[res],%[res],%[tmp] \t\n"
            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
            : [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1)
            : "%hi","%lo"
            );
        } else {
            asm ("mult %[a], %[b] \t\n"
            "li  %[tmp],1 \t\n"
            "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
            "mflo  %[res]   \t\n"
            "addu %[tmp1],%[tmp],%[res] \t\n"
            "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
            "sra  %[tmp2],%[tmp],0x1f \t\n"
            "addu  %[res],%[res],%[tmp] \t\n"
            "mfhi  %[tmp]   \t\n"
            "addu  %[tmp],%[tmp],%[tmp2] \t\n"
            "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
            "srl   %[tmp2],%[res],%[rshift]    \t\n"
            "srav  %[res], %[tmp],%[rshift]\t\n"
            "sll   %[tmp],%[tmp],1 \t\n"
            "sll   %[tmp],%[tmp],%[norbits] \t\n"
            "or    %[tmp],%[tmp],%[tmp2] \t\n"
            "movz  %[res],%[tmp],%[bit5] \t\n"
            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
            : [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20)
            : "%hi","%lo"
            );
        }
    } else {
        asm ("mult %[a], %[b] \t\n"
        "li  %[tmp],1 \t\n"
        "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
        "mflo  %[res]   \t\n"
        "addu %[tmp1],%[tmp],%[res] \t\n"
        "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
        "sra  %[tmp2],%[tmp],0x1f \t\n"
        "addu  %[res],%[res],%[tmp] \t\n"
        "mfhi  %[tmp]   \t\n"
        "addu  %[tmp],%[tmp],%[tmp2] \t\n"
        "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
        "srl   %[tmp2],%[res],%[rshift]    \t\n"
        "srav  %[res], %[tmp],%[rshift]\t\n"
        "sll   %[tmp],%[tmp],1 \t\n"
        "sll   %[tmp],%[tmp],%[norbits] \t\n"
        "or    %[tmp],%[tmp],%[tmp2] \t\n"
        "movz  %[res],%[tmp],%[bit5] \t\n"
         : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
         : [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20)
         : "%hi","%lo"
         );
    }

    return result;
}

inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
    GGLfixed result,t,tmp1,tmp2;

    if (__builtin_constant_p(shift)) {
        if (shift == 0) {
                 asm ("mult %[a], %[b] \t\n"
                 "mflo  %[lo]   \t\n"
                 "addu  %[lo],%[lo],%[c]    \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 : "%hi","%lo"
                 );
                } else if (shift == 32) {
                    asm ("mult %[a], %[b] \t\n"
                    "mfhi  %[lo]   \t\n"
                    "addu  %[lo],%[lo],%[c]    \t\n"
                    : [lo]"=&r"(result)
                    : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                    : "%hi","%lo"
                    );
                } else if ((shift>0) && (shift<32)) {
                    asm ("mult %[a], %[b] \t\n"
                    "mflo  %[res]   \t\n"
                    "mfhi  %[t]   \t\n"
                    "srl   %[res],%[res],%[rshift]    \t\n"
                    "sll   %[t],%[t],%[lshift]     \t\n"
                    "or  %[res],%[res],%[t]    \t\n"
                    "addu  %[res],%[res],%[c]    \t\n"
                    : [res]"=&r"(result),[t]"=&r"(t)
                    : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
                    : "%hi","%lo"
                    );
                } else {
                    asm ("mult %[a], %[b] \t\n"
                    "nor %[tmp1],$zero,%[shift]\t\n"
                    "mflo  %[res]   \t\n"
                    "mfhi  %[t]   \t\n"
                    "srl   %[res],%[res],%[shift]    \t\n"
                    "sll   %[tmp2],%[t],1     \t\n"
                    "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
                    "or  %[tmp1],%[tmp2],%[res]    \t\n"
                    "srav  %[res],%[t],%[shift]     \t\n"
                    "andi %[tmp2],%[shift],0x20\t\n"
                    "movz %[res],%[tmp1],%[tmp2]\t\n"
                    "addu  %[res],%[res],%[c]    \t\n"
                    : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                    : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
                    : "%hi","%lo"
                    );
                }
            } else {
                asm ("mult %[a], %[b] \t\n"
                "nor %[tmp1],$zero,%[shift]\t\n"
                "mflo  %[res]   \t\n"
                "mfhi  %[t]   \t\n"
                "srl   %[res],%[res],%[shift]    \t\n"
                "sll   %[tmp2],%[t],1     \t\n"
                "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
                "or  %[tmp1],%[tmp2],%[res]    \t\n"
                "srav  %[res],%[t],%[shift]     \t\n"
                "andi %[tmp2],%[shift],0x20\t\n"
                "movz %[res],%[tmp1],%[tmp2]\t\n"
                "addu  %[res],%[res],%[c]    \t\n"
                : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
                : "%hi","%lo"
                );
            }
            return result;
}

inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
    GGLfixed result,t,tmp1,tmp2;

    if (__builtin_constant_p(shift)) {
        if (shift == 0) {
                 asm ("mult %[a], %[b] \t\n"
                 "mflo  %[lo]   \t\n"
                 "subu  %[lo],%[lo],%[c]    \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 : "%hi","%lo"
                 );
                } else if (shift == 32) {
                    asm ("mult %[a], %[b] \t\n"
                    "mfhi  %[lo]   \t\n"
                    "subu  %[lo],%[lo],%[c]    \t\n"
                    : [lo]"=&r"(result)
                    : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                    : "%hi","%lo"
                    );
                } else if ((shift>0) && (shift<32)) {
                    asm ("mult %[a], %[b] \t\n"
                    "mflo  %[res]   \t\n"
                    "mfhi  %[t]   \t\n"
                    "srl   %[res],%[res],%[rshift]    \t\n"
                    "sll   %[t],%[t],%[lshift]     \t\n"
                    "or  %[res],%[res],%[t]    \t\n"
                    "subu  %[res],%[res],%[c]    \t\n"
                    : [res]"=&r"(result),[t]"=&r"(t)
                    : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
                    : "%hi","%lo"
                    );
                } else {
                    asm ("mult %[a], %[b] \t\n"
                    "nor %[tmp1],$zero,%[shift]\t\n"
                     "mflo  %[res]   \t\n"
                     "mfhi  %[t]   \t\n"
                     "srl   %[res],%[res],%[shift]    \t\n"
                     "sll   %[tmp2],%[t],1     \t\n"
                     "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
                     "or  %[tmp1],%[tmp2],%[res]    \t\n"
                     "srav  %[res],%[t],%[shift]     \t\n"
                     "andi %[tmp2],%[shift],0x20\t\n"
                     "movz %[res],%[tmp1],%[tmp2]\t\n"
                     "subu  %[res],%[res],%[c]    \t\n"
                     : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                     : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
                     : "%hi","%lo"
                     );
                    }
                } else {
                asm ("mult %[a], %[b] \t\n"
                "nor %[tmp1],$zero,%[shift]\t\n"
                "mflo  %[res]   \t\n"
                "mfhi  %[t]   \t\n"
                "srl   %[res],%[res],%[shift]    \t\n"
                "sll   %[tmp2],%[t],1     \t\n"
                "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
                "or  %[tmp1],%[tmp2],%[res]    \t\n"
                "srav  %[res],%[t],%[shift]     \t\n"
                "andi %[tmp2],%[shift],0x20\t\n"
                "movz %[res],%[tmp1],%[tmp2]\t\n"
                "subu  %[res],%[res],%[c]    \t\n"
                : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
                : "%hi","%lo"
                );
            }
    return result;
}

inline int64_t gglMulii(int32_t x, int32_t y) CONST;
inline int64_t gglMulii(int32_t x, int32_t y) {
    union {
        struct {
#if defined(__MIPSEL__)
            int32_t lo;
            int32_t hi;
#elif defined(__MIPSEB__)
            int32_t hi;
            int32_t lo;
#endif
        } s;
        int64_t res;
    }u;
    asm("mult %2, %3 \t\n"
        "mfhi %1   \t\n"
        "mflo %0   \t\n"
        : "=r"(u.s.lo), "=&r"(u.s.hi)
        : "%r"(x), "r"(y)
        : "%hi","%lo"
        );
    return u.res;
}

#elif defined(__aarch64__)

// inline AArch64 implementations
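//
// smaddl computes the full 64-bit product x*y plus the 64-bit rounding term
// in a single instruction, so gglMulx below is the same rounded shift
//   (int64_t(x)*y + (1 << (shift-1))) >> shift
// with the rounding constant built by the first three instructions.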

inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST;
inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift)
{
    GGLfixed result;
    GGLfixed round;

    asm("mov    %x[round], #1                        \n"
        "lsl    %x[round], %x[round], %x[shift]      \n"
        "lsr    %x[round], %x[round], #1             \n"
        "smaddl %x[result], %w[x], %w[y],%x[round]   \n"
        "lsr    %x[result], %x[result], %x[shift]    \n"
        : [round]"=&r"(round), [result]"=&r"(result)
        : [x]"r"(x), [y]"r"(y), [shift] "r"(shift)
        :
       );
    return result;
}
inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
{
    GGLfixed result;
    asm("smull  %x[result], %w[x], %w[y]                     \n"
        "lsr    %x[result], %x[result], %x[shift]            \n"
        "add    %w[result], %w[result], %w[a]                \n"
        : [result]"=&r"(result)
        : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift)
        :
        );
    return result;
}

inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
{
    GGLfixed result;

    asm("smull  %x[result], %w[x], %w[y]                     \n"
        "lsr    %x[result], %x[result], %x[shift]            \n"
        "sub    %w[result], %w[result], %w[a]                \n"
        : [result]"=&r"(result)
        : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift)
        :
        );
    return result;
}
inline int64_t gglMulii(int32_t x, int32_t y) CONST;
inline int64_t gglMulii(int32_t x, int32_t y)
{
    int64_t res;
    asm("smull  %x0, %w1, %w2 \n"
        : "=r"(res)
        : "%r"(x), "r"(y)
        :
        );
    return res;
}

#elif defined(__mips__) && __mips_isa_rev == 6

/*inline MIPS implementations*/
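/*
 * Same rounded multiply as the pre-R6 MIPS code above, but MIPS32r6 drops the
 * HI/LO accumulator and the movz instruction: mul/muh return the low and high
 * halves of the product directly, and seleqz/selnez replace the conditional
 * move used to handle shift values of 32 and above.
 */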
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
    GGLfixed result,tmp,tmp1,tmp2;

    if (__builtin_constant_p(shift)) {
        if (shift == 0) {
            asm ("mul %[res], %[a], %[b] \t\n"
            : [res]"=&r"(result)
            : [a]"r"(a),[b]"r"(b)
            );
        } else if (shift == 32)
        {
            asm ("mul %[res], %[a], %[b] \t\n"
            "li  %[tmp],1\t\n"
            "sll  %[tmp],%[tmp],0x1f\t\n"
            "addu %[tmp1],%[tmp],%[res] \t\n"
            "muh %[res], %[a], %[b] \t\n"
            "sltu %[tmp1],%[tmp1],%[tmp]\t\n"   /*obit*/
            "sra %[tmp],%[tmp],0x1f \t\n"
            "addu %[res],%[res],%[tmp]\t\n"
            "addu %[res],%[res],%[tmp1]\t\n"
            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1)
            : [a]"r"(a),[b]"r"(b),[shift]"I"(shift)
            );
        } else if ((shift >0) && (shift < 32))
        {
            asm ("mul %[res], %[a], %[b] \t\n"
            "li  %[tmp],1 \t\n"
            "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
            "addu %[tmp1],%[tmp],%[res] \t\n"
            "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
            "addu  %[res],%[res],%[tmp] \t\n"
            "muh %[tmp], %[a], %[b] \t\n"
            "addu  %[tmp],%[tmp],%[tmp1] \t\n"
            "sll   %[tmp],%[tmp],%[lshift] \t\n"
            "srl   %[res],%[res],%[rshift]    \t\n"
            "or    %[res],%[res],%[tmp] \t\n"
            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
            : [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1)
            );
        } else {
            asm ("mul %[res], %[a], %[b] \t\n"
            "li  %[tmp],1 \t\n"
            "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
            "addu %[tmp1],%[tmp],%[res] \t\n"
            "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
            "sra  %[tmp2],%[tmp],0x1f \t\n"
            "addu  %[res],%[res],%[tmp] \t\n"
            "muh  %[tmp], %[a], %[b]   \t\n"
            "addu  %[tmp],%[tmp],%[tmp2] \t\n"
            "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
            "srl   %[tmp2],%[res],%[rshift]    \t\n"
            "srav  %[res], %[tmp],%[rshift]\t\n"
            "sll   %[tmp],%[tmp],1 \t\n"
            "sll   %[tmp],%[tmp],%[norbits] \t\n"
            "or    %[tmp],%[tmp],%[tmp2] \t\n"
            "seleqz  %[tmp],%[tmp],%[bit5] \t\n"
            "selnez  %[res],%[res],%[bit5] \t\n"
            "or    %[res],%[res],%[tmp] \t\n"
            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
            : [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20)
            );
        }
    } else {
        asm ("mul %[res], %[a], %[b] \t\n"
        "li  %[tmp],1 \t\n"
        "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
        "addu %[tmp1],%[tmp],%[res] \t\n"
        "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
        "sra  %[tmp2],%[tmp],0x1f \t\n"
        "addu  %[res],%[res],%[tmp] \t\n"
        "muh  %[tmp], %[a], %[b] \t\n"
        "addu  %[tmp],%[tmp],%[tmp2] \t\n"
        "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
        "srl   %[tmp2],%[res],%[rshift]    \t\n"
        "srav  %[res], %[tmp],%[rshift]\t\n"
        "sll   %[tmp],%[tmp],1 \t\n"
        "sll   %[tmp],%[tmp],%[norbits] \t\n"
        "or    %[tmp],%[tmp],%[tmp2] \t\n"
        "seleqz  %[tmp],%[tmp],%[bit5] \t\n"
        "selnez  %[res],%[res],%[bit5] \t\n"
        "or    %[res],%[res],%[tmp] \t\n"
         : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
         : [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20)
         );
    }
    return result;
}

inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
    GGLfixed result,t,tmp1,tmp2;

    if (__builtin_constant_p(shift)) {
        if (shift == 0) {
                 asm ("mul %[lo], %[a], %[b] \t\n"
                 "addu  %[lo],%[lo],%[c]    \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 );
                } else if (shift == 32) {
                    asm ("muh %[lo], %[a], %[b] \t\n"
                    "addu  %[lo],%[lo],%[c]    \t\n"
                    : [lo]"=&r"(result)
                    : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                    );
                } else if ((shift>0) && (shift<32)) {
                    asm ("mul %[res], %[a], %[b] \t\n"
                    "muh  %[t], %[a], %[b] \t\n"
                    "srl   %[res],%[res],%[rshift]    \t\n"
                    "sll   %[t],%[t],%[lshift]     \t\n"
                    "or  %[res],%[res],%[t]    \t\n"
                    "addu  %[res],%[res],%[c]    \t\n"
                    : [res]"=&r"(result),[t]"=&r"(t)
                    : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
                    );
                } else {
                    asm ("mul %[res], %[a], %[b] \t\n"
                    "muh %[t], %[a], %[b] \t\n"
                    "nor %[tmp1],$zero,%[shift]\t\n"
                    "srl   %[res],%[res],%[shift]    \t\n"
                    "sll   %[tmp2],%[t],1     \t\n"
                    "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
                    "or  %[tmp1],%[tmp2],%[res]    \t\n"
                    "srav  %[res],%[t],%[shift]     \t\n"
                    "andi %[tmp2],%[shift],0x20\t\n"
                    "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
                    "selnez %[res],%[res],%[tmp2]\t\n"
                    "or %[res],%[res],%[tmp1]\t\n"
                    "addu  %[res],%[res],%[c]    \t\n"
                    : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                    : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
                    );
                }
            } else {
                asm ("mul %[res], %[a], %[b] \t\n"
                "muh %[t], %[a], %[b] \t\n"
                "nor %[tmp1],$zero,%[shift]\t\n"
                "srl   %[res],%[res],%[shift]    \t\n"
                "sll   %[tmp2],%[t],1     \t\n"
                "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
                "or  %[tmp1],%[tmp2],%[res]    \t\n"
                "srav  %[res],%[t],%[shift]     \t\n"
                "andi %[tmp2],%[shift],0x20\t\n"
                "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
                "selnez %[res],%[res],%[tmp2]\t\n"
                "or %[res],%[res],%[tmp1]\t\n"
                "addu  %[res],%[res],%[c]    \t\n"
                : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
                );
            }
            return result;
}

inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
    GGLfixed result,t,tmp1,tmp2;

    if (__builtin_constant_p(shift)) {
        if (shift == 0) {
                 asm ("mul %[lo], %[a], %[b] \t\n"
                 "subu  %[lo],%[lo],%[c]    \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 );
                } else if (shift == 32) {
                    asm ("muh %[lo], %[a], %[b] \t\n"
                    "subu  %[lo],%[lo],%[c]    \t\n"
                    : [lo]"=&r"(result)
                    : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                    );
                } else if ((shift>0) && (shift<32)) {
                    asm ("mul %[res], %[a], %[b] \t\n"
                    "muh %[t], %[a], %[b] \t\n"
                    "srl   %[res],%[res],%[rshift]    \t\n"
                    "sll   %[t],%[t],%[lshift]     \t\n"
                    "or  %[res],%[res],%[t]    \t\n"
                    "subu  %[res],%[res],%[c]    \t\n"
                    : [res]"=&r"(result),[t]"=&r"(t)
                    : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
                    );
                } else {
                    asm ("mul %[res], %[a], %[b] \t\n"
                    "muh %[t], %[a], %[b] \t\n"
                    "nor %[tmp1],$zero,%[shift]\t\n"
                    "srl   %[res],%[res],%[shift]    \t\n"
                    "sll   %[tmp2],%[t],1     \t\n"
                    "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
                    "or  %[tmp1],%[tmp2],%[res]    \t\n"
                    "srav  %[res],%[t],%[shift]     \t\n"
                    "andi %[tmp2],%[shift],0x20\t\n"
                    "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
                    "selnez %[res],%[res],%[tmp2]\t\n"
                    "or %[res],%[res],%[tmp1]\t\n"
                    "subu  %[res],%[res],%[c]    \t\n"
                    : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                    : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
                     );
                    }
                } else {
                asm ("mul %[res], %[a], %[b] \t\n"
                "muh %[t], %[a], %[b] \t\n"
                "nor %[tmp1],$zero,%[shift]\t\n"
                "srl   %[res],%[res],%[shift]    \t\n"
                "sll   %[tmp2],%[t],1     \t\n"
                "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
                "or  %[tmp1],%[tmp2],%[res]    \t\n"
                "srav  %[res],%[t],%[shift]     \t\n"
                "andi %[tmp2],%[shift],0x20\t\n"
                "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
                "selnez %[res],%[res],%[tmp2]\t\n"
                "or %[res],%[res],%[tmp1]\t\n"
                "subu  %[res],%[res],%[c]    \t\n"
                : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
                );
            }
    return result;
}

inline int64_t gglMulii(int32_t x, int32_t y) CONST;
inline int64_t gglMulii(int32_t x, int32_t y) {
    union {
        struct {
#if defined(__MIPSEL__)
            int32_t lo;
            int32_t hi;
#elif defined(__MIPSEB__)
            int32_t hi;
            int32_t lo;
#endif
        } s;
        int64_t res;
    }u;
    asm("mul %0, %2, %3 \t\n"
        "muh %1, %2, %3 \t\n"
        : "=r"(u.s.lo), "=&r"(u.s.hi)
        : "%r"(x), "r"(y)
        );
    return u.res;
}

#else // ----------------------------------------------------------------------

inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
    return GGLfixed((int64_t(a)*b + (1<<(shift-1)))>>shift);
}
inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
    return GGLfixed((int64_t(a)*b)>>shift) + c;
}
inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
    return GGLfixed((int64_t(a)*b)>>shift) - c;
}
inline int64_t gglMulii(int32_t a, int32_t b) CONST;
inline int64_t gglMulii(int32_t a, int32_t b) {
    return int64_t(a)*b;
}

#endif

// ------------------------------------------------------------------------

inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) CONST;
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) {
    return gglMulx(a, b, 16);
}
inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) CONST;
inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) {
    return gglMulAddx(a, b, c, 16);
}
inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) CONST;
inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) {
    return gglMulSubx(a, b, c, 16);
}
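
// With the default shift of 16 these operate directly on 16.16 values, e.g.
//   gglMulx(3*FIXED_ONE, FIXED_HALF)               == 0x00018000  // 3.0*0.5 = 1.5
//   gglMulAddx(2*FIXED_ONE, FIXED_HALF, FIXED_ONE) == 0x00020000  // 2.0*0.5 + 1.0
//   gglMulSubx(2*FIXED_ONE, FIXED_HALF, FIXED_ONE) == 0x00000000  // 2.0*0.5 - 1.0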

// ------------------------------------------------------------------------

inline int32_t gglClz(int32_t x) CONST;
inline int32_t gglClz(int32_t x)
{
#if (defined(__arm__) && !defined(__thumb__)) || defined(__mips__) || defined(__aarch64__)
    return __builtin_clz(x);
#else
    if (!x) return 32;
    int32_t exp = 31;
    if (x & 0xFFFF0000) { exp -= 16; x >>= 16; }
    if (x & 0x0000ff00) { exp -= 8; x >>= 8; }
    if (x & 0x000000f0) { exp -= 4; x >>= 4; }
    if (x & 0x0000000c) { exp -= 2; x >>= 2; }
    if (x & 0x00000002) { exp -= 1; }
    return exp;
#endif
}
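
// gglClz returns the number of leading zero bits, e.g. gglClz(1) == 31 and
// gglClz(FIXED_ONE) == 15. Note that on the __builtin_clz() path the result
// is undefined for x == 0, whereas the portable fallback returns 32.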

// ------------------------------------------------------------------------

int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i) CONST;

inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) CONST;
inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) {
    return gglDivQ(n, d, 16);
}

inline int32_t gglDivx(GGLfixed n, GGLfixed d) CONST;
inline int32_t gglDivx(GGLfixed n, GGLfixed d) {
    return gglDivQ(n, d, 16);
}
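
// gglDivQ(n, d, q) is implemented elsewhere in pixelflinger and is understood
// to return n/d with q fraction bits, so for 16.16 operands one would expect
//   gglDivx(3*FIXED_ONE, 2*FIXED_ONE)  to be approximately 0x00018000  // 1.5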

// ------------------------------------------------------------------------

inline GGLfixed gglRecipFast(GGLfixed x) CONST;
inline GGLfixed gglRecipFast(GGLfixed x)
{
    // This is a really bad approximation of 1/x, but it's also
    // very fast. x must be strictly positive.
    // If x is in [0.5, 1), then 1/x is approximated by 3 - 2*x
    // (we use 2.30 fixed-point internally).
    const int32_t lz = gglClz(x);
    return (0xC0000000 - (x << (lz - 1))) >> (30-lz);
}

// ------------------------------------------------------------------------

inline GGLfixed gglClampx(GGLfixed c) CONST;
inline GGLfixed gglClampx(GGLfixed c)
{
#if defined(__thumb__)
    // clamp without branches
    c &= ~(c>>31);  c = FIXED_ONE - c;
    c &= ~(c>>31);  c = FIXED_ONE - c;
#else
#if defined(__arm__)
    // I don't know why gcc thinks it's smarter than me! The code below
    // clamps to zero in one instruction, but gcc won't generate it and
    // replaces it with a cmp + movlt (it's quite amazing actually).
    asm("bic %0, %1, %1, asr #31\n" : "=r"(c) : "r"(c));
#elif defined(__aarch64__)
    asm("bic %w0, %w1, %w1, asr #31\n" : "=r"(c) : "r"(c));
#else
    c &= ~(c>>31);
#endif
    if (c>FIXED_ONE)
        c = FIXED_ONE;
#endif
    return c;
}
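
// gglClampx clamps a 16.16 value to [0, FIXED_ONE]:
//   gglClampx(-FIXED_HALF)           == 0
//   gglClampx(FIXED_HALF)            == FIXED_HALF
//   gglClampx(FIXED_ONE + FIXED_ONE) == FIXED_ONE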

// ------------------------------------------------------------------------

#endif // ANDROID_GGL_FIXED_H