1 2 /* 3 * Copyright 2006 The Android Open Source Project 4 * 5 * Use of this source code is governed by a BSD-style license that can be 6 * found in the LICENSE file. 7 */ 8 9 10 #ifndef SkMath_DEFINED 11 #define SkMath_DEFINED 12 13 #include "SkTypes.h" 14 15 /** 16 * Computes numer1 * numer2 / denom in full 64 intermediate precision. 17 * It is an error for denom to be 0. There is no special handling if 18 * the result overflows 32bits. 19 */ 20 int32_t SkMulDiv(int32_t numer1, int32_t numer2, int32_t denom); 21 22 /** 23 * Computes (numer1 << shift) / denom in full 64 intermediate precision. 24 * It is an error for denom to be 0. There is no special handling if 25 * the result overflows 32bits. 26 */ 27 int32_t SkDivBits(int32_t numer, int32_t denom, int shift); 28 29 /** 30 * Return the integer square root of value, with a bias of bitBias 31 */ 32 int32_t SkSqrtBits(int32_t value, int bitBias); 33 34 /** Return the integer square root of n, treated as a SkFixed (16.16) 35 */ 36 #define SkSqrt32(n) SkSqrtBits(n, 15) 37 38 /////////////////////////////////////////////////////////////////////////////// 39 40 //! Returns the number of leading zero bits (0...32) 41 int SkCLZ_portable(uint32_t); 42 43 #ifndef SkCLZ 44 #if defined(_MSC_VER) && _MSC_VER >= 1400 45 #include <intrin.h> 46 47 static inline int SkCLZ(uint32_t mask) { 48 if (mask) { 49 DWORD index; 50 _BitScanReverse(&index, mask); 51 return index ^ 0x1F; 52 } else { 53 return 32; 54 } 55 } 56 #elif defined(SK_CPU_ARM) || defined(__GNUC__) || defined(__clang__) 57 static inline int SkCLZ(uint32_t mask) { 58 // __builtin_clz(0) is undefined, so we have to detect that case. 59 return mask ? __builtin_clz(mask) : 32; 60 } 61 #else 62 #define SkCLZ(x) SkCLZ_portable(x) 63 #endif 64 #endif 65 66 /** 67 * Returns (value < 0 ? 0 : value) efficiently (i.e. no compares or branches) 68 */ 69 static inline int SkClampPos(int value) { 70 return value & ~(value >> 31); 71 } 72 73 /** Given an integer and a positive (max) integer, return the value 74 * pinned against 0 and max, inclusive. 75 * @param value The value we want returned pinned between [0...max] 76 * @param max The positive max value 77 * @return 0 if value < 0, max if value > max, else value 78 */ 79 static inline int SkClampMax(int value, int max) { 80 // ensure that max is positive 81 SkASSERT(max >= 0); 82 if (value < 0) { 83 value = 0; 84 } 85 if (value > max) { 86 value = max; 87 } 88 return value; 89 } 90 91 /** 92 * Returns the smallest power-of-2 that is >= the specified value. If value 93 * is already a power of 2, then it is returned unchanged. It is undefined 94 * if value is <= 0. 95 */ 96 static inline int SkNextPow2(int value) { 97 SkASSERT(value > 0); 98 return 1 << (32 - SkCLZ(value - 1)); 99 } 100 101 /** 102 * Returns the log2 of the specified value, were that value to be rounded up 103 * to the next power of 2. It is undefined to pass 0. Examples: 104 * SkNextLog2(1) -> 0 105 * SkNextLog2(2) -> 1 106 * SkNextLog2(3) -> 2 107 * SkNextLog2(4) -> 2 108 * SkNextLog2(5) -> 3 109 */ 110 static inline int SkNextLog2(uint32_t value) { 111 SkASSERT(value != 0); 112 return 32 - SkCLZ(value - 1); 113 } 114 115 /** 116 * Returns true if value is a power of 2. Does not explicitly check for 117 * value <= 0. 118 */ 119 static inline bool SkIsPow2(int value) { 120 return (value & (value - 1)) == 0; 121 } 122 123 /////////////////////////////////////////////////////////////////////////////// 124 125 /** 126 * SkMulS16(a, b) multiplies a * b, but requires that a and b are both int16_t. 127 * With this requirement, we can generate faster instructions on some 128 * architectures. 129 */ 130 #ifdef SK_ARM_HAS_EDSP 131 static inline int32_t SkMulS16(S16CPU x, S16CPU y) { 132 SkASSERT((int16_t)x == x); 133 SkASSERT((int16_t)y == y); 134 int32_t product; 135 asm("smulbb %0, %1, %2 \n" 136 : "=r"(product) 137 : "r"(x), "r"(y) 138 ); 139 return product; 140 } 141 #else 142 #ifdef SK_DEBUG 143 static inline int32_t SkMulS16(S16CPU x, S16CPU y) { 144 SkASSERT((int16_t)x == x); 145 SkASSERT((int16_t)y == y); 146 return x * y; 147 } 148 #else 149 #define SkMulS16(x, y) ((x) * (y)) 150 #endif 151 #endif 152 153 /** 154 * Return a*b/((1 << shift) - 1), rounding any fractional bits. 155 * Only valid if a and b are unsigned and <= 32767 and shift is > 0 and <= 8 156 */ 157 static inline unsigned SkMul16ShiftRound(U16CPU a, U16CPU b, int shift) { 158 SkASSERT(a <= 32767); 159 SkASSERT(b <= 32767); 160 SkASSERT(shift > 0 && shift <= 8); 161 unsigned prod = SkMulS16(a, b) + (1 << (shift - 1)); 162 return (prod + (prod >> shift)) >> shift; 163 } 164 165 /** 166 * Return a*b/255, rounding any fractional bits. 167 * Only valid if a and b are unsigned and <= 32767. 168 */ 169 static inline U8CPU SkMulDiv255Round(U16CPU a, U16CPU b) { 170 SkASSERT(a <= 32767); 171 SkASSERT(b <= 32767); 172 unsigned prod = SkMulS16(a, b) + 128; 173 return (prod + (prod >> 8)) >> 8; 174 } 175 176 /** 177 * Stores numer/denom and numer%denom into div and mod respectively. 178 */ 179 template <typename In, typename Out> 180 inline void SkTDivMod(In numer, In denom, Out* div, Out* mod) { 181 #ifdef SK_CPU_ARM 182 // If we wrote this as in the else branch, GCC won't fuse the two into one 183 // divmod call, but rather a div call followed by a divmod. Silly! This 184 // version is just as fast as calling __aeabi_[u]idivmod manually, but with 185 // prettier code. 186 // 187 // This benches as around 2x faster than the code in the else branch. 188 const In d = numer/denom; 189 *div = static_cast<Out>(d); 190 *mod = static_cast<Out>(numer-d*denom); 191 #else 192 // On x86 this will just be a single idiv. 193 *div = static_cast<Out>(numer/denom); 194 *mod = static_cast<Out>(numer%denom); 195 #endif // SK_CPU_ARM 196 } 197 198 #endif 199