1 2 /* 3 * Copyright 2006 The Android Open Source Project 4 * 5 * Use of this source code is governed by a BSD-style license that can be 6 * found in the LICENSE file. 7 */ 8 9 10 #ifndef SkMath_DEFINED 11 #define SkMath_DEFINED 12 13 #include "SkTypes.h" 14 15 // 64bit -> 32bit utilities 16 17 /** 18 * Return true iff the 64bit value can exactly be represented in signed 32bits 19 */ 20 static inline bool sk_64_isS32(int64_t value) { 21 return (int32_t)value == value; 22 } 23 24 /** 25 * Return the 64bit argument as signed 32bits, asserting in debug that the arg 26 * exactly fits in signed 32bits. In the release build, no checks are preformed 27 * and the return value if the arg does not fit is undefined. 28 */ 29 static inline int32_t sk_64_asS32(int64_t value) { 30 SkASSERT(sk_64_isS32(value)); 31 return (int32_t)value; 32 } 33 34 // Handy util that can be passed two ints, and will automatically promote to 35 // 64bits before the multiply, so the caller doesn't have to remember to cast 36 // e.g. (int64_t)a * b; 37 static inline int64_t sk_64_mul(int64_t a, int64_t b) { 38 return a * b; 39 } 40 41 /////////////////////////////////////////////////////////////////////////////// 42 43 /** 44 * Computes numer1 * numer2 / denom in full 64 intermediate precision. 45 * It is an error for denom to be 0. There is no special handling if 46 * the result overflows 32bits. 47 */ 48 static inline int32_t SkMulDiv(int32_t numer1, int32_t numer2, int32_t denom) { 49 SkASSERT(denom); 50 51 int64_t tmp = sk_64_mul(numer1, numer2) / denom; 52 return sk_64_asS32(tmp); 53 } 54 55 /** 56 * Computes (numer1 << shift) / denom in full 64 intermediate precision. 57 * It is an error for denom to be 0. There is no special handling if 58 * the result overflows 32bits. 59 */ 60 int32_t SkDivBits(int32_t numer, int32_t denom, int shift); 61 62 /** 63 * Return the integer square root of value, with a bias of bitBias 64 */ 65 int32_t SkSqrtBits(int32_t value, int bitBias); 66 67 /** Return the integer square root of n, treated as a SkFixed (16.16) 68 */ 69 #define SkSqrt32(n) SkSqrtBits(n, 15) 70 71 //! Returns the number of leading zero bits (0...32) 72 int SkCLZ_portable(uint32_t); 73 74 #ifndef SkCLZ 75 #if defined(_MSC_VER) && _MSC_VER >= 1400 76 #include <intrin.h> 77 78 static inline int SkCLZ(uint32_t mask) { 79 if (mask) { 80 DWORD index; 81 _BitScanReverse(&index, mask); 82 return index ^ 0x1F; 83 } else { 84 return 32; 85 } 86 } 87 #elif defined(SK_CPU_ARM32) || defined(__GNUC__) || defined(__clang__) 88 static inline int SkCLZ(uint32_t mask) { 89 // __builtin_clz(0) is undefined, so we have to detect that case. 90 return mask ? __builtin_clz(mask) : 32; 91 } 92 #else 93 #define SkCLZ(x) SkCLZ_portable(x) 94 #endif 95 #endif 96 97 /** 98 * Returns (value < 0 ? 0 : value) efficiently (i.e. no compares or branches) 99 */ 100 static inline int SkClampPos(int value) { 101 return value & ~(value >> 31); 102 } 103 104 /** Given an integer and a positive (max) integer, return the value 105 * pinned against 0 and max, inclusive. 106 * @param value The value we want returned pinned between [0...max] 107 * @param max The positive max value 108 * @return 0 if value < 0, max if value > max, else value 109 */ 110 static inline int SkClampMax(int value, int max) { 111 // ensure that max is positive 112 SkASSERT(max >= 0); 113 if (value < 0) { 114 value = 0; 115 } 116 if (value > max) { 117 value = max; 118 } 119 return value; 120 } 121 122 /** 123 * Returns the smallest power-of-2 that is >= the specified value. If value 124 * is already a power of 2, then it is returned unchanged. It is undefined 125 * if value is <= 0. 126 */ 127 static inline int SkNextPow2(int value) { 128 SkASSERT(value > 0); 129 return 1 << (32 - SkCLZ(value - 1)); 130 } 131 132 /** 133 * Returns the log2 of the specified value, were that value to be rounded up 134 * to the next power of 2. It is undefined to pass 0. Examples: 135 * SkNextLog2(1) -> 0 136 * SkNextLog2(2) -> 1 137 * SkNextLog2(3) -> 2 138 * SkNextLog2(4) -> 2 139 * SkNextLog2(5) -> 3 140 */ 141 static inline int SkNextLog2(uint32_t value) { 142 SkASSERT(value != 0); 143 return 32 - SkCLZ(value - 1); 144 } 145 146 /** 147 * Returns true if value is a power of 2. Does not explicitly check for 148 * value <= 0. 149 */ 150 static inline bool SkIsPow2(int value) { 151 return (value & (value - 1)) == 0; 152 } 153 154 /////////////////////////////////////////////////////////////////////////////// 155 156 /** 157 * SkMulS16(a, b) multiplies a * b, but requires that a and b are both int16_t. 158 * With this requirement, we can generate faster instructions on some 159 * architectures. 160 */ 161 #ifdef SK_ARM_HAS_EDSP 162 static inline int32_t SkMulS16(S16CPU x, S16CPU y) { 163 SkASSERT((int16_t)x == x); 164 SkASSERT((int16_t)y == y); 165 int32_t product; 166 asm("smulbb %0, %1, %2 \n" 167 : "=r"(product) 168 : "r"(x), "r"(y) 169 ); 170 return product; 171 } 172 #else 173 #ifdef SK_DEBUG 174 static inline int32_t SkMulS16(S16CPU x, S16CPU y) { 175 SkASSERT((int16_t)x == x); 176 SkASSERT((int16_t)y == y); 177 return x * y; 178 } 179 #else 180 #define SkMulS16(x, y) ((x) * (y)) 181 #endif 182 #endif 183 184 /** 185 * Return a*b/((1 << shift) - 1), rounding any fractional bits. 186 * Only valid if a and b are unsigned and <= 32767 and shift is > 0 and <= 8 187 */ 188 static inline unsigned SkMul16ShiftRound(U16CPU a, U16CPU b, int shift) { 189 SkASSERT(a <= 32767); 190 SkASSERT(b <= 32767); 191 SkASSERT(shift > 0 && shift <= 8); 192 unsigned prod = SkMulS16(a, b) + (1 << (shift - 1)); 193 return (prod + (prod >> shift)) >> shift; 194 } 195 196 /** 197 * Return a*b/255, rounding any fractional bits. 198 * Only valid if a and b are unsigned and <= 32767. 199 */ 200 static inline U8CPU SkMulDiv255Round(U16CPU a, U16CPU b) { 201 SkASSERT(a <= 32767); 202 SkASSERT(b <= 32767); 203 unsigned prod = SkMulS16(a, b) + 128; 204 return (prod + (prod >> 8)) >> 8; 205 } 206 207 /** 208 * Stores numer/denom and numer%denom into div and mod respectively. 209 */ 210 template <typename In, typename Out> 211 inline void SkTDivMod(In numer, In denom, Out* div, Out* mod) { 212 #ifdef SK_CPU_ARM32 213 // If we wrote this as in the else branch, GCC won't fuse the two into one 214 // divmod call, but rather a div call followed by a divmod. Silly! This 215 // version is just as fast as calling __aeabi_[u]idivmod manually, but with 216 // prettier code. 217 // 218 // This benches as around 2x faster than the code in the else branch. 219 const In d = numer/denom; 220 *div = static_cast<Out>(d); 221 *mod = static_cast<Out>(numer-d*denom); 222 #else 223 // On x86 this will just be a single idiv. 224 *div = static_cast<Out>(numer/denom); 225 *mod = static_cast<Out>(numer%denom); 226 #endif 227 } 228 229 #endif 230