Home | History | Annotate | Download | only in debase
      1 /*-------------------------------------------------------------------------
      2  * drawElements Base Portability Library
      3  * -------------------------------------
      4  *
      5  * Copyright 2014 The Android Open Source Project
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file
     21  * \brief 32-bit integer math.
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "deInt32.h"
     25 
     26 DE_BEGIN_EXTERN_C
     27 
     28 const deInt8 g_clzLUT[256] =
     29 {
     30 	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
     31 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     32 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     33 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     34 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     35 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     36 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     37 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     38 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     39 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     40 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     41 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     42 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     43 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     44 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     45 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
     46 };
     47 
     48 /*--------------------------------------------------------------------*//*!
     49  * \brief Compute the reciprocal of an integer.
     50  * \param a		Input value.
     51  * \param rcp	Pointer to resulting reciprocal "mantissa" value.
     52  * \param exp	Pointer to resulting exponent value.
     53  *
     54  * The returned value is split into an exponent part and a mantissa part for
     55  * practical reasons. The actual reciprocal can be computed with the formula:
     56  * result = exp2(exp) * rcp / (1<<DE_RCP_FRAC_BITS).
     57  *//*--------------------------------------------------------------------*/
     58 void deRcp32 (deUint32 a, deUint32* rcp, int* exp)
     59 {
     60 	enum { RCP_LUT_BITS = 8 };
     61 	static const deUint32 s_rcpLUT[1<<RCP_LUT_BITS] =
     62 	{
     63 		0x40000000, 0x3fc03fc0, 0x3f80fe03, 0x3f423954,
     64 		0x3f03f03f, 0x3ec62159, 0x3e88cb3c, 0x3e4bec88,
     65 		0x3e0f83e0, 0x3dd38ff0, 0x3d980f66, 0x3d5d00f5,
     66 		0x3d226357, 0x3ce8354b, 0x3cae7592, 0x3c7522f3,
     67 		0x3c3c3c3c, 0x3c03c03c, 0x3bcbadc7, 0x3b9403b9,
     68 		0x3b5cc0ed, 0x3b25e446, 0x3aef6ca9, 0x3ab95900,
     69 		0x3a83a83a, 0x3a4e5947, 0x3a196b1e, 0x39e4dcb8,
     70 		0x39b0ad12, 0x397cdb2c, 0x3949660a, 0x39164cb5,
     71 		0x38e38e38, 0x38b129a2, 0x387f1e03, 0x384d6a72,
     72 		0x381c0e07, 0x37eb07dd, 0x37ba5713, 0x3789facb,
     73 		0x3759f229, 0x372a3c56, 0x36fad87b, 0x36cbc5c7,
     74 		0x369d0369, 0x366e9095, 0x36406c80, 0x36129663,
     75 		0x35e50d79, 0x35b7d0ff, 0x358ae035, 0x355e3a5f,
     76 		0x3531dec0, 0x3505cca2, 0x34da034d, 0x34ae820e,
     77 		0x34834834, 0x3458550f, 0x342da7f2, 0x34034034,
     78 		0x33d91d2a, 0x33af3e2e, 0x3385a29d, 0x335c49d4,
     79 		0x33333333, 0x330a5e1b, 0x32e1c9f0, 0x32b97617,
     80 		0x329161f9, 0x32698cff, 0x3241f693, 0x321a9e24,
     81 		0x31f3831f, 0x31cca4f5, 0x31a6031a, 0x317f9d00,
     82 		0x3159721e, 0x313381ec, 0x310dcbe1, 0x30e84f79,
     83 		0x30c30c30, 0x309e0184, 0x30792ef5, 0x30549403,
     84 		0x30303030, 0x300c0300, 0x2fe80bfa, 0x2fc44aa2,
     85 		0x2fa0be82, 0x2f7d6724, 0x2f5a4411, 0x2f3754d7,
     86 		0x2f149902, 0x2ef21023, 0x2ecfb9c8, 0x2ead9584,
     87 		0x2e8ba2e8, 0x2e69e18a, 0x2e4850fe, 0x2e26f0db,
     88 		0x2e05c0b8, 0x2de4c02d, 0x2dc3eed6, 0x2da34c4d,
     89 		0x2d82d82d, 0x2d629215, 0x2d4279a2, 0x2d228e75,
     90 		0x2d02d02d, 0x2ce33e6c, 0x2cc3d8d4, 0x2ca49f0a,
     91 		0x2c8590b2, 0x2c66ad71, 0x2c47f4ee, 0x2c2966d0,
     92 		0x2c0b02c0, 0x2becc868, 0x2bceb771, 0x2bb0cf87,
     93 		0x2b931057, 0x2b75798c, 0x2b580ad6, 0x2b3ac3e2,
     94 		0x2b1da461, 0x2b00ac02, 0x2ae3da78, 0x2ac72f74,
     95 		0x2aaaaaaa, 0x2a8e4bcd, 0x2a721291, 0x2a55fead,
     96 		0x2a3a0fd5, 0x2a1e45c2, 0x2a02a02a, 0x29e71ec5,
     97 		0x29cbc14e, 0x29b0877d, 0x2995710e, 0x297a7dbb,
     98 		0x295fad40, 0x2944ff5a, 0x292a73c7, 0x29100a44,
     99 		0x28f5c28f, 0x28db9c68, 0x28c1978f, 0x28a7b3c5,
    100 		0x288df0ca, 0x28744e61, 0x285acc4b, 0x28416a4c,
    101 		0x28282828, 0x280f05a2, 0x27f6027f, 0x27dd1e85,
    102 		0x27c45979, 0x27abb323, 0x27932b48, 0x277ac1b2,
    103 		0x27627627, 0x274a4870, 0x27323858, 0x271a45a6,
    104 		0x27027027, 0x26eab7a3, 0x26d31be7, 0x26bb9cbf,
    105 		0x26a439f6, 0x268cf359, 0x2675c8b6, 0x265eb9da,
    106 		0x2647c694, 0x2630eeb1, 0x261a3202, 0x26039055,
    107 		0x25ed097b, 0x25d69d43, 0x25c04b80, 0x25aa1402,
    108 		0x2593f69b, 0x257df31c, 0x2568095a, 0x25523925,
    109 		0x253c8253, 0x2526e4b7, 0x25116025, 0x24fbf471,
    110 		0x24e6a171, 0x24d166f9, 0x24bc44e1, 0x24a73afd,
    111 		0x24924924, 0x247d6f2e, 0x2468acf1, 0x24540245,
    112 		0x243f6f02, 0x242af300, 0x24168e18, 0x24024024,
    113 		0x23ee08fb, 0x23d9e878, 0x23c5de76, 0x23b1eace,
    114 		0x239e0d5b, 0x238a45f8, 0x23769480, 0x2362f8cf,
    115 		0x234f72c2, 0x233c0233, 0x2328a701, 0x23156107,
    116 		0x23023023, 0x22ef1432, 0x22dc0d12, 0x22c91aa1,
    117 		0x22b63cbe, 0x22a37347, 0x2290be1c, 0x227e1d1a,
    118 		0x226b9022, 0x22591713, 0x2246b1ce, 0x22346033,
    119 		0x22222222, 0x220ff77c, 0x21fde021, 0x21ebdbf5,
    120 		0x21d9ead7, 0x21c80cab, 0x21b64151, 0x21a488ac,
    121 		0x2192e29f, 0x21814f0d, 0x216fcdd8, 0x215e5ee4,
    122 		0x214d0214, 0x213bb74d, 0x212a7e72, 0x21195766,
    123 		0x21084210, 0x20f73e53, 0x20e64c14, 0x20d56b38,
    124 		0x20c49ba5, 0x20b3dd40, 0x20a32fef, 0x20929398,
    125 		0x20820820, 0x20718d6f, 0x2061236a, 0x2050c9f8,
    126 		0x20408102, 0x2030486c, 0x20202020, 0x20100804
    127 	};
    128 
    129 	DE_STATIC_ASSERT(DE_LENGTH_OF_ARRAY(s_rcpLUT) == (1<<RCP_LUT_BITS));
    130 
    131 	int			shift		= deClz32(a);
    132 	deUint32	normalized	= (deUint32)a << shift; /* Highest bit is always 1. */
    133 	int			lookupNdx	= (normalized >> (31 - RCP_LUT_BITS)) & ((1<<RCP_LUT_BITS)-1); /* Discard high bit, leave 8 next highest bits to lowest bits of normalized. */
    134 	deUint32	result;
    135 	deUint32	tmp;
    136 
    137 	/* Must be non-negative. */
    138 	DE_ASSERT(a > 0);
    139 
    140 	/* First approximation from lookup table. */
    141 	DE_ASSERT(lookupNdx >= 0 && lookupNdx < (1<<RCP_LUT_BITS));
    142 	result = s_rcpLUT[lookupNdx];
    143 
    144 	/* Newton-Raphson iteration for improved accuracy (has 16 bits of accuracy afterwards). */
    145 	tmp		= deSafeMuluAsr32(result, normalized, 31);
    146 	result	= deSafeMuluAsr32(result, (2u<<DE_RCP_FRAC_BITS) - tmp, DE_RCP_FRAC_BITS);
    147 
    148 	/* Second Newton-Raphson iteration (now has full accuracy). */
    149 	tmp		= deSafeMuluAsr32(result, normalized, 31);
    150 	result	= deSafeMuluAsr32(result, (2u<<DE_RCP_FRAC_BITS) - tmp, DE_RCP_FRAC_BITS);
    151 
    152 	/* Return results. */
    153 	*rcp = result;
    154 	*exp = 31 - shift;
    155 }
    156 
    157 DE_END_EXTERN_C
    158