Home | History | Annotate | Download | only in base
      1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).
      2 //
      3 // ====================================================
      4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
      5 //
      6 // Developed at SunSoft, a Sun Microsystems, Inc. business.
      7 // Permission to use, copy, modify, and distribute this
      8 // software is freely granted, provided that this notice
      9 // is preserved.
     10 // ====================================================
     11 //
     12 // The original source code covered by the above license above has been
     13 // modified significantly by Google Inc.
     14 // Copyright 2016 the V8 project authors. All rights reserved.
     15 
     16 #include "src/base/ieee754.h"
     17 
     18 #include <cmath>
     19 #include <limits>
     20 
     21 #include "src/base/build_config.h"
     22 #include "src/base/macros.h"
     23 
     24 namespace v8 {
     25 namespace base {
     26 namespace ieee754 {
     27 
     28 namespace {
     29 
     30 /* Disable "potential divide by 0" warning in Visual Studio compiler. */
     31 
     32 #if V8_CC_MSVC
     33 
     34 #pragma warning(disable : 4723)
     35 
     36 #endif
     37 
     38 /*
     39  * The original fdlibm code used statements like:
     40  *  n0 = ((*(int*)&one)>>29)^1;   * index of high word *
     41  *  ix0 = *(n0+(int*)&x);     * high word of x *
     42  *  ix1 = *((1-n0)+(int*)&x);   * low word of x *
     43  * to dig two 32 bit words out of the 64 bit IEEE floating point
     44  * value.  That is non-ANSI, and, moreover, the gcc instruction
     45  * scheduler gets it wrong.  We instead use the following macros.
     46  * Unlike the original code, we determine the endianness at compile
     47  * time, not at run time; I don't see much benefit to selecting
     48  * endianness at run time.
     49  */
     50 
     51 /*
     52  * A union which permits us to convert between a double and two 32 bit
     53  * ints.
     54  */
     55 
     56 #if V8_TARGET_LITTLE_ENDIAN
     57 
     58 typedef union {
     59   double value;
     60   struct {
     61     uint32_t lsw;
     62     uint32_t msw;
     63   } parts;
     64   struct {
     65     uint64_t w;
     66   } xparts;
     67 } ieee_double_shape_type;
     68 
     69 #else
     70 
     71 typedef union {
     72   double value;
     73   struct {
     74     uint32_t msw;
     75     uint32_t lsw;
     76   } parts;
     77   struct {
     78     uint64_t w;
     79   } xparts;
     80 } ieee_double_shape_type;
     81 
     82 #endif
     83 
     84 /* Get two 32 bit ints from a double.  */
     85 
     86 #define EXTRACT_WORDS(ix0, ix1, d) \
     87   do {                             \
     88     ieee_double_shape_type ew_u;   \
     89     ew_u.value = (d);              \
     90     (ix0) = ew_u.parts.msw;        \
     91     (ix1) = ew_u.parts.lsw;        \
     92   } while (0)
     93 
     94 /* Get a 64-bit int from a double. */
     95 #define EXTRACT_WORD64(ix, d)    \
     96   do {                           \
     97     ieee_double_shape_type ew_u; \
     98     ew_u.value = (d);            \
     99     (ix) = ew_u.xparts.w;        \
    100   } while (0)
    101 
    102 /* Get the more significant 32 bit int from a double.  */
    103 
    104 #define GET_HIGH_WORD(i, d)      \
    105   do {                           \
    106     ieee_double_shape_type gh_u; \
    107     gh_u.value = (d);            \
    108     (i) = gh_u.parts.msw;        \
    109   } while (0)
    110 
    111 /* Get the less significant 32 bit int from a double.  */
    112 
    113 #define GET_LOW_WORD(i, d)       \
    114   do {                           \
    115     ieee_double_shape_type gl_u; \
    116     gl_u.value = (d);            \
    117     (i) = gl_u.parts.lsw;        \
    118   } while (0)
    119 
    120 /* Set a double from two 32 bit ints.  */
    121 
    122 #define INSERT_WORDS(d, ix0, ix1) \
    123   do {                            \
    124     ieee_double_shape_type iw_u;  \
    125     iw_u.parts.msw = (ix0);       \
    126     iw_u.parts.lsw = (ix1);       \
    127     (d) = iw_u.value;             \
    128   } while (0)
    129 
    130 /* Set a double from a 64-bit int. */
    131 #define INSERT_WORD64(d, ix)     \
    132   do {                           \
    133     ieee_double_shape_type iw_u; \
    134     iw_u.xparts.w = (ix);        \
    135     (d) = iw_u.value;            \
    136   } while (0)
    137 
    138 /* Set the more significant 32 bits of a double from an int.  */
    139 
    140 #define SET_HIGH_WORD(d, v)      \
    141   do {                           \
    142     ieee_double_shape_type sh_u; \
    143     sh_u.value = (d);            \
    144     sh_u.parts.msw = (v);        \
    145     (d) = sh_u.value;            \
    146   } while (0)
    147 
    148 /* Set the less significant 32 bits of a double from an int.  */
    149 
    150 #define SET_LOW_WORD(d, v)       \
    151   do {                           \
    152     ieee_double_shape_type sl_u; \
    153     sl_u.value = (d);            \
    154     sl_u.parts.lsw = (v);        \
    155     (d) = sl_u.value;            \
    156   } while (0)
    157 
    158 /* Support macro. */
    159 
    160 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval))
    161 
    162 int32_t __ieee754_rem_pio2(double x, double *y) WARN_UNUSED_RESULT;
    163 double __kernel_cos(double x, double y) WARN_UNUSED_RESULT;
    164 int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
    165                       const int32_t *ipio2) WARN_UNUSED_RESULT;
    166 double __kernel_sin(double x, double y, int iy) WARN_UNUSED_RESULT;
    167 
    168 /* __ieee754_rem_pio2(x,y)
    169  *
    170  * return the remainder of x rem pi/2 in y[0]+y[1]
    171  * use __kernel_rem_pio2()
    172  */
    173 int32_t __ieee754_rem_pio2(double x, double *y) {
    174   /*
    175    * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
    176    */
    177   static const int32_t two_over_pi[] = {
    178       0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C,
    179       0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649,
    180       0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44,
    181       0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B,
    182       0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D,
    183       0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
    184       0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330,
    185       0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08,
    186       0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA,
    187       0x73A8C9, 0x60E27B, 0xC08C6B,
    188   };
    189 
    190   static const int32_t npio2_hw[] = {
    191       0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
    192       0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
    193       0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
    194       0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
    195       0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
    196       0x404858EB, 0x404921FB,
    197   };
    198 
    199   /*
    200    * invpio2:  53 bits of 2/pi
    201    * pio2_1:   first  33 bit of pi/2
    202    * pio2_1t:  pi/2 - pio2_1
    203    * pio2_2:   second 33 bit of pi/2
    204    * pio2_2t:  pi/2 - (pio2_1+pio2_2)
    205    * pio2_3:   third  33 bit of pi/2
    206    * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
    207    */
    208 
    209   static const double
    210       zero = 0.00000000000000000000e+00,    /* 0x00000000, 0x00000000 */
    211       half = 5.00000000000000000000e-01,    /* 0x3FE00000, 0x00000000 */
    212       two24 = 1.67772160000000000000e+07,   /* 0x41700000, 0x00000000 */
    213       invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
    214       pio2_1 = 1.57079632673412561417e+00,  /* 0x3FF921FB, 0x54400000 */
    215       pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
    216       pio2_2 = 6.07710050630396597660e-11,  /* 0x3DD0B461, 0x1A600000 */
    217       pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
    218       pio2_3 = 2.02226624871116645580e-21,  /* 0x3BA3198A, 0x2E000000 */
    219       pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
    220 
    221   double z, w, t, r, fn;
    222   double tx[3];
    223   int32_t e0, i, j, nx, n, ix, hx;
    224   uint32_t low;
    225 
    226   z = 0;
    227   GET_HIGH_WORD(hx, x); /* high word of x */
    228   ix = hx & 0x7fffffff;
    229   if (ix <= 0x3fe921fb) { /* |x| ~<= pi/4 , no need for reduction */
    230     y[0] = x;
    231     y[1] = 0;
    232     return 0;
    233   }
    234   if (ix < 0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
    235     if (hx > 0) {
    236       z = x - pio2_1;
    237       if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */
    238         y[0] = z - pio2_1t;
    239         y[1] = (z - y[0]) - pio2_1t;
    240       } else { /* near pi/2, use 33+33+53 bit pi */
    241         z -= pio2_2;
    242         y[0] = z - pio2_2t;
    243         y[1] = (z - y[0]) - pio2_2t;
    244       }
    245       return 1;
    246     } else { /* negative x */
    247       z = x + pio2_1;
    248       if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */
    249         y[0] = z + pio2_1t;
    250         y[1] = (z - y[0]) + pio2_1t;
    251       } else { /* near pi/2, use 33+33+53 bit pi */
    252         z += pio2_2;
    253         y[0] = z + pio2_2t;
    254         y[1] = (z - y[0]) + pio2_2t;
    255       }
    256       return -1;
    257     }
    258   }
    259   if (ix <= 0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
    260     t = fabs(x);
    261     n = static_cast<int32_t>(t * invpio2 + half);
    262     fn = static_cast<double>(n);
    263     r = t - fn * pio2_1;
    264     w = fn * pio2_1t; /* 1st round good to 85 bit */
    265     if (n < 32 && ix != npio2_hw[n - 1]) {
    266       y[0] = r - w; /* quick check no cancellation */
    267     } else {
    268       uint32_t high;
    269       j = ix >> 20;
    270       y[0] = r - w;
    271       GET_HIGH_WORD(high, y[0]);
    272       i = j - ((high >> 20) & 0x7ff);
    273       if (i > 16) { /* 2nd iteration needed, good to 118 */
    274         t = r;
    275         w = fn * pio2_2;
    276         r = t - w;
    277         w = fn * pio2_2t - ((t - r) - w);
    278         y[0] = r - w;
    279         GET_HIGH_WORD(high, y[0]);
    280         i = j - ((high >> 20) & 0x7ff);
    281         if (i > 49) { /* 3rd iteration need, 151 bits acc */
    282           t = r;      /* will cover all possible cases */
    283           w = fn * pio2_3;
    284           r = t - w;
    285           w = fn * pio2_3t - ((t - r) - w);
    286           y[0] = r - w;
    287         }
    288       }
    289     }
    290     y[1] = (r - y[0]) - w;
    291     if (hx < 0) {
    292       y[0] = -y[0];
    293       y[1] = -y[1];
    294       return -n;
    295     } else {
    296       return n;
    297     }
    298   }
    299   /*
    300    * all other (large) arguments
    301    */
    302   if (ix >= 0x7ff00000) { /* x is inf or NaN */
    303     y[0] = y[1] = x - x;
    304     return 0;
    305   }
    306   /* set z = scalbn(|x|,ilogb(x)-23) */
    307   GET_LOW_WORD(low, x);
    308   SET_LOW_WORD(z, low);
    309   e0 = (ix >> 20) - 1046; /* e0 = ilogb(z)-23; */
    310   SET_HIGH_WORD(z, ix - static_cast<int32_t>(e0 << 20));
    311   for (i = 0; i < 2; i++) {
    312     tx[i] = static_cast<double>(static_cast<int32_t>(z));
    313     z = (z - tx[i]) * two24;
    314   }
    315   tx[2] = z;
    316   nx = 3;
    317   while (tx[nx - 1] == zero) nx--; /* skip zero term */
    318   n = __kernel_rem_pio2(tx, y, e0, nx, 2, two_over_pi);
    319   if (hx < 0) {
    320     y[0] = -y[0];
    321     y[1] = -y[1];
    322     return -n;
    323   }
    324   return n;
    325 }
    326 
    327 /* __kernel_cos( x,  y )
    328  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
    329  * Input x is assumed to be bounded by ~pi/4 in magnitude.
    330  * Input y is the tail of x.
    331  *
    332  * Algorithm
    333  *      1. Since cos(-x) = cos(x), we need only to consider positive x.
    334  *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
    335  *      3. cos(x) is approximated by a polynomial of degree 14 on
    336  *         [0,pi/4]
    337  *                                       4            14
    338  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
    339  *         where the remez error is
    340  *
    341  *      |              2     4     6     8     10    12     14 |     -58
    342  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
    343  *      |                                                      |
    344  *
    345  *                     4     6     8     10    12     14
    346  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
    347  *             cos(x) = 1 - x*x/2 + r
    348  *         since cos(x+y) ~ cos(x) - sin(x)*y
    349  *                        ~ cos(x) - x*y,
    350  *         a correction term is necessary in cos(x) and hence
    351  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
    352  *         For better accuracy when x > 0.3, let qx = |x|/4 with
    353  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
    354  *         Then
    355  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
    356  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
    357  *         magnitude of the latter is at least a quarter of x*x/2,
    358  *         thus, reducing the rounding error in the subtraction.
    359  */
    360 V8_INLINE double __kernel_cos(double x, double y) {
    361   static const double
    362       one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
    363       C1 = 4.16666666666666019037e-02,  /* 0x3FA55555, 0x5555554C */
    364       C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
    365       C3 = 2.48015872894767294178e-05,  /* 0x3EFA01A0, 0x19CB1590 */
    366       C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
    367       C5 = 2.08757232129817482790e-09,  /* 0x3E21EE9E, 0xBDB4B1C4 */
    368       C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
    369 
    370   double a, iz, z, r, qx;
    371   int32_t ix;
    372   GET_HIGH_WORD(ix, x);
    373   ix &= 0x7fffffff;                           /* ix = |x|'s high word*/
    374   if (ix < 0x3e400000) {                      /* if x < 2**27 */
    375     if (static_cast<int>(x) == 0) return one; /* generate inexact */
    376   }
    377   z = x * x;
    378   r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6)))));
    379   if (ix < 0x3FD33333) { /* if |x| < 0.3 */
    380     return one - (0.5 * z - (z * r - x * y));
    381   } else {
    382     if (ix > 0x3fe90000) { /* x > 0.78125 */
    383       qx = 0.28125;
    384     } else {
    385       INSERT_WORDS(qx, ix - 0x00200000, 0); /* x/4 */
    386     }
    387     iz = 0.5 * z - qx;
    388     a = one - qx;
    389     return a - (iz - (z * r - x * y));
    390   }
    391 }
    392 
    393 /* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
    394  * double x[],y[]; int e0,nx,prec; int ipio2[];
    395  *
    396  * __kernel_rem_pio2 return the last three digits of N with
    397  *              y = x - N*pi/2
    398  * so that |y| < pi/2.
    399  *
    400  * The method is to compute the integer (mod 8) and fraction parts of
    401  * (2/pi)*x without doing the full multiplication. In general we
    402  * skip the part of the product that are known to be a huge integer (
    403  * more accurately, = 0 mod 8 ). Thus the number of operations are
    404  * independent of the exponent of the input.
    405  *
    406  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
    407  *
    408  * Input parameters:
    409  *      x[]     The input value (must be positive) is broken into nx
    410  *              pieces of 24-bit integers in double precision format.
    411  *              x[i] will be the i-th 24 bit of x. The scaled exponent
    412  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
    413  *              match x's up to 24 bits.
    414  *
    415  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
    416  *                      e0 = ilogb(z)-23
    417  *                      z  = scalbn(z,-e0)
    418  *              for i = 0,1,2
    419  *                      x[i] = floor(z)
    420  *                      z    = (z-x[i])*2**24
    421  *
    422  *
    423  *      y[]     output result in an array of double precision numbers.
    424  *              The dimension of y[] is:
    425  *                      24-bit  precision       1
    426  *                      53-bit  precision       2
    427  *                      64-bit  precision       2
    428  *                      113-bit precision       3
    429  *              The actual value is the sum of them. Thus for 113-bit
    430  *              precison, one may have to do something like:
    431  *
    432  *              long double t,w,r_head, r_tail;
    433  *              t = (long double)y[2] + (long double)y[1];
    434  *              w = (long double)y[0];
    435  *              r_head = t+w;
    436  *              r_tail = w - (r_head - t);
    437  *
    438  *      e0      The exponent of x[0]
    439  *
    440  *      nx      dimension of x[]
    441  *
    442  *      prec    an integer indicating the precision:
    443  *                      0       24  bits (single)
    444  *                      1       53  bits (double)
    445  *                      2       64  bits (extended)
    446  *                      3       113 bits (quad)
    447  *
    448  *      ipio2[]
    449  *              integer array, contains the (24*i)-th to (24*i+23)-th
    450  *              bit of 2/pi after binary point. The corresponding
    451  *              floating value is
    452  *
    453  *                      ipio2[i] * 2^(-24(i+1)).
    454  *
    455  * External function:
    456  *      double scalbn(), floor();
    457  *
    458  *
    459  * Here is the description of some local variables:
    460  *
    461  *      jk      jk+1 is the initial number of terms of ipio2[] needed
    462  *              in the computation. The recommended value is 2,3,4,
    463  *              6 for single, double, extended,and quad.
    464  *
    465  *      jz      local integer variable indicating the number of
    466  *              terms of ipio2[] used.
    467  *
    468  *      jx      nx - 1
    469  *
    470  *      jv      index for pointing to the suitable ipio2[] for the
    471  *              computation. In general, we want
    472  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
    473  *              is an integer. Thus
    474  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
    475  *              Hence jv = max(0,(e0-3)/24).
    476  *
    477  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
    478  *
    479  *      q[]     double array with integral value, representing the
    480  *              24-bits chunk of the product of x and 2/pi.
    481  *
    482  *      q0      the corresponding exponent of q[0]. Note that the
    483  *              exponent for q[i] would be q0-24*i.
    484  *
    485  *      PIo2[]  double precision array, obtained by cutting pi/2
    486  *              into 24 bits chunks.
    487  *
    488  *      f[]     ipio2[] in floating point
    489  *
    490  *      iq[]    integer array by breaking up q[] in 24-bits chunk.
    491  *
    492  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
    493  *
    494  *      ih      integer. If >0 it indicates q[] is >= 0.5, hence
    495  *              it also indicates the *sign* of the result.
    496  *
    497  */
    498 int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
    499                       const int32_t *ipio2) {
    500   /* Constants:
    501    * The hexadecimal values are the intended ones for the following
    502    * constants. The decimal values may be used, provided that the
    503    * compiler will convert from decimal to binary accurately enough
    504    * to produce the hexadecimal values shown.
    505    */
    506   static const int init_jk[] = {2, 3, 4, 6}; /* initial value for jk */
    507 
    508   static const double PIo2[] = {
    509       1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
    510       7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
    511       5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
    512       3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
    513       1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
    514       1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
    515       2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
    516       2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
    517   };
    518 
    519   static const double
    520       zero = 0.0,
    521       one = 1.0,
    522       two24 = 1.67772160000000000000e+07,  /* 0x41700000, 0x00000000 */
    523       twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
    524 
    525   int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
    526   double z, fw, f[20], fq[20], q[20];
    527 
    528   /* initialize jk*/
    529   jk = init_jk[prec];
    530   jp = jk;
    531 
    532   /* determine jx,jv,q0, note that 3>q0 */
    533   jx = nx - 1;
    534   jv = (e0 - 3) / 24;
    535   if (jv < 0) jv = 0;
    536   q0 = e0 - 24 * (jv + 1);
    537 
    538   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
    539   j = jv - jx;
    540   m = jx + jk;
    541   for (i = 0; i <= m; i++, j++) {
    542     f[i] = (j < 0) ? zero : static_cast<double>(ipio2[j]);
    543   }
    544 
    545   /* compute q[0],q[1],...q[jk] */
    546   for (i = 0; i <= jk; i++) {
    547     for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
    548     q[i] = fw;
    549   }
    550 
    551   jz = jk;
    552 recompute:
    553   /* distill q[] into iq[] reversingly */
    554   for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
    555     fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
    556     iq[i] = static_cast<int32_t>(z - two24 * fw);
    557     z = q[j - 1] + fw;
    558   }
    559 
    560   /* compute n */
    561   z = scalbn(z, q0);           /* actual value of z */
    562   z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
    563   n = static_cast<int32_t>(z);
    564   z -= static_cast<double>(n);
    565   ih = 0;
    566   if (q0 > 0) { /* need iq[jz-1] to determine n */
    567     i = (iq[jz - 1] >> (24 - q0));
    568     n += i;
    569     iq[jz - 1] -= i << (24 - q0);
    570     ih = iq[jz - 1] >> (23 - q0);
    571   } else if (q0 == 0) {
    572     ih = iq[jz - 1] >> 23;
    573   } else if (z >= 0.5) {
    574     ih = 2;
    575   }
    576 
    577   if (ih > 0) { /* q > 0.5 */
    578     n += 1;
    579     carry = 0;
    580     for (i = 0; i < jz; i++) { /* compute 1-q */
    581       j = iq[i];
    582       if (carry == 0) {
    583         if (j != 0) {
    584           carry = 1;
    585           iq[i] = 0x1000000 - j;
    586         }
    587       } else {
    588         iq[i] = 0xffffff - j;
    589       }
    590     }
    591     if (q0 > 0) { /* rare case: chance is 1 in 12 */
    592       switch (q0) {
    593         case 1:
    594           iq[jz - 1] &= 0x7fffff;
    595           break;
    596         case 2:
    597           iq[jz - 1] &= 0x3fffff;
    598           break;
    599       }
    600     }
    601     if (ih == 2) {
    602       z = one - z;
    603       if (carry != 0) z -= scalbn(one, q0);
    604     }
    605   }
    606 
    607   /* check if recomputation is needed */
    608   if (z == zero) {
    609     j = 0;
    610     for (i = jz - 1; i >= jk; i--) j |= iq[i];
    611     if (j == 0) { /* need recomputation */
    612       for (k = 1; jk >= k && iq[jk - k] == 0; k++) {
    613         /* k = no. of terms needed */
    614       }
    615 
    616       for (i = jz + 1; i <= jz + k; i++) { /* add q[jz+1] to q[jz+k] */
    617         f[jx + i] = ipio2[jv + i];
    618         for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
    619         q[i] = fw;
    620       }
    621       jz += k;
    622       goto recompute;
    623     }
    624   }
    625 
    626   /* chop off zero terms */
    627   if (z == 0.0) {
    628     jz -= 1;
    629     q0 -= 24;
    630     while (iq[jz] == 0) {
    631       jz--;
    632       q0 -= 24;
    633     }
    634   } else { /* break z into 24-bit if necessary */
    635     z = scalbn(z, -q0);
    636     if (z >= two24) {
    637       fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
    638       iq[jz] = z - two24 * fw;
    639       jz += 1;
    640       q0 += 24;
    641       iq[jz] = fw;
    642     } else {
    643       iq[jz] = z;
    644     }
    645   }
    646 
    647   /* convert integer "bit" chunk to floating-point value */
    648   fw = scalbn(one, q0);
    649   for (i = jz; i >= 0; i--) {
    650     q[i] = fw * iq[i];
    651     fw *= twon24;
    652   }
    653 
    654   /* compute PIo2[0,...,jp]*q[jz,...,0] */
    655   for (i = jz; i >= 0; i--) {
    656     for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) fw += PIo2[k] * q[i + k];
    657     fq[jz - i] = fw;
    658   }
    659 
    660   /* compress fq[] into y[] */
    661   switch (prec) {
    662     case 0:
    663       fw = 0.0;
    664       for (i = jz; i >= 0; i--) fw += fq[i];
    665       y[0] = (ih == 0) ? fw : -fw;
    666       break;
    667     case 1:
    668     case 2:
    669       fw = 0.0;
    670       for (i = jz; i >= 0; i--) fw += fq[i];
    671       y[0] = (ih == 0) ? fw : -fw;
    672       fw = fq[0] - fw;
    673       for (i = 1; i <= jz; i++) fw += fq[i];
    674       y[1] = (ih == 0) ? fw : -fw;
    675       break;
    676     case 3: /* painful */
    677       for (i = jz; i > 0; i--) {
    678         fw = fq[i - 1] + fq[i];
    679         fq[i] += fq[i - 1] - fw;
    680         fq[i - 1] = fw;
    681       }
    682       for (i = jz; i > 1; i--) {
    683         fw = fq[i - 1] + fq[i];
    684         fq[i] += fq[i - 1] - fw;
    685         fq[i - 1] = fw;
    686       }
    687       for (fw = 0.0, i = jz; i >= 2; i--) fw += fq[i];
    688       if (ih == 0) {
    689         y[0] = fq[0];
    690         y[1] = fq[1];
    691         y[2] = fw;
    692       } else {
    693         y[0] = -fq[0];
    694         y[1] = -fq[1];
    695         y[2] = -fw;
    696       }
    697   }
    698   return n & 7;
    699 }
    700 
    701 /* __kernel_sin( x, y, iy)
    702  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
    703  * Input x is assumed to be bounded by ~pi/4 in magnitude.
    704  * Input y is the tail of x.
    705  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
    706  *
    707  * Algorithm
    708  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
    709  *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
    710  *      3. sin(x) is approximated by a polynomial of degree 13 on
    711  *         [0,pi/4]
    712  *                               3            13
    713  *              sin(x) ~ x + S1*x + ... + S6*x
    714  *         where
    715  *
    716  *      |sin(x)         2     4     6     8     10     12  |     -58
    717  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
    718  *      |  x                                               |
    719  *
    720  *      4. sin(x+y) = sin(x) + sin'(x')*y
    721  *                  ~ sin(x) + (1-x*x/2)*y
    722  *         For better accuracy, let
    723  *                   3      2      2      2      2
    724  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
    725  *         then                   3    2
    726  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
    727  */
    728 V8_INLINE double __kernel_sin(double x, double y, int iy) {
    729   static const double
    730       half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
    731       S1 = -1.66666666666666324348e-01,  /* 0xBFC55555, 0x55555549 */
    732       S2 = 8.33333333332248946124e-03,   /* 0x3F811111, 0x1110F8A6 */
    733       S3 = -1.98412698298579493134e-04,  /* 0xBF2A01A0, 0x19C161D5 */
    734       S4 = 2.75573137070700676789e-06,   /* 0x3EC71DE3, 0x57B1FE7D */
    735       S5 = -2.50507602534068634195e-08,  /* 0xBE5AE5E6, 0x8A2B9CEB */
    736       S6 = 1.58969099521155010221e-10;   /* 0x3DE5D93A, 0x5ACFD57C */
    737 
    738   double z, r, v;
    739   int32_t ix;
    740   GET_HIGH_WORD(ix, x);
    741   ix &= 0x7fffffff;      /* high word of x */
    742   if (ix < 0x3e400000) { /* |x| < 2**-27 */
    743     if (static_cast<int>(x) == 0) return x;
    744   } /* generate inexact */
    745   z = x * x;
    746   v = z * x;
    747   r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6)));
    748   if (iy == 0) {
    749     return x + v * (S1 + z * r);
    750   } else {
    751     return x - ((z * (half * y - v * r) - y) - v * S1);
    752   }
    753 }
    754 
    755 /* __kernel_tan( x, y, k )
    756  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
    757  * Input x is assumed to be bounded by ~pi/4 in magnitude.
    758  * Input y is the tail of x.
    759  * Input k indicates whether tan (if k=1) or
    760  * -1/tan (if k= -1) is returned.
    761  *
    762  * Algorithm
    763  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
    764  *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
    765  *      3. tan(x) is approximated by a odd polynomial of degree 27 on
    766  *         [0,0.67434]
    767  *                               3             27
    768  *              tan(x) ~ x + T1*x + ... + T13*x
    769  *         where
    770  *
    771  *              |tan(x)         2     4            26   |     -59.2
    772  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
    773  *              |  x                                    |
    774  *
    775  *         Note: tan(x+y) = tan(x) + tan'(x)*y
    776  *                        ~ tan(x) + (1+x*x)*y
    777  *         Therefore, for better accuracy in computing tan(x+y), let
    778  *                   3      2      2       2       2
    779  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
    780  *         then
    781  *                                  3    2
    782  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
    783  *
    784  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
    785  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
    786  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
    787  */
    788 double __kernel_tan(double x, double y, int iy) {
    789   static const double xxx[] = {
    790       3.33333333333334091986e-01,             /* 3FD55555, 55555563 */
    791       1.33333333333201242699e-01,             /* 3FC11111, 1110FE7A */
    792       5.39682539762260521377e-02,             /* 3FABA1BA, 1BB341FE */
    793       2.18694882948595424599e-02,             /* 3F9664F4, 8406D637 */
    794       8.86323982359930005737e-03,             /* 3F8226E3, E96E8493 */
    795       3.59207910759131235356e-03,             /* 3F6D6D22, C9560328 */
    796       1.45620945432529025516e-03,             /* 3F57DBC8, FEE08315 */
    797       5.88041240820264096874e-04,             /* 3F4344D8, F2F26501 */
    798       2.46463134818469906812e-04,             /* 3F3026F7, 1A8D1068 */
    799       7.81794442939557092300e-05,             /* 3F147E88, A03792A6 */
    800       7.14072491382608190305e-05,             /* 3F12B80F, 32F0A7E9 */
    801       -1.85586374855275456654e-05,            /* BEF375CB, DB605373 */
    802       2.59073051863633712884e-05,             /* 3EFB2A70, 74BF7AD4 */
    803       /* one */ 1.00000000000000000000e+00,   /* 3FF00000, 00000000 */
    804       /* pio4 */ 7.85398163397448278999e-01,  /* 3FE921FB, 54442D18 */
    805       /* pio4lo */ 3.06161699786838301793e-17 /* 3C81A626, 33145C07 */
    806   };
    807 #define one xxx[13]
    808 #define pio4 xxx[14]
    809 #define pio4lo xxx[15]
    810 #define T xxx
    811 
    812   double z, r, v, w, s;
    813   int32_t ix, hx;
    814 
    815   GET_HIGH_WORD(hx, x);             /* high word of x */
    816   ix = hx & 0x7fffffff;             /* high word of |x| */
    817   if (ix < 0x3e300000) {            /* x < 2**-28 */
    818     if (static_cast<int>(x) == 0) { /* generate inexact */
    819       uint32_t low;
    820       GET_LOW_WORD(low, x);
    821       if (((ix | low) | (iy + 1)) == 0) {
    822         return one / fabs(x);
    823       } else {
    824         if (iy == 1) {
    825           return x;
    826         } else { /* compute -1 / (x+y) carefully */
    827           double a, t;
    828 
    829           z = w = x + y;
    830           SET_LOW_WORD(z, 0);
    831           v = y - (z - x);
    832           t = a = -one / w;
    833           SET_LOW_WORD(t, 0);
    834           s = one + t * z;
    835           return t + a * (s + t * v);
    836         }
    837       }
    838     }
    839   }
    840   if (ix >= 0x3FE59428) { /* |x| >= 0.6744 */
    841     if (hx < 0) {
    842       x = -x;
    843       y = -y;
    844     }
    845     z = pio4 - x;
    846     w = pio4lo - y;
    847     x = z + w;
    848     y = 0.0;
    849   }
    850   z = x * x;
    851   w = z * z;
    852   /*
    853    * Break x^5*(T[1]+x^2*T[2]+...) into
    854    * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
    855    * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
    856    */
    857   r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
    858   v = z *
    859       (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
    860   s = z * x;
    861   r = y + z * (s * (r + v) + y);
    862   r += T[0] * s;
    863   w = x + r;
    864   if (ix >= 0x3FE59428) {
    865     v = iy;
    866     return (1 - ((hx >> 30) & 2)) * (v - 2.0 * (x - (w * w / (w + v) - r)));
    867   }
    868   if (iy == 1) {
    869     return w;
    870   } else {
    871     /*
    872      * if allow error up to 2 ulp, simply return
    873      * -1.0 / (x+r) here
    874      */
    875     /* compute -1.0 / (x+r) accurately */
    876     double a, t;
    877     z = w;
    878     SET_LOW_WORD(z, 0);
    879     v = r - (z - x);  /* z+v = r+x */
    880     t = a = -1.0 / w; /* a = -1.0/w */
    881     SET_LOW_WORD(t, 0);
    882     s = 1.0 + t * z;
    883     return t + a * (s + t * v);
    884   }
    885 
    886 #undef one
    887 #undef pio4
    888 #undef pio4lo
    889 #undef T
    890 }
    891 
    892 }  // namespace
    893 
    894 /* atan(x)
    895  * Method
    896  *   1. Reduce x to positive by atan(x) = -atan(-x).
    897  *   2. According to the integer k=4t+0.25 chopped, t=x, the argument
    898  *      is further reduced to one of the following intervals and the
    899  *      arctangent of t is evaluated by the corresponding formula:
    900  *
    901  *      [0,7/16]      atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
    902  *      [7/16,11/16]  atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) )
    903  *      [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) )
    904  *      [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) )
    905  *      [39/16,INF]   atan(x) = atan(INF) + atan( -1/t )
    906  *
    907  * Constants:
    908  * The hexadecimal values are the intended ones for the following
    909  * constants. The decimal values may be used, provided that the
    910  * compiler will convert from decimal to binary accurately enough
    911  * to produce the hexadecimal values shown.
    912  */
    913 double atan(double x) {
    914   static const double atanhi[] = {
    915       4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
    916       7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
    917       9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
    918       1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
    919   };
    920 
    921   static const double atanlo[] = {
    922       2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
    923       3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
    924       1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
    925       6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */
    926   };
    927 
    928   static const double aT[] = {
    929       3.33333333333329318027e-01,  /* 0x3FD55555, 0x5555550D */
    930       -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */
    931       1.42857142725034663711e-01,  /* 0x3FC24924, 0x920083FF */
    932       -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */
    933       9.09088713343650656196e-02,  /* 0x3FB745CD, 0xC54C206E */
    934       -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */
    935       6.66107313738753120669e-02,  /* 0x3FB10D66, 0xA0D03D51 */
    936       -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */
    937       4.97687799461593236017e-02,  /* 0x3FA97B4B, 0x24760DEB */
    938       -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */
    939       1.62858201153657823623e-02,  /* 0x3F90AD3A, 0xE322DA11 */
    940   };
    941 
    942   static const double one = 1.0, huge = 1.0e300;
    943 
    944   double w, s1, s2, z;
    945   int32_t ix, hx, id;
    946 
    947   GET_HIGH_WORD(hx, x);
    948   ix = hx & 0x7fffffff;
    949   if (ix >= 0x44100000) { /* if |x| >= 2^66 */
    950     uint32_t low;
    951     GET_LOW_WORD(low, x);
    952     if (ix > 0x7ff00000 || (ix == 0x7ff00000 && (low != 0)))
    953       return x + x; /* NaN */
    954     if (hx > 0)
    955       return atanhi[3] + *(volatile double *)&atanlo[3];
    956     else
    957       return -atanhi[3] - *(volatile double *)&atanlo[3];
    958   }
    959   if (ix < 0x3fdc0000) {            /* |x| < 0.4375 */
    960     if (ix < 0x3e400000) {          /* |x| < 2^-27 */
    961       if (huge + x > one) return x; /* raise inexact */
    962     }
    963     id = -1;
    964   } else {
    965     x = fabs(x);
    966     if (ix < 0x3ff30000) {   /* |x| < 1.1875 */
    967       if (ix < 0x3fe60000) { /* 7/16 <=|x|<11/16 */
    968         id = 0;
    969         x = (2.0 * x - one) / (2.0 + x);
    970       } else { /* 11/16<=|x|< 19/16 */
    971         id = 1;
    972         x = (x - one) / (x + one);
    973       }
    974     } else {
    975       if (ix < 0x40038000) { /* |x| < 2.4375 */
    976         id = 2;
    977         x = (x - 1.5) / (one + 1.5 * x);
    978       } else { /* 2.4375 <= |x| < 2^66 */
    979         id = 3;
    980         x = -1.0 / x;
    981       }
    982     }
    983   }
    984   /* end of argument reduction */
    985   z = x * x;
    986   w = z * z;
    987   /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
    988   s1 = z * (aT[0] +
    989             w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
    990   s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
    991   if (id < 0) {
    992     return x - x * (s1 + s2);
    993   } else {
    994     z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
    995     return (hx < 0) ? -z : z;
    996   }
    997 }
    998 
    999 /* atan2(y,x)
   1000  * Method :
   1001  *  1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
   1002  *  2. Reduce x to positive by (if x and y are unexceptional):
   1003  *    ARG (x+iy) = arctan(y/x)       ... if x > 0,
   1004  *    ARG (x+iy) = pi - arctan[y/(-x)]   ... if x < 0,
   1005  *
   1006  * Special cases:
   1007  *
   1008  *  ATAN2((anything), NaN ) is NaN;
   1009  *  ATAN2(NAN , (anything) ) is NaN;
   1010  *  ATAN2(+-0, +(anything but NaN)) is +-0  ;
   1011  *  ATAN2(+-0, -(anything but NaN)) is +-pi ;
   1012  *  ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
   1013  *  ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
   1014  *  ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
   1015  *  ATAN2(+-INF,+INF ) is +-pi/4 ;
   1016  *  ATAN2(+-INF,-INF ) is +-3pi/4;
   1017  *  ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
   1018  *
   1019  * Constants:
   1020  * The hexadecimal values are the intended ones for the following
   1021  * constants. The decimal values may be used, provided that the
   1022  * compiler will convert from decimal to binary accurately enough
   1023  * to produce the hexadecimal values shown.
   1024  */
   1025 double atan2(double y, double x) {
   1026   static volatile double tiny = 1.0e-300;
   1027   static const double
   1028       zero = 0.0,
   1029       pi_o_4 = 7.8539816339744827900E-01, /* 0x3FE921FB, 0x54442D18 */
   1030       pi_o_2 = 1.5707963267948965580E+00, /* 0x3FF921FB, 0x54442D18 */
   1031       pi = 3.1415926535897931160E+00;     /* 0x400921FB, 0x54442D18 */
   1032   static volatile double pi_lo =
   1033       1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
   1034 
   1035   double z;
   1036   int32_t k, m, hx, hy, ix, iy;
   1037   uint32_t lx, ly;
   1038 
   1039   EXTRACT_WORDS(hx, lx, x);
   1040   ix = hx & 0x7fffffff;
   1041   EXTRACT_WORDS(hy, ly, y);
   1042   iy = hy & 0x7fffffff;
   1043   if (((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x7ff00000) ||
   1044       ((iy | ((ly | -static_cast<int32_t>(ly)) >> 31)) > 0x7ff00000)) {
   1045     return x + y; /* x or y is NaN */
   1046   }
   1047   if (((hx - 0x3ff00000) | lx) == 0) return atan(y); /* x=1.0 */
   1048   m = ((hy >> 31) & 1) | ((hx >> 30) & 2);           /* 2*sign(x)+sign(y) */
   1049 
   1050   /* when y = 0 */
   1051   if ((iy | ly) == 0) {
   1052     switch (m) {
   1053       case 0:
   1054       case 1:
   1055         return y; /* atan(+-0,+anything)=+-0 */
   1056       case 2:
   1057         return pi + tiny; /* atan(+0,-anything) = pi */
   1058       case 3:
   1059         return -pi - tiny; /* atan(-0,-anything) =-pi */
   1060     }
   1061   }
   1062   /* when x = 0 */
   1063   if ((ix | lx) == 0) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
   1064 
   1065   /* when x is INF */
   1066   if (ix == 0x7ff00000) {
   1067     if (iy == 0x7ff00000) {
   1068       switch (m) {
   1069         case 0:
   1070           return pi_o_4 + tiny; /* atan(+INF,+INF) */
   1071         case 1:
   1072           return -pi_o_4 - tiny; /* atan(-INF,+INF) */
   1073         case 2:
   1074           return 3.0 * pi_o_4 + tiny; /*atan(+INF,-INF)*/
   1075         case 3:
   1076           return -3.0 * pi_o_4 - tiny; /*atan(-INF,-INF)*/
   1077       }
   1078     } else {
   1079       switch (m) {
   1080         case 0:
   1081           return zero; /* atan(+...,+INF) */
   1082         case 1:
   1083           return -zero; /* atan(-...,+INF) */
   1084         case 2:
   1085           return pi + tiny; /* atan(+...,-INF) */
   1086         case 3:
   1087           return -pi - tiny; /* atan(-...,-INF) */
   1088       }
   1089     }
   1090   }
   1091   /* when y is INF */
   1092   if (iy == 0x7ff00000) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
   1093 
   1094   /* compute y/x */
   1095   k = (iy - ix) >> 20;
   1096   if (k > 60) { /* |y/x| >  2**60 */
   1097     z = pi_o_2 + 0.5 * pi_lo;
   1098     m &= 1;
   1099   } else if (hx < 0 && k < -60) {
   1100     z = 0.0; /* 0 > |y|/x > -2**-60 */
   1101   } else {
   1102     z = atan(fabs(y / x)); /* safe to do y/x */
   1103   }
   1104   switch (m) {
   1105     case 0:
   1106       return z; /* atan(+,+) */
   1107     case 1:
   1108       return -z; /* atan(-,+) */
   1109     case 2:
   1110       return pi - (z - pi_lo); /* atan(+,-) */
   1111     default:                   /* case 3 */
   1112       return (z - pi_lo) - pi; /* atan(-,-) */
   1113   }
   1114 }
   1115 
   1116 /* cos(x)
   1117  * Return cosine function of x.
   1118  *
   1119  * kernel function:
   1120  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
   1121  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
   1122  *      __ieee754_rem_pio2      ... argument reduction routine
   1123  *
   1124  * Method.
   1125  *      Let S,C and T denote the sin, cos and tan respectively on
   1126  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   1127  *      in [-pi/4 , +pi/4], and let n = k mod 4.
   1128  *      We have
   1129  *
   1130  *          n        sin(x)      cos(x)        tan(x)
   1131  *     ----------------------------------------------------------
   1132  *          0          S           C             T
   1133  *          1          C          -S            -1/T
   1134  *          2         -S          -C             T
   1135  *          3         -C           S            -1/T
   1136  *     ----------------------------------------------------------
   1137  *
   1138  * Special cases:
   1139  *      Let trig be any of sin, cos, or tan.
   1140  *      trig(+-INF)  is NaN, with signals;
   1141  *      trig(NaN)    is that NaN;
   1142  *
   1143  * Accuracy:
   1144  *      TRIG(x) returns trig(x) nearly rounded
   1145  */
   1146 double cos(double x) {
   1147   double y[2], z = 0.0;
   1148   int32_t n, ix;
   1149 
   1150   /* High word of x. */
   1151   GET_HIGH_WORD(ix, x);
   1152 
   1153   /* |x| ~< pi/4 */
   1154   ix &= 0x7fffffff;
   1155   if (ix <= 0x3fe921fb) {
   1156     return __kernel_cos(x, z);
   1157   } else if (ix >= 0x7ff00000) {
   1158     /* cos(Inf or NaN) is NaN */
   1159     return x - x;
   1160   } else {
   1161     /* argument reduction needed */
   1162     n = __ieee754_rem_pio2(x, y);
   1163     switch (n & 3) {
   1164       case 0:
   1165         return __kernel_cos(y[0], y[1]);
   1166       case 1:
   1167         return -__kernel_sin(y[0], y[1], 1);
   1168       case 2:
   1169         return -__kernel_cos(y[0], y[1]);
   1170       default:
   1171         return __kernel_sin(y[0], y[1], 1);
   1172     }
   1173   }
   1174 }
   1175 
   1176 /* exp(x)
   1177  * Returns the exponential of x.
   1178  *
   1179  * Method
   1180  *   1. Argument reduction:
   1181  *      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
   1182  *      Given x, find r and integer k such that
   1183  *
   1184  *               x = k*ln2 + r,  |r| <= 0.5*ln2.
   1185  *
   1186  *      Here r will be represented as r = hi-lo for better
   1187  *      accuracy.
   1188  *
   1189  *   2. Approximation of exp(r) by a special rational function on
   1190  *      the interval [0,0.34658]:
   1191  *      Write
   1192  *          R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
   1193  *      We use a special Remes algorithm on [0,0.34658] to generate
   1194  *      a polynomial of degree 5 to approximate R. The maximum error
   1195  *      of this polynomial approximation is bounded by 2**-59. In
   1196  *      other words,
   1197  *          R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
   1198  *      (where z=r*r, and the values of P1 to P5 are listed below)
   1199  *      and
   1200  *          |                  5          |     -59
   1201  *          | 2.0+P1*z+...+P5*z   -  R(z) | <= 2
   1202  *          |                             |
   1203  *      The computation of exp(r) thus becomes
   1204  *                             2*r
   1205  *              exp(r) = 1 + -------
   1206  *                            R - r
   1207  *                                 r*R1(r)
   1208  *                     = 1 + r + ----------- (for better accuracy)
   1209  *                                2 - R1(r)
   1210  *      where
   1211  *                               2       4             10
   1212  *              R1(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
   1213  *
   1214  *   3. Scale back to obtain exp(x):
   1215  *      From step 1, we have
   1216  *         exp(x) = 2^k * exp(r)
   1217  *
   1218  * Special cases:
   1219  *      exp(INF) is INF, exp(NaN) is NaN;
   1220  *      exp(-INF) is 0, and
   1221  *      for finite argument, only exp(0)=1 is exact.
   1222  *
   1223  * Accuracy:
   1224  *      according to an error analysis, the error is always less than
   1225  *      1 ulp (unit in the last place).
   1226  *
   1227  * Misc. info.
   1228  *      For IEEE double
   1229  *          if x >  7.09782712893383973096e+02 then exp(x) overflow
   1230  *          if x < -7.45133219101941108420e+02 then exp(x) underflow
   1231  *
   1232  * Constants:
   1233  * The hexadecimal values are the intended ones for the following
   1234  * constants. The decimal values may be used, provided that the
   1235  * compiler will convert from decimal to binary accurately enough
   1236  * to produce the hexadecimal values shown.
   1237  */
   1238 double exp(double x) {
   1239   static const double
   1240       one = 1.0,
   1241       halF[2] = {0.5, -0.5},
   1242       o_threshold = 7.09782712893383973096e+02,  /* 0x40862E42, 0xFEFA39EF */
   1243       u_threshold = -7.45133219101941108420e+02, /* 0xc0874910, 0xD52D3051 */
   1244       ln2HI[2] = {6.93147180369123816490e-01,    /* 0x3fe62e42, 0xfee00000 */
   1245                   -6.93147180369123816490e-01},  /* 0xbfe62e42, 0xfee00000 */
   1246       ln2LO[2] = {1.90821492927058770002e-10,    /* 0x3dea39ef, 0x35793c76 */
   1247                   -1.90821492927058770002e-10},  /* 0xbdea39ef, 0x35793c76 */
   1248       invln2 = 1.44269504088896338700e+00,       /* 0x3ff71547, 0x652b82fe */
   1249       P1 = 1.66666666666666019037e-01,           /* 0x3FC55555, 0x5555553E */
   1250       P2 = -2.77777777770155933842e-03,          /* 0xBF66C16C, 0x16BEBD93 */
   1251       P3 = 6.61375632143793436117e-05,           /* 0x3F11566A, 0xAF25DE2C */
   1252       P4 = -1.65339022054652515390e-06,          /* 0xBEBBBD41, 0xC5D26BF1 */
   1253       P5 = 4.13813679705723846039e-08,           /* 0x3E663769, 0x72BEA4D0 */
   1254       E = 2.718281828459045;                     /* 0x4005bf0a, 0x8b145769 */
   1255 
   1256   static volatile double
   1257       huge = 1.0e+300,
   1258       twom1000 = 9.33263618503218878990e-302, /* 2**-1000=0x01700000,0*/
   1259       two1023 = 8.988465674311579539e307;     /* 0x1p1023 */
   1260 
   1261   double y, hi = 0.0, lo = 0.0, c, t, twopk;
   1262   int32_t k = 0, xsb;
   1263   uint32_t hx;
   1264 
   1265   GET_HIGH_WORD(hx, x);
   1266   xsb = (hx >> 31) & 1; /* sign bit of x */
   1267   hx &= 0x7fffffff;     /* high word of |x| */
   1268 
   1269   /* filter out non-finite argument */
   1270   if (hx >= 0x40862E42) { /* if |x|>=709.78... */
   1271     if (hx >= 0x7ff00000) {
   1272       uint32_t lx;
   1273       GET_LOW_WORD(lx, x);
   1274       if (((hx & 0xfffff) | lx) != 0)
   1275         return x + x; /* NaN */
   1276       else
   1277         return (xsb == 0) ? x : 0.0; /* exp(+-inf)={inf,0} */
   1278     }
   1279     if (x > o_threshold) return huge * huge;         /* overflow */
   1280     if (x < u_threshold) return twom1000 * twom1000; /* underflow */
   1281   }
   1282 
   1283   /* argument reduction */
   1284   if (hx > 0x3fd62e42) {   /* if  |x| > 0.5 ln2 */
   1285     if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
   1286       /* TODO(rtoy): We special case exp(1) here to return the correct
   1287        * value of E, as the computation below would get the last bit
   1288        * wrong. We should probably fix the algorithm instead.
   1289        */
   1290       if (x == 1.0) return E;
   1291       hi = x - ln2HI[xsb];
   1292       lo = ln2LO[xsb];
   1293       k = 1 - xsb - xsb;
   1294     } else {
   1295       k = static_cast<int>(invln2 * x + halF[xsb]);
   1296       t = k;
   1297       hi = x - t * ln2HI[0]; /* t*ln2HI is exact here */
   1298       lo = t * ln2LO[0];
   1299     }
   1300     STRICT_ASSIGN(double, x, hi - lo);
   1301   } else if (hx < 0x3e300000) {         /* when |x|<2**-28 */
   1302     if (huge + x > one) return one + x; /* trigger inexact */
   1303   } else {
   1304     k = 0;
   1305   }
   1306 
   1307   /* x is now in primary range */
   1308   t = x * x;
   1309   if (k >= -1021) {
   1310     INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0);
   1311   } else {
   1312     INSERT_WORDS(twopk, 0x3ff00000 + ((k + 1000) << 20), 0);
   1313   }
   1314   c = x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
   1315   if (k == 0) {
   1316     return one - ((x * c) / (c - 2.0) - x);
   1317   } else {
   1318     y = one - ((lo - (x * c) / (2.0 - c)) - hi);
   1319   }
   1320   if (k >= -1021) {
   1321     if (k == 1024) return y * 2.0 * two1023;
   1322     return y * twopk;
   1323   } else {
   1324     return y * twopk * twom1000;
   1325   }
   1326 }
   1327 
   1328 /*
   1329  * Method :
   1330  *    1.Reduced x to positive by atanh(-x) = -atanh(x)
   1331  *    2.For x>=0.5
   1332  *              1              2x                          x
   1333  *  atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
   1334  *              2             1 - x                      1 - x
   1335  *
   1336  *   For x<0.5
   1337  *  atanh(x) = 0.5*log1p(2x+2x*x/(1-x))
   1338  *
   1339  * Special cases:
   1340  *  atanh(x) is NaN if |x| > 1 with signal;
   1341  *  atanh(NaN) is that NaN with no signal;
   1342  *  atanh(+-1) is +-INF with signal.
   1343  *
   1344  */
   1345 double atanh(double x) {
   1346   static const double one = 1.0, huge = 1e300;
   1347   static const double zero = 0.0;
   1348 
   1349   double t;
   1350   int32_t hx, ix;
   1351   uint32_t lx;
   1352   EXTRACT_WORDS(hx, lx, x);
   1353   ix = hx & 0x7fffffff;
   1354   if ((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x3ff00000) /* |x|>1 */
   1355     return (x - x) / (x - x);
   1356   if (ix == 0x3ff00000) return x / zero;
   1357   if (ix < 0x3e300000 && (huge + x) > zero) return x; /* x<2**-28 */
   1358   SET_HIGH_WORD(x, ix);
   1359   if (ix < 0x3fe00000) { /* x < 0.5 */
   1360     t = x + x;
   1361     t = 0.5 * log1p(t + t * x / (one - x));
   1362   } else {
   1363     t = 0.5 * log1p((x + x) / (one - x));
   1364   }
   1365   if (hx >= 0)
   1366     return t;
   1367   else
   1368     return -t;
   1369 }
   1370 
   1371 /* log(x)
   1372  * Return the logrithm of x
   1373  *
   1374  * Method :
   1375  *   1. Argument Reduction: find k and f such that
   1376  *     x = 2^k * (1+f),
   1377  *     where  sqrt(2)/2 < 1+f < sqrt(2) .
   1378  *
   1379  *   2. Approximation of log(1+f).
   1380  *  Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
   1381  *     = 2s + 2/3 s**3 + 2/5 s**5 + .....,
   1382  *         = 2s + s*R
   1383  *      We use a special Reme algorithm on [0,0.1716] to generate
   1384  *  a polynomial of degree 14 to approximate R The maximum error
   1385  *  of this polynomial approximation is bounded by 2**-58.45. In
   1386  *  other words,
   1387  *            2      4      6      8      10      12      14
   1388  *      R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
   1389  *    (the values of Lg1 to Lg7 are listed in the program)
   1390  *  and
   1391  *      |      2          14          |     -58.45
   1392  *      | Lg1*s +...+Lg7*s    -  R(z) | <= 2
   1393  *      |                             |
   1394  *  Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
   1395  *  In order to guarantee error in log below 1ulp, we compute log
   1396  *  by
   1397  *    log(1+f) = f - s*(f - R)  (if f is not too large)
   1398  *    log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
   1399  *
   1400  *  3. Finally,  log(x) = k*ln2 + log(1+f).
   1401  *          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
   1402  *     Here ln2 is split into two floating point number:
   1403  *      ln2_hi + ln2_lo,
   1404  *     where n*ln2_hi is always exact for |n| < 2000.
   1405  *
   1406  * Special cases:
   1407  *  log(x) is NaN with signal if x < 0 (including -INF) ;
   1408  *  log(+INF) is +INF; log(0) is -INF with signal;
   1409  *  log(NaN) is that NaN with no signal.
   1410  *
   1411  * Accuracy:
   1412  *  according to an error analysis, the error is always less than
   1413  *  1 ulp (unit in the last place).
   1414  *
   1415  * Constants:
   1416  * The hexadecimal values are the intended ones for the following
   1417  * constants. The decimal values may be used, provided that the
   1418  * compiler will convert from decimal to binary accurately enough
   1419  * to produce the hexadecimal values shown.
   1420  */
   1421 double log(double x) {
   1422   static const double                      /* -- */
   1423       ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
   1424       ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
   1425       two54 = 1.80143985094819840000e+16,  /* 43500000 00000000 */
   1426       Lg1 = 6.666666666666735130e-01,      /* 3FE55555 55555593 */
   1427       Lg2 = 3.999999999940941908e-01,      /* 3FD99999 9997FA04 */
   1428       Lg3 = 2.857142874366239149e-01,      /* 3FD24924 94229359 */
   1429       Lg4 = 2.222219843214978396e-01,      /* 3FCC71C5 1D8E78AF */
   1430       Lg5 = 1.818357216161805012e-01,      /* 3FC74664 96CB03DE */
   1431       Lg6 = 1.531383769920937332e-01,      /* 3FC39A09 D078C69F */
   1432       Lg7 = 1.479819860511658591e-01;      /* 3FC2F112 DF3E5244 */
   1433 
   1434   static const double zero = 0.0;
   1435   static volatile double vzero = 0.0;
   1436 
   1437   double hfsq, f, s, z, R, w, t1, t2, dk;
   1438   int32_t k, hx, i, j;
   1439   uint32_t lx;
   1440 
   1441   EXTRACT_WORDS(hx, lx, x);
   1442 
   1443   k = 0;
   1444   if (hx < 0x00100000) { /* x < 2**-1022  */
   1445     if (((hx & 0x7fffffff) | lx) == 0)
   1446       return -two54 / vzero;           /* log(+-0)=-inf */
   1447     if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
   1448     k -= 54;
   1449     x *= two54; /* subnormal number, scale up x */
   1450     GET_HIGH_WORD(hx, x);
   1451   }
   1452   if (hx >= 0x7ff00000) return x + x;
   1453   k += (hx >> 20) - 1023;
   1454   hx &= 0x000fffff;
   1455   i = (hx + 0x95f64) & 0x100000;
   1456   SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
   1457   k += (i >> 20);
   1458   f = x - 1.0;
   1459   if ((0x000fffff & (2 + hx)) < 3) { /* -2**-20 <= f < 2**-20 */
   1460     if (f == zero) {
   1461       if (k == 0) {
   1462         return zero;
   1463       } else {
   1464         dk = static_cast<double>(k);
   1465         return dk * ln2_hi + dk * ln2_lo;
   1466       }
   1467     }
   1468     R = f * f * (0.5 - 0.33333333333333333 * f);
   1469     if (k == 0) {
   1470       return f - R;
   1471     } else {
   1472       dk = static_cast<double>(k);
   1473       return dk * ln2_hi - ((R - dk * ln2_lo) - f);
   1474     }
   1475   }
   1476   s = f / (2.0 + f);
   1477   dk = static_cast<double>(k);
   1478   z = s * s;
   1479   i = hx - 0x6147a;
   1480   w = z * z;
   1481   j = 0x6b851 - hx;
   1482   t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
   1483   t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
   1484   i |= j;
   1485   R = t2 + t1;
   1486   if (i > 0) {
   1487     hfsq = 0.5 * f * f;
   1488     if (k == 0)
   1489       return f - (hfsq - s * (hfsq + R));
   1490     else
   1491       return dk * ln2_hi - ((hfsq - (s * (hfsq + R) + dk * ln2_lo)) - f);
   1492   } else {
   1493     if (k == 0)
   1494       return f - s * (f - R);
   1495     else
   1496       return dk * ln2_hi - ((s * (f - R) - dk * ln2_lo) - f);
   1497   }
   1498 }
   1499 
   1500 /* double log1p(double x)
   1501  *
   1502  * Method :
   1503  *   1. Argument Reduction: find k and f such that
   1504  *      1+x = 2^k * (1+f),
   1505  *     where  sqrt(2)/2 < 1+f < sqrt(2) .
   1506  *
   1507  *      Note. If k=0, then f=x is exact. However, if k!=0, then f
   1508  *  may not be representable exactly. In that case, a correction
   1509  *  term is need. Let u=1+x rounded. Let c = (1+x)-u, then
   1510  *  log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
   1511  *  and add back the correction term c/u.
   1512  *  (Note: when x > 2**53, one can simply return log(x))
   1513  *
   1514  *   2. Approximation of log1p(f).
   1515  *  Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
   1516  *     = 2s + 2/3 s**3 + 2/5 s**5 + .....,
   1517  *         = 2s + s*R
   1518  *      We use a special Reme algorithm on [0,0.1716] to generate
   1519  *  a polynomial of degree 14 to approximate R The maximum error
   1520  *  of this polynomial approximation is bounded by 2**-58.45. In
   1521  *  other words,
   1522  *            2      4      6      8      10      12      14
   1523  *      R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s  +Lp6*s  +Lp7*s
   1524  *    (the values of Lp1 to Lp7 are listed in the program)
   1525  *  and
   1526  *      |      2          14          |     -58.45
   1527  *      | Lp1*s +...+Lp7*s    -  R(z) | <= 2
   1528  *      |                             |
   1529  *  Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
   1530  *  In order to guarantee error in log below 1ulp, we compute log
   1531  *  by
   1532  *    log1p(f) = f - (hfsq - s*(hfsq+R)).
   1533  *
   1534  *  3. Finally, log1p(x) = k*ln2 + log1p(f).
   1535  *           = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
   1536  *     Here ln2 is split into two floating point number:
   1537  *      ln2_hi + ln2_lo,
   1538  *     where n*ln2_hi is always exact for |n| < 2000.
   1539  *
   1540  * Special cases:
   1541  *  log1p(x) is NaN with signal if x < -1 (including -INF) ;
   1542  *  log1p(+INF) is +INF; log1p(-1) is -INF with signal;
   1543  *  log1p(NaN) is that NaN with no signal.
   1544  *
   1545  * Accuracy:
   1546  *  according to an error analysis, the error is always less than
   1547  *  1 ulp (unit in the last place).
   1548  *
   1549  * Constants:
   1550  * The hexadecimal values are the intended ones for the following
   1551  * constants. The decimal values may be used, provided that the
   1552  * compiler will convert from decimal to binary accurately enough
   1553  * to produce the hexadecimal values shown.
   1554  *
   1555  * Note: Assuming log() return accurate answer, the following
   1556  *   algorithm can be used to compute log1p(x) to within a few ULP:
   1557  *
   1558  *    u = 1+x;
   1559  *    if(u==1.0) return x ; else
   1560  *         return log(u)*(x/(u-1.0));
   1561  *
   1562  *   See HP-15C Advanced Functions Handbook, p.193.
   1563  */
   1564 double log1p(double x) {
   1565   static const double                      /* -- */
   1566       ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
   1567       ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
   1568       two54 = 1.80143985094819840000e+16,  /* 43500000 00000000 */
   1569       Lp1 = 6.666666666666735130e-01,      /* 3FE55555 55555593 */
   1570       Lp2 = 3.999999999940941908e-01,      /* 3FD99999 9997FA04 */
   1571       Lp3 = 2.857142874366239149e-01,      /* 3FD24924 94229359 */
   1572       Lp4 = 2.222219843214978396e-01,      /* 3FCC71C5 1D8E78AF */
   1573       Lp5 = 1.818357216161805012e-01,      /* 3FC74664 96CB03DE */
   1574       Lp6 = 1.531383769920937332e-01,      /* 3FC39A09 D078C69F */
   1575       Lp7 = 1.479819860511658591e-01;      /* 3FC2F112 DF3E5244 */
   1576 
   1577   static const double zero = 0.0;
   1578   static volatile double vzero = 0.0;
   1579 
   1580   double hfsq, f, c, s, z, R, u;
   1581   int32_t k, hx, hu, ax;
   1582 
   1583   GET_HIGH_WORD(hx, x);
   1584   ax = hx & 0x7fffffff;
   1585 
   1586   k = 1;
   1587   if (hx < 0x3FDA827A) {    /* 1+x < sqrt(2)+ */
   1588     if (ax >= 0x3ff00000) { /* x <= -1.0 */
   1589       if (x == -1.0)
   1590         return -two54 / vzero; /* log1p(-1)=+inf */
   1591       else
   1592         return (x - x) / (x - x); /* log1p(x<-1)=NaN */
   1593     }
   1594     if (ax < 0x3e200000) {    /* |x| < 2**-29 */
   1595       if (two54 + x > zero    /* raise inexact */
   1596           && ax < 0x3c900000) /* |x| < 2**-54 */
   1597         return x;
   1598       else
   1599         return x - x * x * 0.5;
   1600     }
   1601     if (hx > 0 || hx <= static_cast<int32_t>(0xbfd2bec4)) {
   1602       k = 0;
   1603       f = x;
   1604       hu = 1;
   1605     } /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
   1606   }
   1607   if (hx >= 0x7ff00000) return x + x;
   1608   if (k != 0) {
   1609     if (hx < 0x43400000) {
   1610       STRICT_ASSIGN(double, u, 1.0 + x);
   1611       GET_HIGH_WORD(hu, u);
   1612       k = (hu >> 20) - 1023;
   1613       c = (k > 0) ? 1.0 - (u - x) : x - (u - 1.0); /* correction term */
   1614       c /= u;
   1615     } else {
   1616       u = x;
   1617       GET_HIGH_WORD(hu, u);
   1618       k = (hu >> 20) - 1023;
   1619       c = 0;
   1620     }
   1621     hu &= 0x000fffff;
   1622     /*
   1623      * The approximation to sqrt(2) used in thresholds is not
   1624      * critical.  However, the ones used above must give less
   1625      * strict bounds than the one here so that the k==0 case is
   1626      * never reached from here, since here we have committed to
   1627      * using the correction term but don't use it if k==0.
   1628      */
   1629     if (hu < 0x6a09e) {                  /* u ~< sqrt(2) */
   1630       SET_HIGH_WORD(u, hu | 0x3ff00000); /* normalize u */
   1631     } else {
   1632       k += 1;
   1633       SET_HIGH_WORD(u, hu | 0x3fe00000); /* normalize u/2 */
   1634       hu = (0x00100000 - hu) >> 2;
   1635     }
   1636     f = u - 1.0;
   1637   }
   1638   hfsq = 0.5 * f * f;
   1639   if (hu == 0) { /* |f| < 2**-20 */
   1640     if (f == zero) {
   1641       if (k == 0) {
   1642         return zero;
   1643       } else {
   1644         c += k * ln2_lo;
   1645         return k * ln2_hi + c;
   1646       }
   1647     }
   1648     R = hfsq * (1.0 - 0.66666666666666666 * f);
   1649     if (k == 0)
   1650       return f - R;
   1651     else
   1652       return k * ln2_hi - ((R - (k * ln2_lo + c)) - f);
   1653   }
   1654   s = f / (2.0 + f);
   1655   z = s * s;
   1656   R = z * (Lp1 +
   1657            z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7))))));
   1658   if (k == 0)
   1659     return f - (hfsq - s * (hfsq + R));
   1660   else
   1661     return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);
   1662 }
   1663 
   1664 /*
   1665  * k_log1p(f):
   1666  * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)].
   1667  *
   1668  * The following describes the overall strategy for computing
   1669  * logarithms in base e.  The argument reduction and adding the final
   1670  * term of the polynomial are done by the caller for increased accuracy
   1671  * when different bases are used.
   1672  *
   1673  * Method :
   1674  *   1. Argument Reduction: find k and f such that
   1675  *         x = 2^k * (1+f),
   1676  *         where  sqrt(2)/2 < 1+f < sqrt(2) .
   1677  *
   1678  *   2. Approximation of log(1+f).
   1679  *      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
   1680  *            = 2s + 2/3 s**3 + 2/5 s**5 + .....,
   1681  *            = 2s + s*R
   1682  *      We use a special Reme algorithm on [0,0.1716] to generate
   1683  *      a polynomial of degree 14 to approximate R The maximum error
   1684  *      of this polynomial approximation is bounded by 2**-58.45. In
   1685  *      other words,
   1686  *          2      4      6      8      10      12      14
   1687  *          R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
   1688  *      (the values of Lg1 to Lg7 are listed in the program)
   1689  *      and
   1690  *          |      2          14          |     -58.45
   1691  *          | Lg1*s +...+Lg7*s    -  R(z) | <= 2
   1692  *          |                             |
   1693  *      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
   1694  *      In order to guarantee error in log below 1ulp, we compute log
   1695  *      by
   1696  *          log(1+f) = f - s*(f - R)            (if f is not too large)
   1697  *          log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
   1698  *
   1699  *   3. Finally,  log(x) = k*ln2 + log(1+f).
   1700  *          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
   1701  *      Here ln2 is split into two floating point number:
   1702  *          ln2_hi + ln2_lo,
   1703  *      where n*ln2_hi is always exact for |n| < 2000.
   1704  *
   1705  * Special cases:
   1706  *      log(x) is NaN with signal if x < 0 (including -INF) ;
   1707  *      log(+INF) is +INF; log(0) is -INF with signal;
   1708  *      log(NaN) is that NaN with no signal.
   1709  *
   1710  * Accuracy:
   1711  *      according to an error analysis, the error is always less than
   1712  *      1 ulp (unit in the last place).
   1713  *
   1714  * Constants:
   1715  * The hexadecimal values are the intended ones for the following
   1716  * constants. The decimal values may be used, provided that the
   1717  * compiler will convert from decimal to binary accurately enough
   1718  * to produce the hexadecimal values shown.
   1719  */
   1720 
   1721 static const double Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
   1722     Lg2 = 3.999999999940941908e-01,                 /* 3FD99999 9997FA04 */
   1723     Lg3 = 2.857142874366239149e-01,                 /* 3FD24924 94229359 */
   1724     Lg4 = 2.222219843214978396e-01,                 /* 3FCC71C5 1D8E78AF */
   1725     Lg5 = 1.818357216161805012e-01,                 /* 3FC74664 96CB03DE */
   1726     Lg6 = 1.531383769920937332e-01,                 /* 3FC39A09 D078C69F */
   1727     Lg7 = 1.479819860511658591e-01;                 /* 3FC2F112 DF3E5244 */
   1728 
   1729 /*
   1730  * We always inline k_log1p(), since doing so produces a
   1731  * substantial performance improvement (~40% on amd64).
   1732  */
   1733 static inline double k_log1p(double f) {
   1734   double hfsq, s, z, R, w, t1, t2;
   1735 
   1736   s = f / (2.0 + f);
   1737   z = s * s;
   1738   w = z * z;
   1739   t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
   1740   t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
   1741   R = t2 + t1;
   1742   hfsq = 0.5 * f * f;
   1743   return s * (hfsq + R);
   1744 }
   1745 
   1746 /*
   1747  * Return the base 2 logarithm of x.  See e_log.c and k_log.h for most
   1748  * comments.
   1749  *
   1750  * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,
   1751  * then does the combining and scaling steps
   1752  *    log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k
   1753  * in not-quite-routine extra precision.
   1754  */
   1755 double log2(double x) {
   1756   static const double
   1757       two54 = 1.80143985094819840000e+16,   /* 0x43500000, 0x00000000 */
   1758       ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */
   1759       ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */
   1760 
   1761   static const double zero = 0.0;
   1762   static volatile double vzero = 0.0;
   1763 
   1764   double f, hfsq, hi, lo, r, val_hi, val_lo, w, y;
   1765   int32_t i, k, hx;
   1766   uint32_t lx;
   1767 
   1768   EXTRACT_WORDS(hx, lx, x);
   1769 
   1770   k = 0;
   1771   if (hx < 0x00100000) { /* x < 2**-1022  */
   1772     if (((hx & 0x7fffffff) | lx) == 0)
   1773       return -two54 / vzero;           /* log(+-0)=-inf */
   1774     if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
   1775     k -= 54;
   1776     x *= two54; /* subnormal number, scale up x */
   1777     GET_HIGH_WORD(hx, x);
   1778   }
   1779   if (hx >= 0x7ff00000) return x + x;
   1780   if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */
   1781   k += (hx >> 20) - 1023;
   1782   hx &= 0x000fffff;
   1783   i = (hx + 0x95f64) & 0x100000;
   1784   SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
   1785   k += (i >> 20);
   1786   y = static_cast<double>(k);
   1787   f = x - 1.0;
   1788   hfsq = 0.5 * f * f;
   1789   r = k_log1p(f);
   1790 
   1791   /*
   1792    * f-hfsq must (for args near 1) be evaluated in extra precision
   1793    * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).
   1794    * This is fairly efficient since f-hfsq only depends on f, so can
   1795    * be evaluated in parallel with R.  Not combining hfsq with R also
   1796    * keeps R small (though not as small as a true `lo' term would be),
   1797    * so that extra precision is not needed for terms involving R.
   1798    *
   1799    * Compiler bugs involving extra precision used to break Dekker's
   1800    * theorem for spitting f-hfsq as hi+lo, unless double_t was used
   1801    * or the multi-precision calculations were avoided when double_t
   1802    * has extra precision.  These problems are now automatically
   1803    * avoided as a side effect of the optimization of combining the
   1804    * Dekker splitting step with the clear-low-bits step.
   1805    *
   1806    * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra
   1807    * precision to avoid a very large cancellation when x is very near
   1808    * these values.  Unlike the above cancellations, this problem is
   1809    * specific to base 2.  It is strange that adding +-1 is so much
   1810    * harder than adding +-ln2 or +-log10_2.
   1811    *
   1812    * This uses Dekker's theorem to normalize y+val_hi, so the
   1813    * compiler bugs are back in some configurations, sigh.  And I
   1814    * don't want to used double_t to avoid them, since that gives a
   1815    * pessimization and the support for avoiding the pessimization
   1816    * is not yet available.
   1817    *
   1818    * The multi-precision calculations for the multiplications are
   1819    * routine.
   1820    */
   1821   hi = f - hfsq;
   1822   SET_LOW_WORD(hi, 0);
   1823   lo = (f - hi) - hfsq + r;
   1824   val_hi = hi * ivln2hi;
   1825   val_lo = (lo + hi) * ivln2lo + lo * ivln2hi;
   1826 
   1827   /* spadd(val_hi, val_lo, y), except for not using double_t: */
   1828   w = y + val_hi;
   1829   val_lo += (y - w) + val_hi;
   1830   val_hi = w;
   1831 
   1832   return val_lo + val_hi;
   1833 }
   1834 
   1835 /*
   1836  * Return the base 10 logarithm of x
   1837  *
   1838  * Method :
   1839  *      Let log10_2hi = leading 40 bits of log10(2) and
   1840  *          log10_2lo = log10(2) - log10_2hi,
   1841  *          ivln10   = 1/log(10) rounded.
   1842  *      Then
   1843  *              n = ilogb(x),
   1844  *              if(n<0)  n = n+1;
   1845  *              x = scalbn(x,-n);
   1846  *              log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x))
   1847  *
   1848  *  Note 1:
   1849  *     To guarantee log10(10**n)=n, where 10**n is normal, the rounding
   1850  *     mode must set to Round-to-Nearest.
   1851  *  Note 2:
   1852  *      [1/log(10)] rounded to 53 bits has error .198 ulps;
   1853  *      log10 is monotonic at all binary break points.
   1854  *
   1855  *  Special cases:
   1856  *      log10(x) is NaN if x < 0;
   1857  *      log10(+INF) is +INF; log10(0) is -INF;
   1858  *      log10(NaN) is that NaN;
   1859  *      log10(10**N) = N  for N=0,1,...,22.
   1860  */
   1861 double log10(double x) {
   1862   static const double
   1863       two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
   1864       ivln10 = 4.34294481903251816668e-01,
   1865       log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
   1866       log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
   1867 
   1868   static const double zero = 0.0;
   1869   static volatile double vzero = 0.0;
   1870 
   1871   double y;
   1872   int32_t i, k, hx;
   1873   uint32_t lx;
   1874 
   1875   EXTRACT_WORDS(hx, lx, x);
   1876 
   1877   k = 0;
   1878   if (hx < 0x00100000) { /* x < 2**-1022  */
   1879     if (((hx & 0x7fffffff) | lx) == 0)
   1880       return -two54 / vzero;           /* log(+-0)=-inf */
   1881     if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
   1882     k -= 54;
   1883     x *= two54; /* subnormal number, scale up x */
   1884     GET_HIGH_WORD(hx, x);
   1885     GET_LOW_WORD(lx, x);
   1886   }
   1887   if (hx >= 0x7ff00000) return x + x;
   1888   if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */
   1889   k += (hx >> 20) - 1023;
   1890 
   1891   i = (k & 0x80000000) >> 31;
   1892   hx = (hx & 0x000fffff) | ((0x3ff - i) << 20);
   1893   y = k + i;
   1894   SET_HIGH_WORD(x, hx);
   1895   SET_LOW_WORD(x, lx);
   1896 
   1897   double z = y * log10_2lo + ivln10 * log(x);
   1898   return z + y * log10_2hi;
   1899 }
   1900 
   1901 /* expm1(x)
   1902  * Returns exp(x)-1, the exponential of x minus 1.
   1903  *
   1904  * Method
   1905  *   1. Argument reduction:
   1906  *  Given x, find r and integer k such that
   1907  *
   1908  *               x = k*ln2 + r,  |r| <= 0.5*ln2 ~ 0.34658
   1909  *
   1910  *      Here a correction term c will be computed to compensate
   1911  *  the error in r when rounded to a floating-point number.
   1912  *
   1913  *   2. Approximating expm1(r) by a special rational function on
   1914  *  the interval [0,0.34658]:
   1915  *  Since
   1916  *      r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ...
   1917  *  we define R1(r*r) by
   1918  *      r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r)
   1919  *  That is,
   1920  *      R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
   1921  *         = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
   1922  *         = 1 - r^2/60 + r^4/2520 - r^6/100800 + ...
   1923  *      We use a special Reme algorithm on [0,0.347] to generate
   1924  *   a polynomial of degree 5 in r*r to approximate R1. The
   1925  *  maximum error of this polynomial approximation is bounded
   1926  *  by 2**-61. In other words,
   1927  *      R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
   1928  *  where   Q1  =  -1.6666666666666567384E-2,
   1929  *     Q2  =   3.9682539681370365873E-4,
   1930  *     Q3  =  -9.9206344733435987357E-6,
   1931  *     Q4  =   2.5051361420808517002E-7,
   1932  *     Q5  =  -6.2843505682382617102E-9;
   1933  *    z   =  r*r,
   1934  *  with error bounded by
   1935  *      |                  5           |     -61
   1936  *      | 1.0+Q1*z+...+Q5*z   -  R1(z) | <= 2
   1937  *      |                              |
   1938  *
   1939  *  expm1(r) = exp(r)-1 is then computed by the following
   1940  *   specific way which minimize the accumulation rounding error:
   1941  *             2     3
   1942  *            r     r    [ 3 - (R1 + R1*r/2)  ]
   1943  *        expm1(r) = r + --- + --- * [--------------------]
   1944  *                  2     2    [ 6 - r*(3 - R1*r/2) ]
   1945  *
   1946  *  To compensate the error in the argument reduction, we use
   1947  *    expm1(r+c) = expm1(r) + c + expm1(r)*c
   1948  *         ~ expm1(r) + c + r*c
   1949  *  Thus c+r*c will be added in as the correction terms for
   1950  *  expm1(r+c). Now rearrange the term to avoid optimization
   1951  *   screw up:
   1952  *            (      2                                    2 )
   1953  *            ({  ( r    [ R1 -  (3 - R1*r/2) ]  )  }    r  )
   1954  *   expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
   1955  *                  ({  ( 2    [ 6 - r*(3 - R1*r/2) ]  )  }    2  )
   1956  *                      (                                             )
   1957  *
   1958  *       = r - E
   1959  *   3. Scale back to obtain expm1(x):
   1960  *  From step 1, we have
   1961  *     expm1(x) = either 2^k*[expm1(r)+1] - 1
   1962  *        = or     2^k*[expm1(r) + (1-2^-k)]
   1963  *   4. Implementation notes:
   1964  *  (A). To save one multiplication, we scale the coefficient Qi
   1965  *       to Qi*2^i, and replace z by (x^2)/2.
   1966  *  (B). To achieve maximum accuracy, we compute expm1(x) by
   1967  *    (i)   if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
   1968  *    (ii)  if k=0, return r-E
   1969  *    (iii) if k=-1, return 0.5*(r-E)-0.5
   1970  *        (iv)  if k=1 if r < -0.25, return 2*((r+0.5)- E)
   1971  *                  else       return  1.0+2.0*(r-E);
   1972  *    (v)   if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1)
   1973  *    (vi)  if k <= 20, return 2^k((1-2^-k)-(E-r)), else
   1974  *    (vii) return 2^k(1-((E+2^-k)-r))
   1975  *
   1976  * Special cases:
   1977  *  expm1(INF) is INF, expm1(NaN) is NaN;
   1978  *  expm1(-INF) is -1, and
   1979  *  for finite argument, only expm1(0)=0 is exact.
   1980  *
   1981  * Accuracy:
   1982  *  according to an error analysis, the error is always less than
   1983  *  1 ulp (unit in the last place).
   1984  *
   1985  * Misc. info.
   1986  *  For IEEE double
   1987  *      if x >  7.09782712893383973096e+02 then expm1(x) overflow
   1988  *
   1989  * Constants:
   1990  * The hexadecimal values are the intended ones for the following
   1991  * constants. The decimal values may be used, provided that the
   1992  * compiler will convert from decimal to binary accurately enough
   1993  * to produce the hexadecimal values shown.
   1994  */
   1995 double expm1(double x) {
   1996   static const double
   1997       one = 1.0,
   1998       tiny = 1.0e-300,
   1999       o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
   2000       ln2_hi = 6.93147180369123816490e-01,      /* 0x3fe62e42, 0xfee00000 */
   2001       ln2_lo = 1.90821492927058770002e-10,      /* 0x3dea39ef, 0x35793c76 */
   2002       invln2 = 1.44269504088896338700e+00,      /* 0x3ff71547, 0x652b82fe */
   2003       /* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs =
   2004          x*x/2: */
   2005       Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */
   2006       Q2 = 1.58730158725481460165e-03,  /* 3F5A01A0 19FE5585 */
   2007       Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
   2008       Q4 = 4.00821782732936239552e-06,  /* 3ED0CFCA 86E65239 */
   2009       Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
   2010 
   2011   static volatile double huge = 1.0e+300;
   2012 
   2013   double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
   2014   int32_t k, xsb;
   2015   uint32_t hx;
   2016 
   2017   GET_HIGH_WORD(hx, x);
   2018   xsb = hx & 0x80000000; /* sign bit of x */
   2019   hx &= 0x7fffffff;      /* high word of |x| */
   2020 
   2021   /* filter out huge and non-finite argument */
   2022   if (hx >= 0x4043687A) {   /* if |x|>=56*ln2 */
   2023     if (hx >= 0x40862E42) { /* if |x|>=709.78... */
   2024       if (hx >= 0x7ff00000) {
   2025         uint32_t low;
   2026         GET_LOW_WORD(low, x);
   2027         if (((hx & 0xfffff) | low) != 0)
   2028           return x + x; /* NaN */
   2029         else
   2030           return (xsb == 0) ? x : -1.0; /* exp(+-inf)={inf,-1} */
   2031       }
   2032       if (x > o_threshold) return huge * huge; /* overflow */
   2033     }
   2034     if (xsb != 0) {        /* x < -56*ln2, return -1.0 with inexact */
   2035       if (x + tiny < 0.0)  /* raise inexact */
   2036         return tiny - one; /* return -1 */
   2037     }
   2038   }
   2039 
   2040   /* argument reduction */
   2041   if (hx > 0x3fd62e42) {   /* if  |x| > 0.5 ln2 */
   2042     if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
   2043       if (xsb == 0) {
   2044         hi = x - ln2_hi;
   2045         lo = ln2_lo;
   2046         k = 1;
   2047       } else {
   2048         hi = x + ln2_hi;
   2049         lo = -ln2_lo;
   2050         k = -1;
   2051       }
   2052     } else {
   2053       k = invln2 * x + ((xsb == 0) ? 0.5 : -0.5);
   2054       t = k;
   2055       hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
   2056       lo = t * ln2_lo;
   2057     }
   2058     STRICT_ASSIGN(double, x, hi - lo);
   2059     c = (hi - x) - lo;
   2060   } else if (hx < 0x3c900000) { /* when |x|<2**-54, return x */
   2061     t = huge + x;               /* return x with inexact flags when x!=0 */
   2062     return x - (t - (huge + x));
   2063   } else {
   2064     k = 0;
   2065   }
   2066 
   2067   /* x is now in primary range */
   2068   hfx = 0.5 * x;
   2069   hxs = x * hfx;
   2070   r1 = one + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
   2071   t = 3.0 - r1 * hfx;
   2072   e = hxs * ((r1 - t) / (6.0 - x * t));
   2073   if (k == 0) {
   2074     return x - (x * e - hxs); /* c is 0 */
   2075   } else {
   2076     INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0); /* 2^k */
   2077     e = (x * (e - c) - c);
   2078     e -= hxs;
   2079     if (k == -1) return 0.5 * (x - e) - 0.5;
   2080     if (k == 1) {
   2081       if (x < -0.25)
   2082         return -2.0 * (e - (x + 0.5));
   2083       else
   2084         return one + 2.0 * (x - e);
   2085     }
   2086     if (k <= -2 || k > 56) { /* suffice to return exp(x)-1 */
   2087       y = one - (e - x);
   2088       // TODO(mvstanton): is this replacement for the hex float
   2089       // sufficient?
   2090       // if (k == 1024) y = y*2.0*0x1p1023;
   2091       if (k == 1024)
   2092         y = y * 2.0 * 8.98846567431158e+307;
   2093       else
   2094         y = y * twopk;
   2095       return y - one;
   2096     }
   2097     t = one;
   2098     if (k < 20) {
   2099       SET_HIGH_WORD(t, 0x3ff00000 - (0x200000 >> k)); /* t=1-2^-k */
   2100       y = t - (e - x);
   2101       y = y * twopk;
   2102     } else {
   2103       SET_HIGH_WORD(t, ((0x3ff - k) << 20)); /* 2^-k */
   2104       y = x - (e + t);
   2105       y += one;
   2106       y = y * twopk;
   2107     }
   2108   }
   2109   return y;
   2110 }
   2111 
   2112 double cbrt(double x) {
   2113   static const uint32_t
   2114       B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
   2115       B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
   2116 
   2117   /* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
   2118   static const double P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */
   2119       P1 = -1.88497979543377169875,                /* 0xbffe28e0, 0x92f02420 */
   2120       P2 = 1.621429720105354466140,                /* 0x3ff9f160, 0x4a49d6c2 */
   2121       P3 = -0.758397934778766047437,               /* 0xbfe844cb, 0xbee751d9 */
   2122       P4 = 0.145996192886612446982;                /* 0x3fc2b000, 0xd4e4edd7 */
   2123 
   2124   int32_t hx;
   2125   union {
   2126     double value;
   2127     uint64_t bits;
   2128   } u;
   2129   double r, s, t = 0.0, w;
   2130   uint32_t sign;
   2131   uint32_t high, low;
   2132 
   2133   EXTRACT_WORDS(hx, low, x);
   2134   sign = hx & 0x80000000; /* sign= sign(x) */
   2135   hx ^= sign;
   2136   if (hx >= 0x7ff00000) return (x + x); /* cbrt(NaN,INF) is itself */
   2137 
   2138   /*
   2139    * Rough cbrt to 5 bits:
   2140    *    cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
   2141    * where e is integral and >= 0, m is real and in [0, 1), and "/" and
   2142    * "%" are integer division and modulus with rounding towards minus
   2143    * infinity.  The RHS is always >= the LHS and has a maximum relative
   2144    * error of about 1 in 16.  Adding a bias of -0.03306235651 to the
   2145    * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
   2146    * floating point representation, for finite positive normal values,
   2147    * ordinary integer divison of the value in bits magically gives
   2148    * almost exactly the RHS of the above provided we first subtract the
   2149    * exponent bias (1023 for doubles) and later add it back.  We do the
   2150    * subtraction virtually to keep e >= 0 so that ordinary integer
   2151    * division rounds towards minus infinity; this is also efficient.
   2152    */
   2153   if (hx < 0x00100000) {             /* zero or subnormal? */
   2154     if ((hx | low) == 0) return (x); /* cbrt(0) is itself */
   2155     SET_HIGH_WORD(t, 0x43500000);    /* set t= 2**54 */
   2156     t *= x;
   2157     GET_HIGH_WORD(high, t);
   2158     INSERT_WORDS(t, sign | ((high & 0x7fffffff) / 3 + B2), 0);
   2159   } else {
   2160     INSERT_WORDS(t, sign | (hx / 3 + B1), 0);
   2161   }
   2162 
   2163   /*
   2164    * New cbrt to 23 bits:
   2165    *    cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
   2166    * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
   2167    * to within 2**-23.5 when |r - 1| < 1/10.  The rough approximation
   2168    * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
   2169    * gives us bounds for r = t**3/x.
   2170    *
   2171    * Try to optimize for parallel evaluation as in k_tanf.c.
   2172    */
   2173   r = (t * t) * (t / x);
   2174   t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
   2175 
   2176   /*
   2177    * Round t away from zero to 23 bits (sloppily except for ensuring that
   2178    * the result is larger in magnitude than cbrt(x) but not much more than
   2179    * 2 23-bit ulps larger).  With rounding towards zero, the error bound
   2180    * would be ~5/6 instead of ~4/6.  With a maximum error of 2 23-bit ulps
   2181    * in the rounded t, the infinite-precision error in the Newton
   2182    * approximation barely affects third digit in the final error
   2183    * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
   2184    * before the final error is larger than 0.667 ulps.
   2185    */
   2186   u.value = t;
   2187   u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL;
   2188   t = u.value;
   2189 
   2190   /* one step Newton iteration to 53 bits with error < 0.667 ulps */
   2191   s = t * t;             /* t*t is exact */
   2192   r = x / s;             /* error <= 0.5 ulps; |r| < |t| */
   2193   w = t + t;             /* t+t is exact */
   2194   r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */
   2195   t = t + t * r;         /* error <= 0.5 + 0.5/3 + epsilon */
   2196 
   2197   return (t);
   2198 }
   2199 
   2200 /* sin(x)
   2201  * Return sine function of x.
   2202  *
   2203  * kernel function:
   2204  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
   2205  *      __kernel_cos            ... cose function on [-pi/4,pi/4]
   2206  *      __ieee754_rem_pio2      ... argument reduction routine
   2207  *
   2208  * Method.
   2209  *      Let S,C and T denote the sin, cos and tan respectively on
   2210  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   2211  *      in [-pi/4 , +pi/4], and let n = k mod 4.
   2212  *      We have
   2213  *
   2214  *          n        sin(x)      cos(x)        tan(x)
   2215  *     ----------------------------------------------------------
   2216  *          0          S           C             T
   2217  *          1          C          -S            -1/T
   2218  *          2         -S          -C             T
   2219  *          3         -C           S            -1/T
   2220  *     ----------------------------------------------------------
   2221  *
   2222  * Special cases:
   2223  *      Let trig be any of sin, cos, or tan.
   2224  *      trig(+-INF)  is NaN, with signals;
   2225  *      trig(NaN)    is that NaN;
   2226  *
   2227  * Accuracy:
   2228  *      TRIG(x) returns trig(x) nearly rounded
   2229  */
   2230 double sin(double x) {
   2231   double y[2], z = 0.0;
   2232   int32_t n, ix;
   2233 
   2234   /* High word of x. */
   2235   GET_HIGH_WORD(ix, x);
   2236 
   2237   /* |x| ~< pi/4 */
   2238   ix &= 0x7fffffff;
   2239   if (ix <= 0x3fe921fb) {
   2240     return __kernel_sin(x, z, 0);
   2241   } else if (ix >= 0x7ff00000) {
   2242     /* sin(Inf or NaN) is NaN */
   2243     return x - x;
   2244   } else {
   2245     /* argument reduction needed */
   2246     n = __ieee754_rem_pio2(x, y);
   2247     switch (n & 3) {
   2248       case 0:
   2249         return __kernel_sin(y[0], y[1], 1);
   2250       case 1:
   2251         return __kernel_cos(y[0], y[1]);
   2252       case 2:
   2253         return -__kernel_sin(y[0], y[1], 1);
   2254       default:
   2255         return -__kernel_cos(y[0], y[1]);
   2256     }
   2257   }
   2258 }
   2259 
   2260 /* tan(x)
   2261  * Return tangent function of x.
   2262  *
   2263  * kernel function:
   2264  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
   2265  *      __ieee754_rem_pio2      ... argument reduction routine
   2266  *
   2267  * Method.
   2268  *      Let S,C and T denote the sin, cos and tan respectively on
   2269  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   2270  *      in [-pi/4 , +pi/4], and let n = k mod 4.
   2271  *      We have
   2272  *
   2273  *          n        sin(x)      cos(x)        tan(x)
   2274  *     ----------------------------------------------------------
   2275  *          0          S           C             T
   2276  *          1          C          -S            -1/T
   2277  *          2         -S          -C             T
   2278  *          3         -C           S            -1/T
   2279  *     ----------------------------------------------------------
   2280  *
   2281  * Special cases:
   2282  *      Let trig be any of sin, cos, or tan.
   2283  *      trig(+-INF)  is NaN, with signals;
   2284  *      trig(NaN)    is that NaN;
   2285  *
   2286  * Accuracy:
   2287  *      TRIG(x) returns trig(x) nearly rounded
   2288  */
   2289 double tan(double x) {
   2290   double y[2], z = 0.0;
   2291   int32_t n, ix;
   2292 
   2293   /* High word of x. */
   2294   GET_HIGH_WORD(ix, x);
   2295 
   2296   /* |x| ~< pi/4 */
   2297   ix &= 0x7fffffff;
   2298   if (ix <= 0x3fe921fb) {
   2299     return __kernel_tan(x, z, 1);
   2300   } else if (ix >= 0x7ff00000) {
   2301     /* tan(Inf or NaN) is NaN */
   2302     return x - x; /* NaN */
   2303   } else {
   2304     /* argument reduction needed */
   2305     n = __ieee754_rem_pio2(x, y);
   2306     /* 1 -> n even, -1 -> n odd */
   2307     return __kernel_tan(y[0], y[1], 1 - ((n & 1) << 1));
   2308   }
   2309 }
   2310 
   2311 }  // namespace ieee754
   2312 }  // namespace base
   2313 }  // namespace v8
   2314