Home | History | Annotate | Download | only in video
      1 /* @(#)e_sqrt.c 5.1 93/09/24 */
      2 /*
      3  * ====================================================
      4  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
      5  *
      6  * Developed at SunPro, a Sun Microsystems, Inc. business.
      7  * Permission to use, copy, modify, and distribute this
      8  * software is freely granted, provided that this notice
      9  * is preserved.
     10  * ====================================================
     11  */
     12 
     13 #if defined(LIBM_SCCS) && !defined(lint)
     14 static char rcsid[] = "$NetBSD: e_sqrt.c,v 1.8 1995/05/10 20:46:17 jtc Exp $";
     15 #endif
     16 
     17 /* __ieee754_sqrt(x)
     18  * Return correctly rounded sqrt.
     19  *           ------------------------------------------
     20  *	     |  Use the hardware sqrt if you have one |
     21  *           ------------------------------------------
     22  * Method:
     23  *   Bit by bit method using integer arithmetic. (Slow, but portable)
     24  *   1. Normalization
     25  *	Scale x to y in [1,4) with even powers of 2:
     26  *	find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
     27  *		sqrt(x) = 2^k * sqrt(y)
     28  *   2. Bit by bit computation
     29  *	Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
     30  *	     i							 0
     31  *                                     i+1         2
     32  *	    s  = 2*q , and	y  =  2   * ( y - q  ).		(1)
     33  *	     i      i            i                 i
     34  *
     35  *	To compute q    from q , one checks whether
     36  *		    i+1       i
     37  *
     38  *			      -(i+1) 2
     39  *			(q + 2      ) <= y.			(2)
     40  *     			  i
     41  *							      -(i+1)
     42  *	If (2) is false, then q   = q ; otherwise q   = q  + 2      .
     43  *		 	       i+1   i             i+1   i
     44  *
     45  *	With some algebric manipulation, it is not difficult to see
     46  *	that (2) is equivalent to
     47  *                             -(i+1)
     48  *			s  +  2       <= y			(3)
     49  *			 i                i
     50  *
     51  *	The advantage of (3) is that s  and y  can be computed by
     52  *				      i      i
     53  *	the following recurrence formula:
     54  *	    if (3) is false
     55  *
     56  *	    s     =  s  ,	y    = y   ;			(4)
     57  *	     i+1      i		 i+1    i
     58  *
     59  *	    otherwise,
     60  *                         -i                     -(i+1)
     61  *	    s	  =  s  + 2  ,  y    = y  -  s  - 2  		(5)
     62  *           i+1      i          i+1    i     i
     63  *
     64  *	One may easily use induction to prove (4) and (5).
     65  *	Note. Since the left hand side of (3) contain only i+2 bits,
     66  *	      it does not necessary to do a full (53-bit) comparison
     67  *	      in (3).
     68  *   3. Final rounding
     69  *	After generating the 53 bits result, we compute one more bit.
     70  *	Together with the remainder, we can decide whether the
     71  *	result is exact, bigger than 1/2ulp, or less than 1/2ulp
     72  *	(it will never equal to 1/2ulp).
     73  *	The rounding mode can be detected by checking whether
     74  *	huge + tiny is equal to huge, and whether huge - tiny is
     75  *	equal to huge for some floating point number "huge" and "tiny".
     76  *
     77  * Special cases:
     78  *	sqrt(+-0) = +-0 	... exact
     79  *	sqrt(inf) = inf
     80  *	sqrt(-ve) = NaN		... with invalid signal
     81  *	sqrt(NaN) = NaN		... with invalid signal for signaling NaN
     82  *
     83  * Other methods : see the appended file at the end of the program below.
     84  *---------------
     85  */
     86 
     87 /*#include "math.h"*/
     88 #include "math_private.h"
     89 
     90 #ifdef __STDC__
     91 	double SDL_NAME(copysign)(double x, double y)
     92 #else
     93 	double SDL_NAME(copysign)(x,y)
     94 	double x,y;
     95 #endif
     96 {
     97 	u_int32_t hx,hy;
     98 	GET_HIGH_WORD(hx,x);
     99 	GET_HIGH_WORD(hy,y);
    100 	SET_HIGH_WORD(x,(hx&0x7fffffff)|(hy&0x80000000));
    101         return x;
    102 }
    103 
    104 #ifdef __STDC__
    105 	double SDL_NAME(scalbn) (double x, int n)
    106 #else
    107 	double SDL_NAME(scalbn) (x,n)
    108 	double x; int n;
    109 #endif
    110 {
    111 	int32_t k,hx,lx;
    112 	EXTRACT_WORDS(hx,lx,x);
    113         k = (hx&0x7ff00000)>>20;		/* extract exponent */
    114         if (k==0) {				/* 0 or subnormal x */
    115             if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
    116 	    x *= two54;
    117 	    GET_HIGH_WORD(hx,x);
    118 	    k = ((hx&0x7ff00000)>>20) - 54;
    119             if (n< -50000) return tiny*x; 	/*underflow*/
    120 	    }
    121         if (k==0x7ff) return x+x;		/* NaN or Inf */
    122         k = k+n;
    123         if (k >  0x7fe) return huge*SDL_NAME(copysign)(huge,x); /* overflow  */
    124         if (k > 0) 				/* normal result */
    125 	    {SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20)); return x;}
    126         if (k <= -54) {
    127             if (n > 50000) 	/* in case integer overflow in n+k */
    128 		return huge*SDL_NAME(copysign)(huge,x);	/*overflow*/
    129 	    else return tiny*SDL_NAME(copysign)(tiny,x); 	/*underflow*/
    130 	}
    131         k += 54;				/* subnormal result */
    132 	SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20));
    133         return x*twom54;
    134 }
    135 
    136 #ifdef __STDC__
    137 	double __ieee754_sqrt(double x)
    138 #else
    139 	double __ieee754_sqrt(x)
    140 	double x;
    141 #endif
    142 {
    143 	double z;
    144 	int32_t sign = (int)0x80000000;
    145 	int32_t ix0,s0,q,m,t,i;
    146 	u_int32_t r,t1,s1,ix1,q1;
    147 
    148 	EXTRACT_WORDS(ix0,ix1,x);
    149 
    150     /* take care of Inf and NaN */
    151 	if((ix0&0x7ff00000)==0x7ff00000) {
    152 	    return x*x+x;		/* sqrt(NaN)=NaN, sqrt(+inf)=+inf
    153 					   sqrt(-inf)=sNaN */
    154 	}
    155     /* take care of zero */
    156 	if(ix0<=0) {
    157 	    if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
    158 	    else if(ix0<0)
    159 		return (x-x)/(x-x);		/* sqrt(-ve) = sNaN */
    160 	}
    161     /* normalize x */
    162 	m = (ix0>>20);
    163 	if(m==0) {				/* subnormal x */
    164 	    while(ix0==0) {
    165 		m -= 21;
    166 		ix0 |= (ix1>>11); ix1 <<= 21;
    167 	    }
    168 	    for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
    169 	    m -= i-1;
    170 	    ix0 |= (ix1>>(32-i));
    171 	    ix1 <<= i;
    172 	}
    173 	m -= 1023;	/* unbias exponent */
    174 	ix0 = (ix0&0x000fffff)|0x00100000;
    175 	if(m&1){	/* odd m, double x to make it even */
    176 	    ix0 += ix0 + ((ix1&sign)>>31);
    177 	    ix1 += ix1;
    178 	}
    179 	m >>= 1;	/* m = [m/2] */
    180 
    181     /* generate sqrt(x) bit by bit */
    182 	ix0 += ix0 + ((ix1&sign)>>31);
    183 	ix1 += ix1;
    184 	q = q1 = s0 = s1 = 0;	/* [q,q1] = sqrt(x) */
    185 	r = 0x00200000;		/* r = moving bit from right to left */
    186 
    187 	while(r!=0) {
    188 	    t = s0+r;
    189 	    if(t<=ix0) {
    190 		s0   = t+r;
    191 		ix0 -= t;
    192 		q   += r;
    193 	    }
    194 	    ix0 += ix0 + ((ix1&sign)>>31);
    195 	    ix1 += ix1;
    196 	    r>>=1;
    197 	}
    198 
    199 	r = sign;
    200 	while(r!=0) {
    201 	    t1 = s1+r;
    202 	    t  = s0;
    203 	    if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
    204 		s1  = t1+r;
    205 		if(((int32_t)(t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
    206 		ix0 -= t;
    207 		if (ix1 < t1) ix0 -= 1;
    208 		ix1 -= t1;
    209 		q1  += r;
    210 	    }
    211 	    ix0 += ix0 + ((ix1&sign)>>31);
    212 	    ix1 += ix1;
    213 	    r>>=1;
    214 	}
    215 
    216     /* use floating add to find out rounding direction */
    217 	if((ix0|ix1)!=0) {
    218 	    z = one-tiny; /* trigger inexact flag */
    219 	    if (z>=one) {
    220 	        z = one+tiny;
    221 	        if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;}
    222 		else if (z>one) {
    223 		    if (q1==(u_int32_t)0xfffffffe) q+=1;
    224 		    q1+=2;
    225 		} else
    226 	            q1 += (q1&1);
    227 	    }
    228 	}
    229 	ix0 = (q>>1)+0x3fe00000;
    230 	ix1 =  q1>>1;
    231 	if ((q&1)==1) ix1 |= sign;
    232 	ix0 += (m <<20);
    233 	INSERT_WORDS(z,ix0,ix1);
    234 	return z;
    235 }
    236 
    237 /*
    238 Other methods  (use floating-point arithmetic)
    239 -------------
    240 (This is a copy of a drafted paper by Prof W. Kahan
    241 and K.C. Ng, written in May, 1986)
    242 
    243 	Two algorithms are given here to implement sqrt(x)
    244 	(IEEE double precision arithmetic) in software.
    245 	Both supply sqrt(x) correctly rounded. The first algorithm (in
    246 	Section A) uses newton iterations and involves four divisions.
    247 	The second one uses reciproot iterations to avoid division, but
    248 	requires more multiplications. Both algorithms need the ability
    249 	to chop results of arithmetic operations instead of round them,
    250 	and the INEXACT flag to indicate when an arithmetic operation
    251 	is executed exactly with no roundoff error, all part of the
    252 	standard (IEEE 754-1985). The ability to perform shift, add,
    253 	subtract and logical AND operations upon 32-bit words is needed
    254 	too, though not part of the standard.
    255 
    256 A.  sqrt(x) by Newton Iteration
    257 
    258    (1)	Initial approximation
    259 
    260 	Let x0 and x1 be the leading and the trailing 32-bit words of
    261 	a floating point number x (in IEEE double format) respectively
    262 
    263 	    1    11		     52				  ...widths
    264 	   ------------------------------------------------------
    265 	x: |s|	  e     |	      f				|
    266 	   ------------------------------------------------------
    267 	      msb    lsb  msb				      lsb ...order
    268 
    269 
    270 	     ------------------------  	     ------------------------
    271 	x0:  |s|   e    |    f1     |	 x1: |          f2           |
    272 	     ------------------------  	     ------------------------
    273 
    274 	By performing shifts and subtracts on x0 and x1 (both regarded
    275 	as integers), we obtain an 8-bit approximation of sqrt(x) as
    276 	follows.
    277 
    278 		k  := (x0>>1) + 0x1ff80000;
    279 		y0 := k - T1[31&(k>>15)].	... y ~ sqrt(x) to 8 bits
    280 	Here k is a 32-bit integer and T1[] is an integer array containing
    281 	correction terms. Now magically the floating value of y (y's
    282 	leading 32-bit word is y0, the value of its trailing word is 0)
    283 	approximates sqrt(x) to almost 8-bit.
    284 
    285 	Value of T1:
    286 	static int T1[32]= {
    287 	0,	1024,	3062,	5746,	9193,	13348,	18162,	23592,
    288 	29598,	36145,	43202,	50740,	58733,	67158,	75992,	85215,
    289 	83599,	71378,	60428,	50647,	41945,	34246,	27478,	21581,
    290 	16499,	12183,	8588,	5674,	3403,	1742,	661,	130,};
    291 
    292     (2)	Iterative refinement
    293 
    294 	Apply Heron's rule three times to y, we have y approximates
    295 	sqrt(x) to within 1 ulp (Unit in the Last Place):
    296 
    297 		y := (y+x/y)/2		... almost 17 sig. bits
    298 		y := (y+x/y)/2		... almost 35 sig. bits
    299 		y := y-(y-x/y)/2	... within 1 ulp
    300 
    301 
    302 	Remark 1.
    303 	    Another way to improve y to within 1 ulp is:
    304 
    305 		y := (y+x/y)		... almost 17 sig. bits to 2*sqrt(x)
    306 		y := y - 0x00100006	... almost 18 sig. bits to sqrt(x)
    307 
    308 				2
    309 			    (x-y )*y
    310 		y := y + 2* ----------	...within 1 ulp
    311 			       2
    312 			     3y  + x
    313 
    314 
    315 	This formula has one division fewer than the one above; however,
    316 	it requires more multiplications and additions. Also x must be
    317 	scaled in advance to avoid spurious overflow in evaluating the
    318 	expression 3y*y+x. Hence it is not recommended uless division
    319 	is slow. If division is very slow, then one should use the
    320 	reciproot algorithm given in section B.
    321 
    322     (3) Final adjustment
    323 
    324 	By twiddling y's last bit it is possible to force y to be
    325 	correctly rounded according to the prevailing rounding mode
    326 	as follows. Let r and i be copies of the rounding mode and
    327 	inexact flag before entering the square root program. Also we
    328 	use the expression y+-ulp for the next representable floating
    329 	numbers (up and down) of y. Note that y+-ulp = either fixed
    330 	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
    331 	mode.
    332 
    333 		I := FALSE;	... reset INEXACT flag I
    334 		R := RZ;	... set rounding mode to round-toward-zero
    335 		z := x/y;	... chopped quotient, possibly inexact
    336 		If(not I) then {	... if the quotient is exact
    337 		    if(z=y) {
    338 		        I := i;	 ... restore inexact flag
    339 		        R := r;  ... restore rounded mode
    340 		        return sqrt(x):=y.
    341 		    } else {
    342 			z := z - ulp;	... special rounding
    343 		    }
    344 		}
    345 		i := TRUE;		... sqrt(x) is inexact
    346 		If (r=RN) then z=z+ulp	... rounded-to-nearest
    347 		If (r=RP) then {	... round-toward-+inf
    348 		    y = y+ulp; z=z+ulp;
    349 		}
    350 		y := y+z;		... chopped sum
    351 		y0:=y0-0x00100000;	... y := y/2 is correctly rounded.
    352 	        I := i;	 		... restore inexact flag
    353 	        R := r;  		... restore rounded mode
    354 	        return sqrt(x):=y.
    355 
    356     (4)	Special cases
    357 
    358 	Square root of +inf, +-0, or NaN is itself;
    359 	Square root of a negative number is NaN with invalid signal.
    360 
    361 
    362 B.  sqrt(x) by Reciproot Iteration
    363 
    364    (1)	Initial approximation
    365 
    366 	Let x0 and x1 be the leading and the trailing 32-bit words of
    367 	a floating point number x (in IEEE double format) respectively
    368 	(see section A). By performing shifs and subtracts on x0 and y0,
    369 	we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
    370 
    371 	    k := 0x5fe80000 - (x0>>1);
    372 	    y0:= k - T2[63&(k>>14)].	... y ~ 1/sqrt(x) to 7.8 bits
    373 
    374 	Here k is a 32-bit integer and T2[] is an integer array
    375 	containing correction terms. Now magically the floating
    376 	value of y (y's leading 32-bit word is y0, the value of
    377 	its trailing word y1 is set to zero) approximates 1/sqrt(x)
    378 	to almost 7.8-bit.
    379 
    380 	Value of T2:
    381 	static int T2[64]= {
    382 	0x1500,	0x2ef8,	0x4d67,	0x6b02,	0x87be,	0xa395,	0xbe7a,	0xd866,
    383 	0xf14a,	0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
    384 	0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
    385 	0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
    386 	0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
    387 	0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
    388 	0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
    389 	0x1527f,0x1334a,0x11051,0xe951,	0xbe01,	0x8e0d,	0x5924,	0x1edd,};
    390 
    391     (2)	Iterative refinement
    392 
    393 	Apply Reciproot iteration three times to y and multiply the
    394 	result by x to get an approximation z that matches sqrt(x)
    395 	to about 1 ulp. To be exact, we will have
    396 		-1ulp < sqrt(x)-z<1.0625ulp.
    397 
    398 	... set rounding mode to Round-to-nearest
    399 	   y := y*(1.5-0.5*x*y*y)	... almost 15 sig. bits to 1/sqrt(x)
    400 	   y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
    401 	... special arrangement for better accuracy
    402 	   z := x*y			... 29 bits to sqrt(x), with z*y<1
    403 	   z := z + 0.5*z*(1-z*y)	... about 1 ulp to sqrt(x)
    404 
    405 	Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
    406 	(a) the term z*y in the final iteration is always less than 1;
    407 	(b) the error in the final result is biased upward so that
    408 		-1 ulp < sqrt(x) - z < 1.0625 ulp
    409 	    instead of |sqrt(x)-z|<1.03125ulp.
    410 
    411     (3)	Final adjustment
    412 
    413 	By twiddling y's last bit it is possible to force y to be
    414 	correctly rounded according to the prevailing rounding mode
    415 	as follows. Let r and i be copies of the rounding mode and
    416 	inexact flag before entering the square root program. Also we
    417 	use the expression y+-ulp for the next representable floating
    418 	numbers (up and down) of y. Note that y+-ulp = either fixed
    419 	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
    420 	mode.
    421 
    422 	R := RZ;		... set rounding mode to round-toward-zero
    423 	switch(r) {
    424 	    case RN:		... round-to-nearest
    425 	       if(x<= z*(z-ulp)...chopped) z = z - ulp; else
    426 	       if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
    427 	       break;
    428 	    case RZ:case RM:	... round-to-zero or round-to--inf
    429 	       R:=RP;		... reset rounding mod to round-to-+inf
    430 	       if(x<z*z ... rounded up) z = z - ulp; else
    431 	       if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
    432 	       break;
    433 	    case RP:		... round-to-+inf
    434 	       if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
    435 	       if(x>z*z ...chopped) z = z+ulp;
    436 	       break;
    437 	}
    438 
    439 	Remark 3. The above comparisons can be done in fixed point. For
    440 	example, to compare x and w=z*z chopped, it suffices to compare
    441 	x1 and w1 (the trailing parts of x and w), regarding them as
    442 	two's complement integers.
    443 
    444 	...Is z an exact square root?
    445 	To determine whether z is an exact square root of x, let z1 be the
    446 	trailing part of z, and also let x0 and x1 be the leading and
    447 	trailing parts of x.
    448 
    449 	If ((z1&0x03ffffff)!=0)	... not exact if trailing 26 bits of z!=0
    450 	    I := 1;		... Raise Inexact flag: z is not exact
    451 	else {
    452 	    j := 1 - [(x0>>20)&1]	... j = logb(x) mod 2
    453 	    k := z1 >> 26;		... get z's 25-th and 26-th
    454 					    fraction bits
    455 	    I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
    456 	}
    457 	R:= r		... restore rounded mode
    458 	return sqrt(x):=z.
    459 
    460 	If multiplication is cheaper then the foregoing red tape, the
    461 	Inexact flag can be evaluated by
    462 
    463 	    I := i;
    464 	    I := (z*z!=x) or I.
    465 
    466 	Note that z*z can overwrite I; this value must be sensed if it is
    467 	True.
    468 
    469 	Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
    470 	zero.
    471 
    472 		    --------------------
    473 		z1: |        f2        |
    474 		    --------------------
    475 		bit 31		   bit 0
    476 
    477 	Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
    478 	or even of logb(x) have the following relations:
    479 
    480 	-------------------------------------------------
    481 	bit 27,26 of z1		bit 1,0 of x1	logb(x)
    482 	-------------------------------------------------
    483 	00			00		odd and even
    484 	01			01		even
    485 	10			10		odd
    486 	10			00		even
    487 	11			01		even
    488 	-------------------------------------------------
    489 
    490     (4)	Special cases (see (4) of Section A).
    491 
    492  */
    493 
    494