Home | History | Annotate | Download | only in ppc32
      1 
      2 /*  Copyright (C) 2006 Dave Nomura
      3        dcnltc (at) us.ibm.com
      4 
      5     This program is free software; you can redistribute it and/or
      6     modify it under the terms of the GNU General Public License as
      7     published by the Free Software Foundation; either version 2 of the
      8     License, or (at your option) any later version.
      9 
     10     This program is distributed in the hope that it will be useful, but
     11     WITHOUT ANY WARRANTY; without even the implied warranty of
     12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13     General Public License for more details.
     14 
     15     You should have received a copy of the GNU General Public License
     16     along with this program; if not, write to the Free Software
     17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     18     02111-1307, USA.
     19 
     20     The GNU General Public License is contained in the file COPYING.
     21 */
     22 
     23 #include <stdio.h>
     24 #include <stdlib.h>
     25 #include <limits.h>
     26 
     27 typedef enum { FALSE=0, TRUE } bool_t;
     28 
     29 typedef enum {
     30 	FADDS, FSUBS, FMULS, FDIVS,
     31 	FMADDS, FMSUBS, FNMADDS, FNMSUBS,
     32 	FADD, FSUB, FMUL, FDIV, FMADD,
     33 	FMSUB, FNMADD, FNMSUB, FSQRT
     34 } flt_op_t;
     35 
     36 typedef enum {
     37 	TO_NEAREST=0, TO_ZERO, TO_PLUS_INFINITY, TO_MINUS_INFINITY } round_mode_t;
     38 char *round_mode_name[] = { "near", "zero", "+inf", "-inf" };
     39 
     40 const char *flt_op_names[] = {
     41 	"fadds", "fsubs", "fmuls", "fdivs",
     42 	"fmadds", "fmsubs", "fnmadds", "fnmsubs",
     43 	"fadd", "fsub", "fmul", "fdiv", "fmadd", "fmsub", "fnmadd",
     44 	"fnmsub", "fsqrt"
     45 };
     46 
     47 typedef unsigned int fpscr_t;
     48 
     49 typedef union {
     50 	float flt;
     51 	struct {
     52 		unsigned int sign:1;
     53 		unsigned int exp:8;
     54 		unsigned int frac:23;
     55 	} layout;
     56 } flt_overlay;
     57 
     58 typedef union {
     59 	double dbl;
     60 	struct {
     61 		unsigned int sign:1;
     62 		unsigned int exp:11;
     63 		unsigned int frac_hi:20;
     64 		unsigned int frac_lo:32;
     65 	} layout;
     66 	struct {
     67 		unsigned int hi;
     68 		unsigned int lo;
     69 	} dbl_pair;
     70 } dbl_overlay;
     71 
     72 void assert_fail(const char *msg,
     73 	const char* expr, const char* file, int line, const char*fn);
     74 
     75 #define STRING(__str)  #__str
     76 #define assert(msg, expr)                                           \
     77   ((void) ((expr) ? 0 :                                         \
     78            (assert_fail (msg, STRING(expr),                  \
     79                              __FILE__, __LINE__,                \
     80                              __PRETTY_FUNCTION__), 0)))
     81 float denorm_small;
     82 double dbl_denorm_small;
     83 float norm_small;
     84 bool_t debug = FALSE;
     85 bool_t long_is_64_bits = sizeof(long) == 8;
     86 
     87 void assert_fail (msg, expr, file, line, fn)
     88 const char* msg;
     89 const char* expr;
     90 const char* file;
     91 int line;
     92 const char*fn;
     93 {
     94    printf( "\n%s: %s:%d (%s): Assertion `%s' failed.\n",
     95                msg, file, line, fn, expr );
     96    exit( 1 );
     97 }
     98 void set_rounding_mode(round_mode_t mode)
     99 {
    100 	switch(mode) {
    101 	case TO_NEAREST:
    102 		asm volatile("mtfsfi 7, 0");
    103 		break;
    104 	case TO_ZERO:
    105 		asm volatile("mtfsfi 7, 1");
    106 		break;
    107 	case TO_PLUS_INFINITY:
    108 		asm volatile("mtfsfi 7, 2");
    109 		break;
    110 	case TO_MINUS_INFINITY:
    111 		asm volatile("mtfsfi 7, 3");
    112 		break;
    113 	}
    114 }
    115 
    116 void print_double(char *msg, double dbl)
    117 {
    118 	dbl_overlay D;
    119 	D.dbl = dbl;
    120 
    121 	printf("%15s : dbl %-20a = %c(%4d, %05x%08x)\n",
    122 			msg, D.dbl, (D.layout.sign == 0 ? '+' : '-'),
    123 			D.layout.exp, D.layout.frac_hi, D.layout.frac_lo);
    124 }
    125 
    126 void print_single(char *msg, float *flt)
    127 {
    128 	flt_overlay F;
    129 	F.flt = *flt;
    130 
    131 	/* NOTE: for the purposes of comparing the fraction of a single with
    132 	**       a double left shift the .frac so that hex digits are grouped
    133 	**	     from left to right.  this is necessary because the size of a
    134 	**		 single mantissa (23) bits is not a multiple of 4
    135 	*/
    136 	printf("%15s : flt %-20a = %c(%4d, %06x)\n",
    137 		msg, F.flt, (F.layout.sign == 0 ? '+' : '-'), F.layout.exp, F.layout.frac << 1);
    138 }
    139 
    140 int check_dbl_to_flt_round(round_mode_t mode, double dbl, float *expected)
    141 {
    142 	int status = 0;
    143 	flt_overlay R, E;
    144 	char *result;
    145 	char *eq_ne;
    146 
    147 	set_rounding_mode(mode);
    148 
    149 	E.flt = *expected;
    150 	R.flt = (float)dbl;
    151 
    152 	if ((R.layout.sign != E.layout.sign) ||
    153 		(R.layout.exp != E.layout.exp) ||
    154 		(R.layout.frac != E.layout.frac)) {
    155 		result = "FAILED";
    156 		eq_ne = "!=";
    157 		status = 1;
    158 	} else {
    159 		result = "PASSED";
    160 		eq_ne = "==";
    161 		status = 0;
    162 	}
    163 	printf("%s:%s:(double)(%-20a) = %20a",
    164 		round_mode_name[mode], result, R.flt, dbl);
    165 	if (status) {
    166 		print_single("\n\texpected", &E.flt);
    167 		print_single("\n\trounded ", &R.flt);
    168 	}
    169 	putchar('\n');
    170 	return status;
    171 }
    172 
    173 int test_dbl_to_float_convert(char *msg, float *base)
    174 {
    175 	int status = 0;
    176 	double half = (double)denorm_small/2;
    177 	double qtr = half/2;
    178 	double D_hi = (double)*base + half + qtr;
    179 	double D_lo = (double)*base + half - qtr;
    180 	float F_lo = *base;
    181 	float F_hi = F_lo + denorm_small;
    182 
    183 
    184 	/*
    185 	** .....+-----+-----+-----+-----+---....
    186 	**      ^F_lo ^           ^     ^
    187 	**            D_lo
    188 	**                        D_hi
    189 	**                              F_hi
    190 	** F_lo and F_hi are two consecutive single float model numbers
    191 	** denorm_small distance apart. D_lo and D_hi are two numbers
    192 	** within that range that are not representable as single floats
    193 	** and will be rounded to either F_lo or F_hi.
    194 	*/
    195 	printf("-------------------------- %s --------------------------\n", msg);
    196 	if (debug) {
    197 		print_double("D_lo", D_lo);
    198 		print_double("D_hi", D_hi);
    199 		print_single("F_lo", &F_lo);
    200 		print_single("F_hi", &F_hi);
    201 	}
    202 
    203 	/* round to nearest */
    204 	status |= check_dbl_to_flt_round(TO_NEAREST, D_hi, &F_hi);
    205 	status |= check_dbl_to_flt_round(TO_NEAREST, D_lo, &F_lo);
    206 
    207 	/* round to zero */
    208 	status |= check_dbl_to_flt_round(TO_ZERO, D_hi, (D_hi > 0 ? &F_lo : &F_hi));
    209 	status |= check_dbl_to_flt_round(TO_ZERO, D_lo, (D_hi > 0 ? &F_lo : &F_hi));
    210 
    211 	/* round to +inf */
    212 	status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_hi, &F_hi);
    213 	status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_lo, &F_hi);
    214 
    215 	/* round to -inf */
    216 	status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_hi, &F_lo);
    217 	status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_lo, &F_lo);
    218 	return status;
    219 }
    220 
    221 void
    222 init()
    223 {
    224 	flt_overlay F;
    225 	dbl_overlay D;
    226 
    227 	/* small is the smallest denormalized single float number */
    228 	F.layout.sign = 0;
    229 	F.layout.exp = 0;
    230 	F.layout.frac = 1;
    231 	denorm_small = F.flt;	/* == 2^(-149) */
    232 	if (debug) {
    233 		print_double("float small", F.flt);
    234 	}
    235 
    236 	D.layout.sign = 0;
    237 	D.layout.exp = 0;
    238 	D.layout.frac_hi = 0;
    239 	D.layout.frac_lo = 1;
    240 	dbl_denorm_small = D.dbl;	/* == 2^(-1022) */
    241 	if (debug) {
    242 		print_double("double small", D.dbl);
    243 	}
    244 
    245 	/* n_small is the smallest normalized single precision float */
    246 	F.layout.exp = 1;
    247 	norm_small = F.flt;
    248 }
    249 
    250 int check_int_to_flt_round(round_mode_t mode, long L, float *expected)
    251 {
    252 	int status = 0;
    253 	int I = L;
    254 	char *int_name = "int";
    255 	flt_overlay R, E;
    256 	char *result;
    257 	int iter;
    258 
    259 	set_rounding_mode(mode);
    260 	E.flt = *expected;
    261 
    262 	for (iter = 0; iter < 2; iter++) {
    263 		int stat = 0;
    264 		R.flt = (iter == 0 ? (float)I : (float)L);
    265 
    266 		if ((R.layout.sign != E.layout.sign) ||
    267 			(R.layout.exp != E.layout.exp) ||
    268 			(R.layout.frac != E.layout.frac)) {
    269 			result = "FAILED";
    270 			stat = 1;
    271 		} else {
    272 			result = "PASSED";
    273 			stat = 0;
    274 		}
    275 		printf("%s:%s:(float)(%4s)%9d = %11.1f",
    276 			round_mode_name[mode], result, int_name, I, R.flt);
    277 		if (stat) {
    278 			print_single("\n\texpected: %.1f ", &E.flt);
    279 			print_single("\n\trounded ", &R.flt);
    280 		}
    281 		putchar('\n');
    282 		status |= stat;
    283 
    284 		if (!long_is_64_bits) break;
    285 		int_name = "long";
    286 	}
    287 	return status;
    288 }
    289 
    290 int check_long_to_dbl_round(round_mode_t mode, long L, double *expected)
    291 {
    292 	int status = 0;
    293 	dbl_overlay R, E;
    294 	char *result;
    295 
    296 	set_rounding_mode(mode);
    297 	E.dbl = *expected;
    298 
    299 	R.dbl = (double)L;
    300 
    301 	if ((R.layout.sign != E.layout.sign) ||
    302 		(R.layout.exp != E.layout.exp) ||
    303 		(R.layout.frac_lo != E.layout.frac_lo) ||
    304 		(R.layout.frac_hi != E.layout.frac_hi)) {
    305 		result = "FAILED";
    306 		status = 1;
    307 	} else {
    308 		result = "PASSED";
    309 		status = 0;
    310 	}
    311 	printf("%s:%s:(double)(%18ld) = %20.1f",
    312 		round_mode_name[mode], result, L, R.dbl);
    313 	if (status) {
    314 		printf("\n\texpected %.1f : ", E.dbl);
    315 	}
    316 	putchar('\n');
    317 	return status;
    318 }
    319 
    320 int test_int_to_float_convert(char *msg)
    321 {
    322 	int status = 0;
    323 	int int24_hi = 0x03ff0fff;
    324 	int int24_lo = 0x03ff0ffd;
    325 	float pos_flt_lo = 67047420.0;
    326 	float pos_flt_hi = 67047424.0;
    327 	float neg_flt_lo = -67047420.0;
    328 	float neg_flt_hi = -67047424.0;
    329 
    330 	printf("-------------------------- %s --------------------------\n", msg);
    331 	status |= check_int_to_flt_round(TO_NEAREST, int24_lo, &pos_flt_lo);
    332 	status |= check_int_to_flt_round(TO_NEAREST, int24_hi, &pos_flt_hi);
    333 	status |= check_int_to_flt_round(TO_ZERO, int24_lo, &pos_flt_lo);
    334 	status |= check_int_to_flt_round(TO_ZERO, int24_hi, &pos_flt_lo);
    335 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_lo, &pos_flt_hi);
    336 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_hi, &pos_flt_hi);
    337 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_lo, &pos_flt_lo);
    338 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_hi, &pos_flt_lo);
    339 
    340 	status |= check_int_to_flt_round(TO_NEAREST, -int24_lo, &neg_flt_lo);
    341 	status |= check_int_to_flt_round(TO_NEAREST, -int24_hi, &neg_flt_hi);
    342 	status |= check_int_to_flt_round(TO_ZERO, -int24_lo, &neg_flt_lo);
    343 	status |= check_int_to_flt_round(TO_ZERO, -int24_hi, &neg_flt_lo);
    344 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_lo, &neg_flt_lo);
    345 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_hi, &neg_flt_lo);
    346 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_lo, &neg_flt_hi);
    347 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_hi, &neg_flt_hi);
    348 	return status;
    349 }
    350 
    351 #ifdef __powerpc64__
    352 int test_long_to_double_convert(char *msg)
    353 {
    354 	int status = 0;
    355 	long long55_hi = 0x07ff0ffffffffff;
    356 	long long55_lo = 0x07ff0fffffffffd;
    357 	double pos_dbl_lo = 36012304344547324.0;
    358 	double pos_dbl_hi = 36012304344547328.0;
    359 	double neg_dbl_lo = -36012304344547324.0;
    360 	double neg_dbl_hi = -36012304344547328.0;
    361 
    362 	printf("-------------------------- %s --------------------------\n", msg);
    363 	status |= check_long_to_dbl_round(TO_NEAREST, long55_lo, &pos_dbl_lo);
    364 	status |= check_long_to_dbl_round(TO_NEAREST, long55_hi, &pos_dbl_hi);
    365 	status |= check_long_to_dbl_round(TO_ZERO, long55_lo, &pos_dbl_lo);
    366 	status |= check_long_to_dbl_round(TO_ZERO, long55_hi, &pos_dbl_lo);
    367 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_lo, &pos_dbl_hi);
    368 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_hi, &pos_dbl_hi);
    369 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_lo, &pos_dbl_lo);
    370 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_hi, &pos_dbl_lo);
    371 
    372 	status |= check_long_to_dbl_round(TO_NEAREST, -long55_lo, &neg_dbl_lo);
    373 	status |= check_long_to_dbl_round(TO_NEAREST, -long55_hi, &neg_dbl_hi);
    374 	status |= check_long_to_dbl_round(TO_ZERO, -long55_lo, &neg_dbl_lo);
    375 	status |= check_long_to_dbl_round(TO_ZERO, -long55_hi, &neg_dbl_lo);
    376 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_lo, &neg_dbl_lo);
    377 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_hi, &neg_dbl_lo);
    378 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_lo, &neg_dbl_hi);
    379 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_hi, &neg_dbl_hi);
    380 	return status;
    381 }
    382 #endif
    383 
    384 int check_single_arithmetic_op(flt_op_t op)
    385 {
    386 		char *result;
    387         int status = 0;
    388         dbl_overlay R, E;
    389         double qtr, half, fA, fB, fD;
    390 		round_mode_t mode;
    391 		int q, s;
    392 		bool_t two_args = TRUE;
    393 		float whole = denorm_small;
    394 
    395 #define BINOP(op) \
    396         __asm__ volatile( \
    397 					op" %0, %1, %2\n\t" \
    398 					: "=f"(fD) : "f"(fA) , "f"(fB));
    399 #define UNOP(op) \
    400         __asm__ volatile( \
    401 					op" %0, %1\n\t" \
    402 					: "=f"(fD) : "f"(fA));
    403 
    404 		half = (double)whole/2;
    405 		qtr = half/2;
    406 
    407 		if (debug) {
    408 			print_double("qtr", qtr);
    409 			print_double("whole", whole);
    410 			print_double("2*whole", 2*whole);
    411 		}
    412 
    413 		for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
    414 		for (s = -1; s < 2; s += 2)
    415 		for (q = 1; q < 4; q += 2) {
    416 			double expected;
    417 			double lo = s*whole;
    418 			double hi = s*2*whole;
    419 
    420 			switch(op) {
    421 			case FADDS:
    422 				fA = s*whole;
    423 				fB = s*q*qtr;
    424 				break;
    425 			case FSUBS:
    426 				fA = s*2*whole;
    427 				fB = s*(q == 1 ? 3 : 1)*qtr;
    428 				break;
    429 			case FMULS:
    430 				fA = 0.5;
    431 				fB = s*(4+q)*half;
    432 				break;
    433 			case FDIVS:
    434 				fA = s*(4+q)*half;
    435 				fB = 2.0;
    436 				break;
    437 			default:
    438 				assert("check_single_arithmetic_op: unexpected op",
    439 					FALSE);
    440 				break;
    441 			}
    442 
    443 			switch(mode) {
    444 			case TO_NEAREST:
    445 				expected = (q == 1 ? lo : hi);
    446 				break;
    447 			case TO_ZERO:
    448 				expected = lo;
    449 				break;
    450 			case TO_PLUS_INFINITY:
    451 				expected = (s == 1 ? hi : lo);
    452 				break;
    453 			case TO_MINUS_INFINITY:
    454 				expected = (s == 1 ? lo : hi);
    455 				break;
    456 			}
    457 
    458 			set_rounding_mode(mode);
    459 
    460 			/*
    461 			** do the double precision dual operation just for comparison
    462 			** when debugging
    463 			*/
    464 			switch(op) {
    465 			case FADDS:
    466 				BINOP("fadds");
    467 				R.dbl = fD;
    468 				BINOP("fadd");
    469 				break;
    470 			case FSUBS:
    471 				BINOP("fsubs");
    472 				R.dbl = fD;
    473 				BINOP("fsub");
    474 				break;
    475 			case FMULS:
    476 				BINOP("fmuls");
    477 				R.dbl = fD;
    478 				BINOP("fmul");
    479 				break;
    480 			case FDIVS:
    481 				BINOP("fdivs");
    482 				R.dbl = fD;
    483 				BINOP("fdiv");
    484 				break;
    485 			default:
    486 				assert("check_single_arithmetic_op: unexpected op",
    487 					FALSE);
    488 				break;
    489 			}
    490 #undef UNOP
    491 #undef BINOP
    492 
    493 			E.dbl = expected;
    494 
    495 			if ((R.layout.sign != E.layout.sign) ||
    496 				(R.layout.exp != E.layout.exp) ||
    497 				(R.layout.frac_lo != E.layout.frac_lo) ||
    498 				(R.layout.frac_hi != E.layout.frac_hi)) {
    499 				result = "FAILED";
    500 				status = 1;
    501 			} else {
    502 				result = "PASSED";
    503 				status = 0;
    504 			}
    505 
    506 			printf("%s:%s:%s(%-13a",
    507 				round_mode_name[mode], result, flt_op_names[op], fA);
    508 			if (two_args) printf(", %-13a", fB);
    509 			printf(") = %-13a", R.dbl);
    510 			if (status) printf("\n\texpected %a", E.dbl);
    511 			putchar('\n');
    512 
    513 			if (debug) {
    514 				print_double("hi", hi);
    515 				print_double("lo", lo);
    516 				print_double("expected", expected);
    517 				print_double("got", R.dbl);
    518 				print_double("double result", fD);
    519 			}
    520 		}
    521 
    522 		return status;
    523 }
    524 
    525 int check_single_guarded_arithmetic_op(flt_op_t op)
    526 {
    527 		typedef struct {
    528 			int num, den, frac;
    529 		} fdivs_t;
    530 
    531 		char *result;
    532         int status = 0;
    533         flt_overlay A, B, Z;
    534         dbl_overlay Res, Exp;
    535         double fA, fB, fC, fD;
    536 		round_mode_t mode;
    537 		int g, s;
    538 		int arg_count;
    539 
    540 		fdivs_t divs_guard_cases[16] = {
    541 			{ 105, 56, 0x700000 },  /* : 0 */
    542 			{ 100, 57, 0x608FB8 },  /* : 1 */
    543 			{ 000, 00, 0x000000 },  /* : X */
    544 			{ 100, 52, 0x762762 },  /* : 3 */
    545 			{ 000, 00, 0x000000 },  /* : X */
    546 			{ 100, 55, 0x68BA2E },  /* : 5 */
    547 			{ 000, 00, 0x000000 },  /* : X */
    548 			{ 100, 51, 0x7AFAFA },  /* : 7 */
    549 			{ 000, 00, 0x000000 },  /* : X */
    550 			{ 100, 56, 0x649249 },  /* : 9 */
    551 			{ 000, 00, 0x000000 },  /* : X */
    552 			{ 100, 54, 0x6D097B },  /* : B */
    553 			{ 000, 00, 0x000000 },  /* : X */
    554 			{ 100, 59, 0x58F2FB },  /* : D */
    555 			{ 000, 00, 0x000000 },  /* : X */
    556 			{ 101, 52, 0x789D89 }  /* : F */
    557 		};
    558 
    559 		/*	0x1.00000 00000000p-3 */
    560 		/* set up the invariant fields of B, the arg to cause rounding */
    561 		B.flt = 0.0;
    562 		B.layout.exp = 124;  /* -3 */
    563 
    564 		/* set up args so result is always Z = 1.200000000000<g>p+0 */
    565 		Z.flt = 1.0;
    566 		Z.layout.sign = 0;
    567 
    568 #define TERNOP(op) \
    569 		arg_count = 3; \
    570         __asm__ volatile( \
    571 					op" %0, %1, %2, %3\n\t" \
    572 					: "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
    573 #define BINOP(op) \
    574 		arg_count = 2; \
    575         __asm__ volatile( \
    576 					op" %0, %1, %2\n\t" \
    577 					: "=f"(fD) : "f"(fA) , "f"(fB));
    578 #define UNOP(op) \
    579 		arg_count = 1; \
    580         __asm__ volatile( \
    581 					op" %0, %1\n\t" \
    582 					: "=f"(fD) : "f"(fA));
    583 
    584 	for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
    585 	for (s = -1; s < 2; s += 2)
    586 	for (g = 0; g < 16; g += 1) {
    587 		double lo, hi, expected;
    588 		int LSB;
    589 		int guard = 0;
    590 		int z_sign = s;
    591 
    592 		/*
    593 		** one argument will have exponent = 0 as will the result (by
    594 		** design) so choose the other argument with exponent -3 to
    595 		** force a 3 bit shift for scaling leaving us with 3 guard bits
    596 		** and the LSB bit at the bottom of the manitssa.
    597 		*/
    598 		switch(op) {
    599 		case FADDS:
    600 			/* 1p+0 + 1.00000<g>p-3 */
    601 			B.layout.frac = g;
    602 
    603 			fB = s*B.flt;
    604 			fA = s*1.0;
    605 
    606 			/* set up Z to be truncated result */
    607 
    608 			/* mask off LSB from resulting guard bits */
    609 			guard = g & 7;
    610 
    611 			Z.layout.frac = 0x100000 | (g >> 3);
    612 			break;
    613 		case FSUBS:
    614 			/* 1.200002p+0 - 1.000000000000<g>p-3 */
    615 			A.flt = 1.125;
    616 			/* add enough to avoid scaling of the result */
    617 			A.layout.frac |= 0x2;
    618 			fA = s*A.flt;
    619 
    620 			B.layout.frac = g;
    621 			fB = s*B.flt;
    622 
    623 			/* set up Z to be truncated result */
    624 			guard = (0x10-g);
    625 			Z.layout.frac = guard>>3;
    626 
    627 			/* mask off LSB from resulting guard bits */
    628 			guard &= 7;
    629 			break;
    630 		case FMULS:
    631 			/* 1 + g*2^-23 */
    632 			A.flt = 1.0;
    633 			A.layout.frac = g;
    634 			fA = s*A.flt;
    635 			fB = 1.125;
    636 
    637 			/* set up Z to be truncated result */
    638 			Z.flt = 1.0;
    639 			Z.layout.frac = 0x100000;
    640 			Z.layout.frac |= g + (g>>3);
    641 			guard = g & 7;
    642 			break;
    643 		case FDIVS:
    644 			/* g >> 3 == LSB, g & 7 == guard bits */
    645 			guard = g & 7;
    646 			if ((guard & 1) == 0) {
    647 				/* special case: guard bit X = 0 */
    648 				A.flt = denorm_small;
    649 				A.layout.frac = g;
    650 				fA = A.flt;
    651 				fB = s*8.0;
    652 				Z.flt = 0.0;
    653 				Z.layout.frac |= (g >> 3);
    654 			} else {
    655 				fA = s*divs_guard_cases[g].num;
    656 				fB = divs_guard_cases[g].den;
    657 
    658 				Z.flt = 1.0;
    659 				Z.layout.frac = divs_guard_cases[g].frac;
    660 			}
    661 			break;
    662 		case FMADDS:
    663 		case FMSUBS:
    664 		case FNMADDS:
    665 		case FNMSUBS:
    666 			/* 1 + g*2^-23 */
    667 			A.flt = 1.0;
    668 			A.layout.frac = g;
    669 			fA = s*A.flt;
    670 			fB = 1.125;
    671 
    672 			/* 1.000001p-1 */
    673 			A.flt = 0.5;
    674 			A.layout.frac = 1;
    675 			fC = (op == FMADDS || op == FNMADDS ? s : -s)*A.flt;
    676 
    677 			/* set up Z to be truncated result */
    678 			z_sign = (op == FNMADDS || op == FNMSUBS ? -s : s);
    679 			guard = ((g & 7) + 0x4) & 7;
    680 			Z.flt = 1.0;
    681 			Z.layout.frac = 0x500000;
    682 			Z.layout.frac |= g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
    683 			break;
    684 		default:
    685 			assert("check_single_arithmetic_op: unexpected op",
    686 				FALSE);
    687 			break;
    688 		}
    689 
    690 		/* get LSB for tie breaking */
    691 		LSB = Z.layout.frac & 1;
    692 
    693 		/* set up hi and lo */
    694 		lo = z_sign*Z.flt;
    695 		Z.layout.frac += 1;
    696 		hi = z_sign*Z.flt;
    697 
    698 		switch(mode) {
    699 		case TO_NEAREST:
    700 			/* look at 3 guard bits to determine expected rounding */
    701 			switch(guard) {
    702 			case 0:
    703 			case 1: case 2: case 3:
    704 				expected = lo;
    705 				break;
    706 			case 4:	/* tie: round to even */
    707 				if (debug) printf("tie: LSB = %d\n", LSB);
    708 				expected = (LSB == 0 ? lo : hi);
    709 				break;
    710 			case 5: case 6: case 7:
    711 				expected = hi;
    712 				break;
    713 			default:
    714 				assert("check_single_guarded_arithmetic_op: unexpected guard",
    715 					FALSE);
    716 			}
    717 			break;
    718 		case TO_ZERO:
    719 			expected = lo;
    720 			break;
    721 		case TO_PLUS_INFINITY:
    722 			if (guard == 0) {
    723 				/* no rounding */
    724 				expected = lo;
    725 			} else {
    726 				expected = (s == 1 ? hi : lo);
    727 			}
    728 			break;
    729 		case TO_MINUS_INFINITY:
    730 			if (guard == 0) {
    731 				/* no rounding */
    732 				expected = lo;
    733 			} else {
    734 				expected = (s == 1 ? lo : hi);
    735 			}
    736 			break;
    737 		}
    738 
    739 		set_rounding_mode(mode);
    740 
    741 		/*
    742 		** do the double precision dual operation just for comparison
    743 		** when debugging
    744 		*/
    745 		switch(op) {
    746 		case FADDS:
    747 			BINOP("fadds");
    748 			Res.dbl = fD;
    749 			break;
    750 		case FSUBS:
    751 			BINOP("fsubs");
    752 			Res.dbl = fD;
    753 			break;
    754 		case FMULS:
    755 			BINOP("fmuls");
    756 			Res.dbl = fD;
    757 			break;
    758 		case FDIVS:
    759 			BINOP("fdivs");
    760 			Res.dbl = fD;
    761 			break;
    762 		case FMADDS:
    763 			TERNOP("fmadds");
    764 			Res.dbl = fD;
    765 			break;
    766 		case FMSUBS:
    767 			TERNOP("fmsubs");
    768 			Res.dbl = fD;
    769 			break;
    770 		case FNMADDS:
    771 			TERNOP("fnmadds");
    772 			Res.dbl = fD;
    773 			break;
    774 		case FNMSUBS:
    775 			TERNOP("fnmsubs");
    776 			Res.dbl = fD;
    777 			break;
    778 		default:
    779 			assert("check_single_guarded_arithmetic_op: unexpected op",
    780 				FALSE);
    781 			break;
    782 		}
    783 #undef UNOP
    784 #undef BINOP
    785 #undef TERNOP
    786 
    787 		Exp.dbl = expected;
    788 
    789 		if ((Res.layout.sign != Exp.layout.sign) ||
    790 			(Res.layout.exp != Exp.layout.exp) ||
    791 			(Res.layout.frac_lo != Exp.layout.frac_lo) ||
    792 			(Res.layout.frac_hi != Exp.layout.frac_hi)) {
    793 			result = "FAILED";
    794 			status = 1;
    795 		} else {
    796 			result = "PASSED";
    797 			status = 0;
    798 		}
    799 
    800 		printf("%s:%s:%s(%-13f",
    801 			round_mode_name[mode], result, flt_op_names[op], fA);
    802 		if (arg_count > 1) printf(", %-13a", fB);
    803 		if (arg_count > 2) printf(", %-13a", fC);
    804 		printf(") = %-13a", Res.dbl);
    805 		if (status) printf("\n\texpected %a", Exp.dbl);
    806 		putchar('\n');
    807 
    808 		if (debug) {
    809 			print_double("hi", hi);
    810 			print_double("lo", lo);
    811 			print_double("expected", expected);
    812 			print_double("got", Res.dbl);
    813 		}
    814 	}
    815 
    816 	return status;
    817 }
    818 
    819 int check_double_guarded_arithmetic_op(flt_op_t op)
    820 {
    821 	typedef struct {
    822 		int num, den, hi, lo;
    823 	} fdiv_t;
    824 	typedef struct {
    825 		double arg;
    826 		int exp, hi, lo;
    827 	} fsqrt_t;
    828 
    829 	char *result;
    830 	int status = 0;
    831 	dbl_overlay A, B, Z;
    832 	dbl_overlay Res, Exp;
    833 	double fA, fB, fC, fD;
    834 	round_mode_t mode;
    835 	int g, s;
    836 	int arg_count;
    837 	fdiv_t div_guard_cases[16] = {
    838 		{ 62, 62, 0x00000, 0x00000000 },	/* 0 */
    839 		{ 64, 62, 0x08421, 0x08421084 },	/* 1 */
    840 		{ 66, 62, 0x10842, 0x10842108 },	/* 2 */
    841 		{ 100, 62, 0x9ce73, 0x9ce739ce },	/* 3 */
    842 		{ 100, 62, 0x9ce73, 0x9ce739ce },	/* X */
    843 		{ 102, 62, 0xa5294, 0xa5294a52 },	/* 5 */
    844 		{ 106, 62, 0xb5ad6, 0xb5ad6b5a },	/* 6 */
    845 		{ 108, 62, 0xbdef7, 0xbdef7bde },	/* 7 */
    846 		{ 108, 108, 0x00000, 0x00000000 },	/* 8 */
    847 		{ 112, 62, 0xce739, 0xce739ce7 },	/* 9 */
    848 		{ 114, 62, 0xd6b5a, 0xd6b5ad6b },	/* A */
    849 		{ 116, 62, 0xdef7b, 0xdef7bdef },	/* B */
    850 		{ 84, 62, 0x5ad6b, 0x5ad6b5ad },	/* X */
    851 		{ 118, 62, 0xe739c, 0xe739ce73 },	/* D */
    852 		{ 90, 62, 0x739ce, 0x739ce739 },	/* E */
    853 		{ 92, 62, 0x7bdef, 0x7bdef7bd }		/* F */
    854 	};
    855 
    856 
    857 	fsqrt_t sqrt_guard_cases[16] = {
    858 		{ 0x1.08800p0,  0, 0x04371, 0xd9ab72fb}, /* :0 B8.8440  */
    859 		{ 0x0.D2200p0, -1, 0xcfdca, 0xf353049e}, /* :1 A4.6910  */
    860 		{ 0x1.A8220p0,  0, 0x49830, 0x2b49cd6d}, /* :2 E9.D411  */
    861 		{ 0x1.05A20p0,  0, 0x02cd1, 0x3b44f3bf}, /* :3 B7.82D1  */
    862 		{ 0x0.CA820p0, -1, 0xc7607, 0x3cec0937}, /* :4 A1.6541  */
    863 		{ 0x1.DCA20p0,  0, 0x5d4f8, 0xd4e4c2b2}, /* :5 F7.EE51  */
    864 		{ 0x1.02C80p0,  0, 0x01630, 0x9cde7483}, /* :6 B6.8164  */
    865 		{ 0x0.DC800p0, -1, 0xdb2cf, 0xe686fe7c}, /* :7 A8.6E40  */
    866 		{ 0x0.CF920p0, -1, 0xcd089, 0xb6860626}, /* :8 A3.67C9  */
    867 		{ 0x1.1D020p0,  0, 0x0e1d6, 0x2e78ed9d}, /* :9 BF.8E81  */
    868 		{ 0x0.E1C80p0, -1, 0xe0d52, 0x6020fb6b}, /* :A AA.70E4  */
    869 		{ 0x0.C8000p0, -1, 0xc48c6, 0x001f0abf}, /* :B A0.6400  */
    870 		{ 0x1.48520p0,  0, 0x21e9e, 0xd813e2e2}, /* :C CD.A429  */
    871 		{ 0x0.F4C20p0, -1, 0xf4a1b, 0x09bbf0b0}, /* :D B1.7A61  */
    872 		{ 0x0.CD080p0, -1, 0xca348, 0x79b907ae}, /* :E A2.6684  */
    873 		{ 0x1.76B20p0,  0, 0x35b67, 0x81aed827}  /* :F DB.BB59  */
    874 	};
    875 
    876 	/*	0x1.00000 00000000p-3 */
    877 	/* set up the invariant fields of B, the arg to cause rounding */
    878 	B.dbl = 0.0;
    879 	B.layout.exp = 1020;
    880 
    881 	/* set up args so result is always Z = 1.200000000000<g>p+0 */
    882 	Z.dbl = 1.0;
    883 	Z.layout.sign = 0;
    884 
    885 #define TERNOP(op) \
    886 		arg_count = 3; \
    887         __asm__ volatile( \
    888 					op" %0, %1, %2, %3\n\t" \
    889 					: "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
    890 #define BINOP(op) \
    891 		arg_count = 2; \
    892         __asm__ volatile( \
    893 					op" %0, %1, %2\n\t" \
    894 					: "=f"(fD) : "f"(fA) , "f"(fB));
    895 #define UNOP(op) \
    896 		arg_count = 1; \
    897         __asm__ volatile( \
    898 					op" %0, %1\n\t" \
    899 					: "=f"(fD) : "f"(fA));
    900 
    901 	for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
    902 	for (s = (op != FSQRT ? -1 : 1); s < 2; s += 2)
    903 	for (g = 0; g < 16; g += 1) {
    904 		double lo, hi, expected;
    905 		int LSB;
    906 		int guard;
    907 		int z_sign = s;
    908 
    909 		/*
    910 		** one argument will have exponent = 0 as will the result (by
    911 		** design) so choose the other argument with exponent -3 to
    912 		** force a 3 bit shift for scaling leaving us with 3 guard bits
    913 		** and the LSB bit at the bottom of the manitssa.
    914 		*/
    915 		switch(op) {
    916 		case FADD:
    917 			/* 1p+0 + 1.000000000000<g>p-3 */
    918 			B.layout.frac_lo = g;
    919 
    920 			fB = s*B.dbl;
    921 			fA = s*1.0;
    922 
    923 			/* set up Z to be truncated result */
    924 
    925 			/* mask off LSB from resulting guard bits */
    926 			guard = g & 7;
    927 
    928 			Z.layout.frac_hi = 0x20000;
    929 			Z.layout.frac_lo = g >> 3;
    930 
    931 			break;
    932 		case FSUB:
    933 			/* 1.2000000000002p+0 - 1.000000000000<g>p-3 */
    934 			A.dbl = 1.125;
    935 			/* add enough to avoid scaling of the result */
    936 			A.layout.frac_lo = 0x2;
    937 			fA = s*A.dbl;
    938 
    939 			B.layout.frac_lo = g;
    940 			fB = s*B.dbl;
    941 
    942 			/* set up Z to be truncated result */
    943 			guard = (0x10-g);
    944 			Z.layout.frac_hi = 0x0;
    945 			Z.layout.frac_lo = guard>>3;
    946 
    947 			/* mask off LSB from resulting guard bits */
    948 			guard &= 7;
    949 			break;
    950 		case FMUL:
    951 			/* 1 + g*2^-52 */
    952 			A.dbl = 1.0;
    953 			A.layout.frac_lo = g;
    954 			fA = s*A.dbl;
    955 			fB = 1.125;
    956 
    957 			/* set up Z to be truncated result */
    958 			Z.dbl = 1.0;
    959 			Z.layout.frac_hi = 0x20000;
    960 			Z.layout.frac_lo = g + (g>>3);
    961 			guard = g & 7;
    962 			break;
    963 		case FMADD:
    964 		case FMSUB:
    965 		case FNMADD:
    966 		case FNMSUB:
    967 			/* 1 + g*2^-52 */
    968 			A.dbl = 1.0;
    969 			A.layout.frac_lo = g;
    970 			fA = s*A.dbl;
    971 			fB = 1.125;
    972 
    973 			/* 1.0000000000001p-1 */
    974 			A.dbl = 0.5;
    975 			A.layout.frac_lo = 1;
    976 			fC = (op == FMADD || op == FNMADD ? s : -s)*A.dbl;
    977 
    978 			/* set up Z to be truncated result */
    979 			z_sign = (op == FNMADD || op == FNMSUB ? -s : s);
    980 			guard = ((g & 7) + 0x4) & 7;
    981 			Z.dbl = 1.0;
    982 			Z.layout.frac_hi = 0xa0000;
    983 			Z.layout.frac_lo = g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
    984 			break;
    985 		case FDIV:
    986 			/* g >> 3 == LSB, g & 7 == guard bits */
    987 			guard = g & 7;
    988 			if (guard == 0x4) {
    989 				/* special case guard bits == 4, inexact tie */
    990 				fB = s*2.0;
    991 				Z.dbl = 0.0;
    992 				if (g >> 3) {
    993 					fA = dbl_denorm_small + 2*dbl_denorm_small;
    994 					Z.layout.frac_lo = 0x1;
    995 				} else {
    996 					fA = dbl_denorm_small;
    997 				}
    998 			} else {
    999 				fA = s*div_guard_cases[g].num;
   1000 				fB = div_guard_cases[g].den;
   1001 
   1002 				printf("%d/%d\n",
   1003 					s*div_guard_cases[g].num,
   1004 					div_guard_cases[g].den);
   1005 				Z.dbl = 1.0;
   1006 				Z.layout.frac_hi = div_guard_cases[g].hi;
   1007 				Z.layout.frac_lo = div_guard_cases[g].lo;
   1008 			}
   1009 			break;
   1010 		case FSQRT:
   1011 			fA = s*sqrt_guard_cases[g].arg;
   1012 			Z.dbl = 1.0;
   1013 			Z.layout.exp = sqrt_guard_cases[g].exp + 1023;
   1014 			Z.layout.frac_hi = sqrt_guard_cases[g].hi;
   1015 			Z.layout.frac_lo = sqrt_guard_cases[g].lo;
   1016 			guard = g >> 1;
   1017 			if (g & 1) guard |= 1;
   1018 			/* don't have test cases for when X bit = 0 */
   1019 			if (guard == 0 || guard == 4) continue;
   1020 			break;
   1021 		default:
   1022 			assert("check_double_guarded_arithmetic_op: unexpected op",
   1023 				FALSE);
   1024 			break;
   1025 		}
   1026 
   1027 		/* get LSB for tie breaking */
   1028 		LSB = Z.layout.frac_lo & 1;
   1029 
   1030 		/* set up hi and lo */
   1031 		lo = z_sign*Z.dbl;
   1032 		Z.layout.frac_lo += 1;
   1033 		hi = z_sign*Z.dbl;
   1034 
   1035 		switch(mode) {
   1036 		case TO_NEAREST:
   1037 			/* look at 3 guard bits to determine expected rounding */
   1038 			switch(guard) {
   1039 			case 0:
   1040 			case 1: case 2: case 3:
   1041 				expected = lo;
   1042 				break;
   1043 			case 4:	/* tie: round to even */
   1044 				if (debug) printf("tie: LSB = %d\n", LSB);
   1045 				expected = (LSB == 0 ? lo : hi);
   1046 				break;
   1047 			case 5: case 6: case 7:
   1048 				expected = hi;
   1049 				break;
   1050 			default:
   1051 				assert("check_double_guarded_arithmetic_op: unexpected guard",
   1052 					FALSE);
   1053 			}
   1054 			break;
   1055 		case TO_ZERO:
   1056 			expected = lo;
   1057 			break;
   1058 		case TO_PLUS_INFINITY:
   1059 			if (guard == 0) {
   1060 				/* no rounding */
   1061 				expected = lo;
   1062 			} else {
   1063 				expected = (s == 1 ? hi : lo);
   1064 			}
   1065 			break;
   1066 		case TO_MINUS_INFINITY:
   1067 			if (guard == 0) {
   1068 				/* no rounding */
   1069 				expected = lo;
   1070 			} else {
   1071 				expected = (s == 1 ? lo : hi);
   1072 			}
   1073 			break;
   1074 		}
   1075 
   1076 		set_rounding_mode(mode);
   1077 
   1078 		/*
   1079 		** do the double precision dual operation just for comparison
   1080 		** when debugging
   1081 		*/
   1082 		switch(op) {
   1083 		case FADD:
   1084 			BINOP("fadd");
   1085 			Res.dbl = fD;
   1086 			break;
   1087 		case FSUB:
   1088 			BINOP("fsub");
   1089 			Res.dbl = fD;
   1090 			break;
   1091 		case FMUL:
   1092 			BINOP("fmul");
   1093 			Res.dbl = fD;
   1094 			break;
   1095 		case FMADD:
   1096 			TERNOP("fmadd");
   1097 			Res.dbl = fD;
   1098 			break;
   1099 		case FMSUB:
   1100 			TERNOP("fmsub");
   1101 			Res.dbl = fD;
   1102 			break;
   1103 		case FNMADD:
   1104 			TERNOP("fnmadd");
   1105 			Res.dbl = fD;
   1106 			break;
   1107 		case FNMSUB:
   1108 			TERNOP("fnmsub");
   1109 			Res.dbl = fD;
   1110 			break;
   1111 		case FDIV:
   1112 			BINOP("fdiv");
   1113 			Res.dbl = fD;
   1114 			break;
   1115 		case FSQRT:
   1116 			UNOP("fsqrt");
   1117 			Res.dbl = fD;
   1118 			break;
   1119 		default:
   1120 			assert("check_double_guarded_arithmetic_op: unexpected op",
   1121 				FALSE);
   1122 			break;
   1123 		}
   1124 #undef UNOP
   1125 #undef BINOP
   1126 #undef TERNOP
   1127 
   1128 		Exp.dbl = expected;
   1129 
   1130 		if ((Res.layout.sign != Exp.layout.sign) ||
   1131 			(Res.layout.exp != Exp.layout.exp) ||
   1132 			(Res.layout.frac_lo != Exp.layout.frac_lo) ||
   1133 			(Res.layout.frac_hi != Exp.layout.frac_hi)) {
   1134 			result = "FAILED";
   1135 			status = 1;
   1136 		} else {
   1137 			result = "PASSED";
   1138 			status = 0;
   1139 		}
   1140 
   1141 		printf("%s:%s:%s(%-13a",
   1142 			round_mode_name[mode], result, flt_op_names[op], fA);
   1143 		if (arg_count > 1) printf(", %-13a", fB);
   1144 		if (arg_count > 2) printf(", %-13a", fC);
   1145 		printf(") = %-13a", Res.dbl);
   1146 		if (status) printf("\n\texpected %a", Exp.dbl);
   1147 		putchar('\n');
   1148 
   1149 		if (debug) {
   1150 			print_double("hi", hi);
   1151 			print_double("lo", lo);
   1152 			print_double("expected", expected);
   1153 			print_double("got", Res.dbl);
   1154 		}
   1155 	}
   1156 
   1157 	return status;
   1158 }
   1159 
   1160 int test_float_arithmetic_ops()
   1161 {
   1162 	int status = 0;
   1163 	flt_op_t op;
   1164 
   1165 	/*
   1166 	** choose FP operands whose result should be rounded to either
   1167 	** lo or hi.
   1168 	*/
   1169 
   1170 	printf("-------------------------- %s --------------------------\n",
   1171 		"test rounding of float operators without guard bits");
   1172 	for (op = FADDS; op <= FDIVS; op++) {
   1173 		status |= check_single_arithmetic_op(op);
   1174 	}
   1175 
   1176 	printf("-------------------------- %s --------------------------\n",
   1177 		"test rounding of float operators with guard bits");
   1178 	for (op = FADDS; op <= FNMSUBS; op++) {
   1179 		status |= check_single_guarded_arithmetic_op(op);
   1180 	}
   1181 
   1182 	printf("-------------------------- %s --------------------------\n",
   1183 		"test rounding of double operators with guard bits");
   1184 	for (op = FADD; op <= FSQRT; op++) {
   1185 		status |= check_double_guarded_arithmetic_op(op);
   1186 	}
   1187 	return status;
   1188 }
   1189 
   1190 
   1191 int
   1192 main()
   1193 {
   1194 	int status = 0;
   1195 
   1196 	init();
   1197 
   1198 	status |= test_dbl_to_float_convert("test denormalized convert", &denorm_small);
   1199 	status |= test_dbl_to_float_convert("test normalized convert", &norm_small);
   1200 	status |= test_int_to_float_convert("test (float)int convert");
   1201 	status |= test_int_to_float_convert("test (float)int convert");
   1202 
   1203 #ifdef __powerpc64__
   1204 	status |= test_long_to_double_convert("test (double)long convert");
   1205 #endif
   1206 	status |= test_float_arithmetic_ops();
   1207 	return status;
   1208 }
   1209