Home | History | Annotate | Download | only in ppc64
      1 
      2 /*  Copyright (C) 2006 Dave Nomura
      3        dcnltc (at) us.ibm.com
      4 
      5     This program is free software; you can redistribute it and/or
      6     modify it under the terms of the GNU General Public License as
      7     published by the Free Software Foundation; either version 2 of the
      8     License, or (at your option) any later version.
      9 
     10     This program is distributed in the hope that it will be useful, but
     11     WITHOUT ANY WARRANTY; without even the implied warranty of
     12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13     General Public License for more details.
     14 
     15     You should have received a copy of the GNU General Public License
     16     along with this program; if not, write to the Free Software
     17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     18     02111-1307, USA.
     19 
     20     The GNU General Public License is contained in the file COPYING.
     21 */
     22 
     23 #include <stdio.h>
     24 #include <stdlib.h>
     25 #include <limits.h>
     26 
     27 typedef enum { FALSE=0, TRUE } bool_t;
     28 
     29 typedef enum {
     30 	FADDS, FSUBS, FMULS, FDIVS,
     31 	FMADDS, FMSUBS, FNMADDS, FNMSUBS,
     32 	FADD, FSUB, FMUL, FDIV, FMADD,
     33 	FMSUB, FNMADD, FNMSUB, FSQRT
     34 } flt_op_t;
     35 
     36 typedef enum {
     37 	TO_NEAREST=0, TO_ZERO, TO_PLUS_INFINITY, TO_MINUS_INFINITY } round_mode_t;
     38 char *round_mode_name[] = { "near", "zero", "+inf", "-inf" };
     39 
     40 const char *flt_op_names[] = {
     41 	"fadds", "fsubs", "fmuls", "fdivs",
     42 	"fmadds", "fmsubs", "fnmadds", "fnmsubs",
     43 	"fadd", "fsub", "fmul", "fdiv", "fmadd", "fmsub", "fnmadd",
     44 	"fnmsub", "fsqrt"
     45 };
     46 
     47 typedef unsigned int fpscr_t;
     48 
     49 typedef union {
     50 	float flt;
     51 	struct {
     52 		unsigned int sign:1;
     53 		unsigned int exp:8;
     54 		unsigned int frac:23;
     55 	} layout;
     56 } flt_overlay;
     57 
     58 typedef union {
     59 	double dbl;
     60 	struct {
     61 		unsigned int sign:1;
     62 		unsigned int exp:11;
     63 		unsigned int frac_hi:20;
     64 		unsigned int frac_lo:32;
     65 	} layout;
     66 	struct {
     67 		unsigned int hi;
     68 		unsigned int lo;
     69 	} dbl_pair;
     70 } dbl_overlay;
     71 
     72 void assert_fail(const char *msg,
     73 	const char* expr, const char* file, int line, const char*fn);
     74 
     75 #define STRING(__str)  #__str
     76 #define assert(msg, expr)                                           \
     77   ((void) ((expr) ? 0 :                                         \
     78            (assert_fail (msg, STRING(expr),                  \
     79                              __FILE__, __LINE__,                \
     80                              __PRETTY_FUNCTION__), 0)))
     81 float denorm_small;
     82 double dbl_denorm_small;
     83 float norm_small;
     84 bool_t debug = FALSE;
     85 bool_t long_is_64_bits = sizeof(long) == 8;
     86 
     87 void assert_fail (msg, expr, file, line, fn)
     88 const char* msg;
     89 const char* expr;
     90 const char* file;
     91 int line;
     92 const char*fn;
     93 {
     94    printf( "\n%s: %s:%d (%s): Assertion `%s' failed.\n",
     95                msg, file, line, fn, expr );
     96    exit( 1 );
     97 }
     98 void set_rounding_mode(round_mode_t mode)
     99 {
    100 	switch(mode) {
    101 	case TO_NEAREST:
    102 		asm volatile("mtfsfi 7, 0");
    103 		break;
    104 	case TO_ZERO:
    105 		asm volatile("mtfsfi 7, 1");
    106 		break;
    107 	case TO_PLUS_INFINITY:
    108 		asm volatile("mtfsfi 7, 2");
    109 		break;
    110 	case TO_MINUS_INFINITY:
    111 		asm volatile("mtfsfi 7, 3");
    112 		break;
    113 	}
    114 }
    115 
    116 void print_double(char *msg, double dbl)
    117 {
    118 	dbl_overlay D;
    119 	D.dbl = dbl;
    120 
    121 	printf("%15s : dbl %-20a = %c(%4d, %05x%08x)\n",
    122 			msg, D.dbl, (D.layout.sign == 0 ? '+' : '-'),
    123 			D.layout.exp, D.layout.frac_hi, D.layout.frac_lo);
    124 }
    125 
    126 void print_single(char *msg, float *flt)
    127 {
    128 	flt_overlay F;
    129 	F.flt = *flt;
    130 
    131 	/* NOTE: for the purposes of comparing the fraction of a single with
    132 	**       a double left shift the .frac so that hex digits are grouped
    133 	**	     from left to right.  this is necessary because the size of a
    134 	**		 single mantissa (23) bits is not a multiple of 4
    135 	*/
    136 	printf("%15s : flt %-20a = %c(%4d, %06x)\n",
    137 		msg, F.flt, (F.layout.sign == 0 ? '+' : '-'), F.layout.exp, F.layout.frac << 1);
    138 }
    139 
    140 int check_dbl_to_flt_round(round_mode_t mode, double dbl, float *expected)
    141 {
    142 	int status = 0;
    143 	flt_overlay R, E;
    144 	char *result;
    145 
    146 	set_rounding_mode(mode);
    147 
    148 	E.flt = *expected;
    149 	R.flt = (float)dbl;
    150 
    151 	if ((R.layout.sign != E.layout.sign) ||
    152 		(R.layout.exp != E.layout.exp) ||
    153 		(R.layout.frac != E.layout.frac)) {
    154 		result = "FAILED";
    155 		status = 1;
    156 	} else {
    157 		result = "PASSED";
    158 		status = 0;
    159 	}
    160 	printf("%s:%s:(double)(%-20a) = %20a",
    161 		round_mode_name[mode], result, R.flt, dbl);
    162 	if (status) {
    163 		print_single("\n\texpected", &E.flt);
    164 		print_single("\n\trounded ", &R.flt);
    165 	}
    166 	putchar('\n');
    167 	return status;
    168 }
    169 
    170 int test_dbl_to_float_convert(char *msg, float *base)
    171 {
    172 	int status = 0;
    173 	double half = (double)denorm_small/2;
    174 	double qtr = half/2;
    175 	double D_hi = (double)*base + half + qtr;
    176 	double D_lo = (double)*base + half - qtr;
    177 	float F_lo = *base;
    178 	float F_hi = F_lo + denorm_small;
    179 
    180 
    181 	/*
    182 	** .....+-----+-----+-----+-----+---....
    183 	**      ^F_lo ^           ^     ^
    184 	**            D_lo
    185 	**                        D_hi
    186 	**                              F_hi
    187 	** F_lo and F_hi are two consecutive single float model numbers
    188 	** denorm_small distance apart. D_lo and D_hi are two numbers
    189 	** within that range that are not representable as single floats
    190 	** and will be rounded to either F_lo or F_hi.
    191 	*/
    192 	printf("-------------------------- %s --------------------------\n", msg);
    193 	if (debug) {
    194 		print_double("D_lo", D_lo);
    195 		print_double("D_hi", D_hi);
    196 		print_single("F_lo", &F_lo);
    197 		print_single("F_hi", &F_hi);
    198 	}
    199 
    200 	/* round to nearest */
    201 	status |= check_dbl_to_flt_round(TO_NEAREST, D_hi, &F_hi);
    202 	status |= check_dbl_to_flt_round(TO_NEAREST, D_lo, &F_lo);
    203 
    204 	/* round to zero */
    205 	status |= check_dbl_to_flt_round(TO_ZERO, D_hi, (D_hi > 0 ? &F_lo : &F_hi));
    206 	status |= check_dbl_to_flt_round(TO_ZERO, D_lo, (D_hi > 0 ? &F_lo : &F_hi));
    207 
    208 	/* round to +inf */
    209 	status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_hi, &F_hi);
    210 	status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_lo, &F_hi);
    211 
    212 	/* round to -inf */
    213 	status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_hi, &F_lo);
    214 	status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_lo, &F_lo);
    215 	return status;
    216 }
    217 
    218 void
    219 init()
    220 {
    221 	flt_overlay F;
    222 	dbl_overlay D;
    223 
    224 	/* small is the smallest denormalized single float number */
    225 	F.layout.sign = 0;
    226 	F.layout.exp = 0;
    227 	F.layout.frac = 1;
    228 	denorm_small = F.flt;	/* == 2^(-149) */
    229 	if (debug) {
    230 		print_double("float small", F.flt);
    231 	}
    232 
    233 	D.layout.sign = 0;
    234 	D.layout.exp = 0;
    235 	D.layout.frac_hi = 0;
    236 	D.layout.frac_lo = 1;
    237 	dbl_denorm_small = D.dbl;	/* == 2^(-1022) */
    238 	if (debug) {
    239 		print_double("double small", D.dbl);
    240 	}
    241 
    242 	/* n_small is the smallest normalized single precision float */
    243 	F.layout.exp = 1;
    244 	norm_small = F.flt;
    245 }
    246 
    247 int check_int_to_flt_round(round_mode_t mode, long L, float *expected)
    248 {
    249 	int status = 0;
    250 	int I = L;
    251 	char *int_name = "int";
    252 	flt_overlay R, E;
    253 	char *result;
    254 	int iter;
    255 
    256 	set_rounding_mode(mode);
    257 	E.flt = *expected;
    258 
    259 	for (iter = 0; iter < 2; iter++) {
    260 		int stat = 0;
    261 		R.flt = (iter == 0 ? (float)I : (float)L);
    262 
    263 		if ((R.layout.sign != E.layout.sign) ||
    264 			(R.layout.exp != E.layout.exp) ||
    265 			(R.layout.frac != E.layout.frac)) {
    266 			result = "FAILED";
    267 			stat = 1;
    268 		} else {
    269 			result = "PASSED";
    270 			stat = 0;
    271 		}
    272 		printf("%s:%s:(float)(%4s)%9d = %11.1f",
    273 			round_mode_name[mode], result, int_name, I, R.flt);
    274 		if (stat) {
    275 			print_single("\n\texpected: %.1f ", &E.flt);
    276 			print_single("\n\trounded ", &R.flt);
    277 		}
    278 		putchar('\n');
    279 		status |= stat;
    280 
    281 		if (!long_is_64_bits) break;
    282 		int_name = "long";
    283 	}
    284 	return status;
    285 }
    286 
    287 int check_long_to_dbl_round(round_mode_t mode, long L, double *expected)
    288 {
    289 	int status = 0;
    290 	dbl_overlay R, E;
    291 	char *result;
    292 
    293 	set_rounding_mode(mode);
    294 	E.dbl = *expected;
    295 
    296 	R.dbl = (double)L;
    297 
    298 	if ((R.layout.sign != E.layout.sign) ||
    299 		(R.layout.exp != E.layout.exp) ||
    300 		(R.layout.frac_lo != E.layout.frac_lo) ||
    301 		(R.layout.frac_hi != E.layout.frac_hi)) {
    302 		result = "FAILED";
    303 		status = 1;
    304 	} else {
    305 		result = "PASSED";
    306 		status = 0;
    307 	}
    308 	printf("%s:%s:(double)(%18ld) = %20.1f",
    309 		round_mode_name[mode], result, L, R.dbl);
    310 	if (status) {
    311 		printf("\n\texpected %.1f : ", E.dbl);
    312 	}
    313 	putchar('\n');
    314 	return status;
    315 }
    316 
    317 int test_int_to_float_convert(char *msg)
    318 {
    319 	int status = 0;
    320 	int int24_hi = 0x03ff0fff;
    321 	int int24_lo = 0x03ff0ffd;
    322 	float pos_flt_lo = 67047420.0;
    323 	float pos_flt_hi = 67047424.0;
    324 	float neg_flt_lo = -67047420.0;
    325 	float neg_flt_hi = -67047424.0;
    326 
    327 	printf("-------------------------- %s --------------------------\n", msg);
    328 	status |= check_int_to_flt_round(TO_NEAREST, int24_lo, &pos_flt_lo);
    329 	status |= check_int_to_flt_round(TO_NEAREST, int24_hi, &pos_flt_hi);
    330 	status |= check_int_to_flt_round(TO_ZERO, int24_lo, &pos_flt_lo);
    331 	status |= check_int_to_flt_round(TO_ZERO, int24_hi, &pos_flt_lo);
    332 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_lo, &pos_flt_hi);
    333 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_hi, &pos_flt_hi);
    334 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_lo, &pos_flt_lo);
    335 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_hi, &pos_flt_lo);
    336 
    337 	status |= check_int_to_flt_round(TO_NEAREST, -int24_lo, &neg_flt_lo);
    338 	status |= check_int_to_flt_round(TO_NEAREST, -int24_hi, &neg_flt_hi);
    339 	status |= check_int_to_flt_round(TO_ZERO, -int24_lo, &neg_flt_lo);
    340 	status |= check_int_to_flt_round(TO_ZERO, -int24_hi, &neg_flt_lo);
    341 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_lo, &neg_flt_lo);
    342 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_hi, &neg_flt_lo);
    343 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_lo, &neg_flt_hi);
    344 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_hi, &neg_flt_hi);
    345 	return status;
    346 }
    347 
    348 #ifdef __powerpc64__
    349 int test_long_to_double_convert(char *msg)
    350 {
    351 	int status = 0;
    352 	long long55_hi = 0x07ff0ffffffffff;
    353 	long long55_lo = 0x07ff0fffffffffd;
    354 	double pos_dbl_lo = 36012304344547324.0;
    355 	double pos_dbl_hi = 36012304344547328.0;
    356 	double neg_dbl_lo = -36012304344547324.0;
    357 	double neg_dbl_hi = -36012304344547328.0;
    358 
    359 	printf("-------------------------- %s --------------------------\n", msg);
    360 	status |= check_long_to_dbl_round(TO_NEAREST, long55_lo, &pos_dbl_lo);
    361 	status |= check_long_to_dbl_round(TO_NEAREST, long55_hi, &pos_dbl_hi);
    362 	status |= check_long_to_dbl_round(TO_ZERO, long55_lo, &pos_dbl_lo);
    363 	status |= check_long_to_dbl_round(TO_ZERO, long55_hi, &pos_dbl_lo);
    364 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_lo, &pos_dbl_hi);
    365 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_hi, &pos_dbl_hi);
    366 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_lo, &pos_dbl_lo);
    367 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_hi, &pos_dbl_lo);
    368 
    369 	status |= check_long_to_dbl_round(TO_NEAREST, -long55_lo, &neg_dbl_lo);
    370 	status |= check_long_to_dbl_round(TO_NEAREST, -long55_hi, &neg_dbl_hi);
    371 	status |= check_long_to_dbl_round(TO_ZERO, -long55_lo, &neg_dbl_lo);
    372 	status |= check_long_to_dbl_round(TO_ZERO, -long55_hi, &neg_dbl_lo);
    373 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_lo, &neg_dbl_lo);
    374 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_hi, &neg_dbl_lo);
    375 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_lo, &neg_dbl_hi);
    376 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_hi, &neg_dbl_hi);
    377 	return status;
    378 }
    379 #endif
    380 
    381 int check_single_arithmetic_op(flt_op_t op)
    382 {
    383 		char *result;
    384         int status = 0;
    385         dbl_overlay R, E;
    386         double qtr, half, fA, fB, fD;
    387 		round_mode_t mode;
    388 		int q, s;
    389 		bool_t two_args = TRUE;
    390 		float whole = denorm_small;
    391 
    392 #define BINOP(op) \
    393         __asm__ volatile( \
    394 					op" %0, %1, %2\n\t" \
    395 					: "=f"(fD) : "f"(fA) , "f"(fB));
    396 #define UNOP(op) \
    397         __asm__ volatile( \
    398 					op" %0, %1\n\t" \
    399 					: "=f"(fD) : "f"(fA));
    400 
    401 		half = (double)whole/2;
    402 		qtr = half/2;
    403 
    404 		if (debug) {
    405 			print_double("qtr", qtr);
    406 			print_double("whole", whole);
    407 			print_double("2*whole", 2*whole);
    408 		}
    409 
    410 		for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
    411 		for (s = -1; s < 2; s += 2)
    412 		for (q = 1; q < 4; q += 2) {
    413 			double expected;
    414 			double lo = s*whole;
    415 			double hi = s*2*whole;
    416 
    417 			switch(op) {
    418 			case FADDS:
    419 				fA = s*whole;
    420 				fB = s*q*qtr;
    421 				break;
    422 			case FSUBS:
    423 				fA = s*2*whole;
    424 				fB = s*(q == 1 ? 3 : 1)*qtr;
    425 				break;
    426 			case FMULS:
    427 				fA = 0.5;
    428 				fB = s*(4+q)*half;
    429 				break;
    430 			case FDIVS:
    431 				fA = s*(4+q)*half;
    432 				fB = 2.0;
    433 				break;
    434 			default:
    435 				assert("check_single_arithmetic_op: unexpected op",
    436 					FALSE);
    437 				break;
    438 			}
    439 
    440 			switch(mode) {
    441 			case TO_NEAREST:
    442 				expected = (q == 1 ? lo : hi);
    443 				break;
    444 			case TO_ZERO:
    445 				expected = lo;
    446 				break;
    447 			case TO_PLUS_INFINITY:
    448 				expected = (s == 1 ? hi : lo);
    449 				break;
    450 			case TO_MINUS_INFINITY:
    451 				expected = (s == 1 ? lo : hi);
    452 				break;
    453 			}
    454 
    455 			set_rounding_mode(mode);
    456 
    457 			/*
    458 			** do the double precision dual operation just for comparison
    459 			** when debugging
    460 			*/
    461 			switch(op) {
    462 			case FADDS:
    463 				BINOP("fadds");
    464 				R.dbl = fD;
    465 				BINOP("fadd");
    466 				break;
    467 			case FSUBS:
    468 				BINOP("fsubs");
    469 				R.dbl = fD;
    470 				BINOP("fsub");
    471 				break;
    472 			case FMULS:
    473 				BINOP("fmuls");
    474 				R.dbl = fD;
    475 				BINOP("fmul");
    476 				break;
    477 			case FDIVS:
    478 				BINOP("fdivs");
    479 				R.dbl = fD;
    480 				BINOP("fdiv");
    481 				break;
    482 			default:
    483 				assert("check_single_arithmetic_op: unexpected op",
    484 					FALSE);
    485 				break;
    486 			}
    487 #undef UNOP
    488 #undef BINOP
    489 
    490 			E.dbl = expected;
    491 
    492 			if ((R.layout.sign != E.layout.sign) ||
    493 				(R.layout.exp != E.layout.exp) ||
    494 				(R.layout.frac_lo != E.layout.frac_lo) ||
    495 				(R.layout.frac_hi != E.layout.frac_hi)) {
    496 				result = "FAILED";
    497 				status = 1;
    498 			} else {
    499 				result = "PASSED";
    500 				status = 0;
    501 			}
    502 
    503 			printf("%s:%s:%s(%-13a",
    504 				round_mode_name[mode], result, flt_op_names[op], fA);
    505 			if (two_args) printf(", %-13a", fB);
    506 			printf(") = %-13a", R.dbl);
    507 			if (status) printf("\n\texpected %a", E.dbl);
    508 			putchar('\n');
    509 
    510 			if (debug) {
    511 				print_double("hi", hi);
    512 				print_double("lo", lo);
    513 				print_double("expected", expected);
    514 				print_double("got", R.dbl);
    515 				print_double("double result", fD);
    516 			}
    517 		}
    518 
    519 		return status;
    520 }
    521 
    522 int check_single_guarded_arithmetic_op(flt_op_t op)
    523 {
    524 		typedef struct {
    525 			int num, den, frac;
    526 		} fdivs_t;
    527 
    528 		char *result;
    529         int status = 0;
    530         flt_overlay A, B, Z;
    531         dbl_overlay Res, Exp;
    532         double fA, fB, fC, fD;
    533 		round_mode_t mode;
    534 		int g, s;
    535 		int arg_count;
    536 
    537 		fdivs_t divs_guard_cases[16] = {
    538 			{ 105, 56, 0x700000 },  /* : 0 */
    539 			{ 100, 57, 0x608FB8 },  /* : 1 */
    540 			{ 000, 00, 0x000000 },  /* : X */
    541 			{ 100, 52, 0x762762 },  /* : 3 */
    542 			{ 000, 00, 0x000000 },  /* : X */
    543 			{ 100, 55, 0x68BA2E },  /* : 5 */
    544 			{ 000, 00, 0x000000 },  /* : X */
    545 			{ 100, 51, 0x7AFAFA },  /* : 7 */
    546 			{ 000, 00, 0x000000 },  /* : X */
    547 			{ 100, 56, 0x649249 },  /* : 9 */
    548 			{ 000, 00, 0x000000 },  /* : X */
    549 			{ 100, 54, 0x6D097B },  /* : B */
    550 			{ 000, 00, 0x000000 },  /* : X */
    551 			{ 100, 59, 0x58F2FB },  /* : D */
    552 			{ 000, 00, 0x000000 },  /* : X */
    553 			{ 101, 52, 0x789D89 }  /* : F */
    554 		};
    555 
    556 		/*	0x1.00000 00000000p-3 */
    557 		/* set up the invariant fields of B, the arg to cause rounding */
    558 		B.flt = 0.0;
    559 		B.layout.exp = 124;  /* -3 */
    560 
    561 		/* set up args so result is always Z = 1.200000000000<g>p+0 */
    562 		Z.flt = 1.0;
    563 		Z.layout.sign = 0;
    564 
    565 #define TERNOP(op) \
    566 		arg_count = 3; \
    567         __asm__ volatile( \
    568 					op" %0, %1, %2, %3\n\t" \
    569 					: "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
    570 #define BINOP(op) \
    571 		arg_count = 2; \
    572         __asm__ volatile( \
    573 					op" %0, %1, %2\n\t" \
    574 					: "=f"(fD) : "f"(fA) , "f"(fB));
    575 #define UNOP(op) \
    576 		arg_count = 1; \
    577         __asm__ volatile( \
    578 					op" %0, %1\n\t" \
    579 					: "=f"(fD) : "f"(fA));
    580 
    581 	for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
    582 	for (s = -1; s < 2; s += 2)
    583 	for (g = 0; g < 16; g += 1) {
    584 		double lo, hi, expected;
    585 		int LSB;
    586 		int guard = 0;
    587 		int z_sign = s;
    588 
    589 		/*
    590 		** one argument will have exponent = 0 as will the result (by
    591 		** design) so choose the other argument with exponent -3 to
    592 		** force a 3 bit shift for scaling leaving us with 3 guard bits
    593 		** and the LSB bit at the bottom of the manitssa.
    594 		*/
    595 		switch(op) {
    596 		case FADDS:
    597 			/* 1p+0 + 1.00000<g>p-3 */
    598 			B.layout.frac = g;
    599 
    600 			fB = s*B.flt;
    601 			fA = s*1.0;
    602 
    603 			/* set up Z to be truncated result */
    604 
    605 			/* mask off LSB from resulting guard bits */
    606 			guard = g & 7;
    607 
    608 			Z.layout.frac = 0x100000 | (g >> 3);
    609 			break;
    610 		case FSUBS:
    611 			/* 1.200002p+0 - 1.000000000000<g>p-3 */
    612 			A.flt = 1.125;
    613 			/* add enough to avoid scaling of the result */
    614 			A.layout.frac |= 0x2;
    615 			fA = s*A.flt;
    616 
    617 			B.layout.frac = g;
    618 			fB = s*B.flt;
    619 
    620 			/* set up Z to be truncated result */
    621 			guard = (0x10-g);
    622 			Z.layout.frac = guard>>3;
    623 
    624 			/* mask off LSB from resulting guard bits */
    625 			guard &= 7;
    626 			break;
    627 		case FMULS:
    628 			/* 1 + g*2^-23 */
    629 			A.flt = 1.0;
    630 			A.layout.frac = g;
    631 			fA = s*A.flt;
    632 			fB = 1.125;
    633 
    634 			/* set up Z to be truncated result */
    635 			Z.flt = 1.0;
    636 			Z.layout.frac = 0x100000;
    637 			Z.layout.frac |= g + (g>>3);
    638 			guard = g & 7;
    639 			break;
    640 		case FDIVS:
    641 			/* g >> 3 == LSB, g & 7 == guard bits */
    642 			guard = g & 7;
    643 			if ((guard & 1) == 0) {
    644 				/* special case: guard bit X = 0 */
    645 				A.flt = denorm_small;
    646 				A.layout.frac = g;
    647 				fA = A.flt;
    648 				fB = s*8.0;
    649 				Z.flt = 0.0;
    650 				Z.layout.frac |= (g >> 3);
    651 			} else {
    652 				fA = s*divs_guard_cases[g].num;
    653 				fB = divs_guard_cases[g].den;
    654 
    655 				Z.flt = 1.0;
    656 				Z.layout.frac = divs_guard_cases[g].frac;
    657 			}
    658 			break;
    659 		case FMADDS:
    660 		case FMSUBS:
    661 		case FNMADDS:
    662 		case FNMSUBS:
    663 			/* 1 + g*2^-23 */
    664 			A.flt = 1.0;
    665 			A.layout.frac = g;
    666 			fA = s*A.flt;
    667 			fB = 1.125;
    668 
    669 			/* 1.000001p-1 */
    670 			A.flt = 0.5;
    671 			A.layout.frac = 1;
    672 			fC = (op == FMADDS || op == FNMADDS ? s : -s)*A.flt;
    673 
    674 			/* set up Z to be truncated result */
    675 			z_sign = (op == FNMADDS || op == FNMSUBS ? -s : s);
    676 			guard = ((g & 7) + 0x4) & 7;
    677 			Z.flt = 1.0;
    678 			Z.layout.frac = 0x500000;
    679 			Z.layout.frac |= g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
    680 			break;
    681 		default:
    682 			assert("check_single_arithmetic_op: unexpected op",
    683 				FALSE);
    684 			break;
    685 		}
    686 
    687 		/* get LSB for tie breaking */
    688 		LSB = Z.layout.frac & 1;
    689 
    690 		/* set up hi and lo */
    691 		lo = z_sign*Z.flt;
    692 		Z.layout.frac += 1;
    693 		hi = z_sign*Z.flt;
    694 
    695 		switch(mode) {
    696 		case TO_NEAREST:
    697 			/* look at 3 guard bits to determine expected rounding */
    698 			switch(guard) {
    699 			case 0:
    700 			case 1: case 2: case 3:
    701 				expected = lo;
    702 				break;
    703 			case 4:	/* tie: round to even */
    704 				if (debug) printf("tie: LSB = %d\n", LSB);
    705 				expected = (LSB == 0 ? lo : hi);
    706 				break;
    707 			case 5: case 6: case 7:
    708 				expected = hi;
    709 				break;
    710 			default:
    711 				assert("check_single_guarded_arithmetic_op: unexpected guard",
    712 					FALSE);
    713 			}
    714 			break;
    715 		case TO_ZERO:
    716 			expected = lo;
    717 			break;
    718 		case TO_PLUS_INFINITY:
    719 			if (guard == 0) {
    720 				/* no rounding */
    721 				expected = lo;
    722 			} else {
    723 				expected = (s == 1 ? hi : lo);
    724 			}
    725 			break;
    726 		case TO_MINUS_INFINITY:
    727 			if (guard == 0) {
    728 				/* no rounding */
    729 				expected = lo;
    730 			} else {
    731 				expected = (s == 1 ? lo : hi);
    732 			}
    733 			break;
    734 		}
    735 
    736 		set_rounding_mode(mode);
    737 
    738 		/*
    739 		** do the double precision dual operation just for comparison
    740 		** when debugging
    741 		*/
    742 		switch(op) {
    743 		case FADDS:
    744 			BINOP("fadds");
    745 			Res.dbl = fD;
    746 			break;
    747 		case FSUBS:
    748 			BINOP("fsubs");
    749 			Res.dbl = fD;
    750 			break;
    751 		case FMULS:
    752 			BINOP("fmuls");
    753 			Res.dbl = fD;
    754 			break;
    755 		case FDIVS:
    756 			BINOP("fdivs");
    757 			Res.dbl = fD;
    758 			break;
    759 		case FMADDS:
    760 			TERNOP("fmadds");
    761 			Res.dbl = fD;
    762 			break;
    763 		case FMSUBS:
    764 			TERNOP("fmsubs");
    765 			Res.dbl = fD;
    766 			break;
    767 		case FNMADDS:
    768 			TERNOP("fnmadds");
    769 			Res.dbl = fD;
    770 			break;
    771 		case FNMSUBS:
    772 			TERNOP("fnmsubs");
    773 			Res.dbl = fD;
    774 			break;
    775 		default:
    776 			assert("check_single_guarded_arithmetic_op: unexpected op",
    777 				FALSE);
    778 			break;
    779 		}
    780 #undef UNOP
    781 #undef BINOP
    782 #undef TERNOP
    783 
    784 		Exp.dbl = expected;
    785 
    786 		if ((Res.layout.sign != Exp.layout.sign) ||
    787 			(Res.layout.exp != Exp.layout.exp) ||
    788 			(Res.layout.frac_lo != Exp.layout.frac_lo) ||
    789 			(Res.layout.frac_hi != Exp.layout.frac_hi)) {
    790 			result = "FAILED";
    791 			status = 1;
    792 		} else {
    793 			result = "PASSED";
    794 			status = 0;
    795 		}
    796 
    797 		printf("%s:%s:%s(%-13f",
    798 			round_mode_name[mode], result, flt_op_names[op], fA);
    799 		if (arg_count > 1) printf(", %-13a", fB);
    800 		if (arg_count > 2) printf(", %-13a", fC);
    801 		printf(") = %-13a", Res.dbl);
    802 		if (status) printf("\n\texpected %a", Exp.dbl);
    803 		putchar('\n');
    804 
    805 		if (debug) {
    806 			print_double("hi", hi);
    807 			print_double("lo", lo);
    808 			print_double("expected", expected);
    809 			print_double("got", Res.dbl);
    810 		}
    811 	}
    812 
    813 	return status;
    814 }
    815 
    816 int check_double_guarded_arithmetic_op(flt_op_t op)
    817 {
    818 	typedef struct {
    819 		int num, den, hi, lo;
    820 	} fdiv_t;
    821 	typedef struct {
    822 		double arg;
    823 		int exp, hi, lo;
    824 	} fsqrt_t;
    825 
    826 	char *result;
    827 	int status = 0;
    828 	dbl_overlay A, B, Z;
    829 	dbl_overlay Res, Exp;
    830 	double fA, fB, fC, fD;
    831 	round_mode_t mode;
    832 	int g, s;
    833 	int arg_count;
    834 	fdiv_t div_guard_cases[16] = {
    835 		{ 62, 62, 0x00000, 0x00000000 },	/* 0 */
    836 		{ 64, 62, 0x08421, 0x08421084 },	/* 1 */
    837 		{ 66, 62, 0x10842, 0x10842108 },	/* 2 */
    838 		{ 100, 62, 0x9ce73, 0x9ce739ce },	/* 3 */
    839 		{ 100, 62, 0x9ce73, 0x9ce739ce },	/* X */
    840 		{ 102, 62, 0xa5294, 0xa5294a52 },	/* 5 */
    841 		{ 106, 62, 0xb5ad6, 0xb5ad6b5a },	/* 6 */
    842 		{ 108, 62, 0xbdef7, 0xbdef7bde },	/* 7 */
    843 		{ 108, 108, 0x00000, 0x00000000 },	/* 8 */
    844 		{ 112, 62, 0xce739, 0xce739ce7 },	/* 9 */
    845 		{ 114, 62, 0xd6b5a, 0xd6b5ad6b },	/* A */
    846 		{ 116, 62, 0xdef7b, 0xdef7bdef },	/* B */
    847 		{ 84, 62, 0x5ad6b, 0x5ad6b5ad },	/* X */
    848 		{ 118, 62, 0xe739c, 0xe739ce73 },	/* D */
    849 		{ 90, 62, 0x739ce, 0x739ce739 },	/* E */
    850 		{ 92, 62, 0x7bdef, 0x7bdef7bd }		/* F */
    851 	};
    852 
    853 
    854 	fsqrt_t sqrt_guard_cases[16] = {
    855 		{ 0x1.08800p0,  0, 0x04371, 0xd9ab72fb}, /* :0 B8.8440  */
    856 		{ 0x0.D2200p0, -1, 0xcfdca, 0xf353049e}, /* :1 A4.6910  */
    857 		{ 0x1.A8220p0,  0, 0x49830, 0x2b49cd6d}, /* :2 E9.D411  */
    858 		{ 0x1.05A20p0,  0, 0x02cd1, 0x3b44f3bf}, /* :3 B7.82D1  */
    859 		{ 0x0.CA820p0, -1, 0xc7607, 0x3cec0937}, /* :4 A1.6541  */
    860 		{ 0x1.DCA20p0,  0, 0x5d4f8, 0xd4e4c2b2}, /* :5 F7.EE51  */
    861 		{ 0x1.02C80p0,  0, 0x01630, 0x9cde7483}, /* :6 B6.8164  */
    862 		{ 0x0.DC800p0, -1, 0xdb2cf, 0xe686fe7c}, /* :7 A8.6E40  */
    863 		{ 0x0.CF920p0, -1, 0xcd089, 0xb6860626}, /* :8 A3.67C9  */
    864 		{ 0x1.1D020p0,  0, 0x0e1d6, 0x2e78ed9d}, /* :9 BF.8E81  */
    865 		{ 0x0.E1C80p0, -1, 0xe0d52, 0x6020fb6b}, /* :A AA.70E4  */
    866 		{ 0x0.C8000p0, -1, 0xc48c6, 0x001f0abf}, /* :B A0.6400  */
    867 		{ 0x1.48520p0,  0, 0x21e9e, 0xd813e2e2}, /* :C CD.A429  */
    868 		{ 0x0.F4C20p0, -1, 0xf4a1b, 0x09bbf0b0}, /* :D B1.7A61  */
    869 		{ 0x0.CD080p0, -1, 0xca348, 0x79b907ae}, /* :E A2.6684  */
    870 		{ 0x1.76B20p0,  0, 0x35b67, 0x81aed827}  /* :F DB.BB59  */
    871 	};
    872 
    873 	/*	0x1.00000 00000000p-3 */
    874 	/* set up the invariant fields of B, the arg to cause rounding */
    875 	B.dbl = 0.0;
    876 	B.layout.exp = 1020;
    877 
    878 	/* set up args so result is always Z = 1.200000000000<g>p+0 */
    879 	Z.dbl = 1.0;
    880 	Z.layout.sign = 0;
    881 
    882 #define TERNOP(op) \
    883 		arg_count = 3; \
    884         __asm__ volatile( \
    885 					op" %0, %1, %2, %3\n\t" \
    886 					: "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
    887 #define BINOP(op) \
    888 		arg_count = 2; \
    889         __asm__ volatile( \
    890 					op" %0, %1, %2\n\t" \
    891 					: "=f"(fD) : "f"(fA) , "f"(fB));
    892 #define UNOP(op) \
    893 		arg_count = 1; \
    894         __asm__ volatile( \
    895 					op" %0, %1\n\t" \
    896 					: "=f"(fD) : "f"(fA));
    897 
    898 	for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
    899 	for (s = (op != FSQRT ? -1 : 1); s < 2; s += 2)
    900 	for (g = 0; g < 16; g += 1) {
    901 		double lo, hi, expected;
    902 		int LSB;
    903 		int guard;
    904 		int z_sign = s;
    905 
    906 		/*
    907 		** one argument will have exponent = 0 as will the result (by
    908 		** design) so choose the other argument with exponent -3 to
    909 		** force a 3 bit shift for scaling leaving us with 3 guard bits
    910 		** and the LSB bit at the bottom of the manitssa.
    911 		*/
    912 		switch(op) {
    913 		case FADD:
    914 			/* 1p+0 + 1.000000000000<g>p-3 */
    915 			B.layout.frac_lo = g;
    916 
    917 			fB = s*B.dbl;
    918 			fA = s*1.0;
    919 
    920 			/* set up Z to be truncated result */
    921 
    922 			/* mask off LSB from resulting guard bits */
    923 			guard = g & 7;
    924 
    925 			Z.layout.frac_hi = 0x20000;
    926 			Z.layout.frac_lo = g >> 3;
    927 
    928 			break;
    929 		case FSUB:
    930 			/* 1.2000000000002p+0 - 1.000000000000<g>p-3 */
    931 			A.dbl = 1.125;
    932 			/* add enough to avoid scaling of the result */
    933 			A.layout.frac_lo = 0x2;
    934 			fA = s*A.dbl;
    935 
    936 			B.layout.frac_lo = g;
    937 			fB = s*B.dbl;
    938 
    939 			/* set up Z to be truncated result */
    940 			guard = (0x10-g);
    941 			Z.layout.frac_hi = 0x0;
    942 			Z.layout.frac_lo = guard>>3;
    943 
    944 			/* mask off LSB from resulting guard bits */
    945 			guard &= 7;
    946 			break;
    947 		case FMUL:
    948 			/* 1 + g*2^-52 */
    949 			A.dbl = 1.0;
    950 			A.layout.frac_lo = g;
    951 			fA = s*A.dbl;
    952 			fB = 1.125;
    953 
    954 			/* set up Z to be truncated result */
    955 			Z.dbl = 1.0;
    956 			Z.layout.frac_hi = 0x20000;
    957 			Z.layout.frac_lo = g + (g>>3);
    958 			guard = g & 7;
    959 			break;
    960 		case FMADD:
    961 		case FMSUB:
    962 		case FNMADD:
    963 		case FNMSUB:
    964 			/* 1 + g*2^-52 */
    965 			A.dbl = 1.0;
    966 			A.layout.frac_lo = g;
    967 			fA = s*A.dbl;
    968 			fB = 1.125;
    969 
    970 			/* 1.0000000000001p-1 */
    971 			A.dbl = 0.5;
    972 			A.layout.frac_lo = 1;
    973 			fC = (op == FMADD || op == FNMADD ? s : -s)*A.dbl;
    974 
    975 			/* set up Z to be truncated result */
    976 			z_sign = (op == FNMADD || op == FNMSUB ? -s : s);
    977 			guard = ((g & 7) + 0x4) & 7;
    978 			Z.dbl = 1.0;
    979 			Z.layout.frac_hi = 0xa0000;
    980 			Z.layout.frac_lo = g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
    981 			break;
    982 		case FDIV:
    983 			/* g >> 3 == LSB, g & 7 == guard bits */
    984 			guard = g & 7;
    985 			if (guard == 0x4) {
    986 				/* special case guard bits == 4, inexact tie */
    987 				fB = s*2.0;
    988 				Z.dbl = 0.0;
    989 				if (g >> 3) {
    990 					fA = dbl_denorm_small + 2*dbl_denorm_small;
    991 					Z.layout.frac_lo = 0x1;
    992 				} else {
    993 					fA = dbl_denorm_small;
    994 				}
    995 			} else {
    996 				fA = s*div_guard_cases[g].num;
    997 				fB = div_guard_cases[g].den;
    998 
    999 				printf("%d/%d\n",
   1000 					s*div_guard_cases[g].num,
   1001 					div_guard_cases[g].den);
   1002 				Z.dbl = 1.0;
   1003 				Z.layout.frac_hi = div_guard_cases[g].hi;
   1004 				Z.layout.frac_lo = div_guard_cases[g].lo;
   1005 			}
   1006 			break;
   1007 		case FSQRT:
   1008 			fA = s*sqrt_guard_cases[g].arg;
   1009 			Z.dbl = 1.0;
   1010 			Z.layout.exp = sqrt_guard_cases[g].exp + 1023;
   1011 			Z.layout.frac_hi = sqrt_guard_cases[g].hi;
   1012 			Z.layout.frac_lo = sqrt_guard_cases[g].lo;
   1013 			guard = g >> 1;
   1014 			if (g & 1) guard |= 1;
   1015 			/* don't have test cases for when X bit = 0 */
   1016 			if (guard == 0 || guard == 4) continue;
   1017 			break;
   1018 		default:
   1019 			assert("check_double_guarded_arithmetic_op: unexpected op",
   1020 				FALSE);
   1021 			break;
   1022 		}
   1023 
   1024 		/* get LSB for tie breaking */
   1025 		LSB = Z.layout.frac_lo & 1;
   1026 
   1027 		/* set up hi and lo */
   1028 		lo = z_sign*Z.dbl;
   1029 		Z.layout.frac_lo += 1;
   1030 		hi = z_sign*Z.dbl;
   1031 
   1032 		switch(mode) {
   1033 		case TO_NEAREST:
   1034 			/* look at 3 guard bits to determine expected rounding */
   1035 			switch(guard) {
   1036 			case 0:
   1037 			case 1: case 2: case 3:
   1038 				expected = lo;
   1039 				break;
   1040 			case 4:	/* tie: round to even */
   1041 				if (debug) printf("tie: LSB = %d\n", LSB);
   1042 				expected = (LSB == 0 ? lo : hi);
   1043 				break;
   1044 			case 5: case 6: case 7:
   1045 				expected = hi;
   1046 				break;
   1047 			default:
   1048 				assert("check_double_guarded_arithmetic_op: unexpected guard",
   1049 					FALSE);
   1050 			}
   1051 			break;
   1052 		case TO_ZERO:
   1053 			expected = lo;
   1054 			break;
   1055 		case TO_PLUS_INFINITY:
   1056 			if (guard == 0) {
   1057 				/* no rounding */
   1058 				expected = lo;
   1059 			} else {
   1060 				expected = (s == 1 ? hi : lo);
   1061 			}
   1062 			break;
   1063 		case TO_MINUS_INFINITY:
   1064 			if (guard == 0) {
   1065 				/* no rounding */
   1066 				expected = lo;
   1067 			} else {
   1068 				expected = (s == 1 ? lo : hi);
   1069 			}
   1070 			break;
   1071 		}
   1072 
   1073 		set_rounding_mode(mode);
   1074 
   1075 		/*
   1076 		** do the double precision dual operation just for comparison
   1077 		** when debugging
   1078 		*/
   1079 		switch(op) {
   1080 		case FADD:
   1081 			BINOP("fadd");
   1082 			Res.dbl = fD;
   1083 			break;
   1084 		case FSUB:
   1085 			BINOP("fsub");
   1086 			Res.dbl = fD;
   1087 			break;
   1088 		case FMUL:
   1089 			BINOP("fmul");
   1090 			Res.dbl = fD;
   1091 			break;
   1092 		case FMADD:
   1093 			TERNOP("fmadd");
   1094 			Res.dbl = fD;
   1095 			break;
   1096 		case FMSUB:
   1097 			TERNOP("fmsub");
   1098 			Res.dbl = fD;
   1099 			break;
   1100 		case FNMADD:
   1101 			TERNOP("fnmadd");
   1102 			Res.dbl = fD;
   1103 			break;
   1104 		case FNMSUB:
   1105 			TERNOP("fnmsub");
   1106 			Res.dbl = fD;
   1107 			break;
   1108 		case FDIV:
   1109 			BINOP("fdiv");
   1110 			Res.dbl = fD;
   1111 			break;
   1112 		case FSQRT:
   1113 			UNOP("fsqrt");
   1114 			Res.dbl = fD;
   1115 			break;
   1116 		default:
   1117 			assert("check_double_guarded_arithmetic_op: unexpected op",
   1118 				FALSE);
   1119 			break;
   1120 		}
   1121 #undef UNOP
   1122 #undef BINOP
   1123 #undef TERNOP
   1124 
   1125 		Exp.dbl = expected;
   1126 
   1127 		if ((Res.layout.sign != Exp.layout.sign) ||
   1128 			(Res.layout.exp != Exp.layout.exp) ||
   1129 			(Res.layout.frac_lo != Exp.layout.frac_lo) ||
   1130 			(Res.layout.frac_hi != Exp.layout.frac_hi)) {
   1131 			result = "FAILED";
   1132 			status = 1;
   1133 		} else {
   1134 			result = "PASSED";
   1135 			status = 0;
   1136 		}
   1137 
   1138 		printf("%s:%s:%s(%-13a",
   1139 			round_mode_name[mode], result, flt_op_names[op], fA);
   1140 		if (arg_count > 1) printf(", %-13a", fB);
   1141 		if (arg_count > 2) printf(", %-13a", fC);
   1142 		printf(") = %-13a", Res.dbl);
   1143 		if (status) printf("\n\texpected %a", Exp.dbl);
   1144 		putchar('\n');
   1145 
   1146 		if (debug) {
   1147 			print_double("hi", hi);
   1148 			print_double("lo", lo);
   1149 			print_double("expected", expected);
   1150 			print_double("got", Res.dbl);
   1151 		}
   1152 	}
   1153 
   1154 	return status;
   1155 }
   1156 
   1157 int test_float_arithmetic_ops()
   1158 {
   1159 	int status = 0;
   1160 	flt_op_t op;
   1161 
   1162 	/*
   1163 	** choose FP operands whose result should be rounded to either
   1164 	** lo or hi.
   1165 	*/
   1166 
   1167 	printf("-------------------------- %s --------------------------\n",
   1168 		"test rounding of float operators without guard bits");
   1169 	for (op = FADDS; op <= FDIVS; op++) {
   1170 		status |= check_single_arithmetic_op(op);
   1171 	}
   1172 
   1173 	printf("-------------------------- %s --------------------------\n",
   1174 		"test rounding of float operators with guard bits");
   1175 	for (op = FADDS; op <= FNMSUBS; op++) {
   1176 		status |= check_single_guarded_arithmetic_op(op);
   1177 	}
   1178 
   1179 	printf("-------------------------- %s --------------------------\n",
   1180 		"test rounding of double operators with guard bits");
   1181 	for (op = FADD; op <= FSQRT; op++) {
   1182 		status |= check_double_guarded_arithmetic_op(op);
   1183 	}
   1184 	return status;
   1185 }
   1186 
   1187 
   1188 int
   1189 main()
   1190 {
   1191 	int status = 0;
   1192 
   1193 	init();
   1194 
   1195 	status |= test_dbl_to_float_convert("test denormalized convert", &denorm_small);
   1196 	status |= test_dbl_to_float_convert("test normalized convert", &norm_small);
   1197 	status |= test_int_to_float_convert("test (float)int convert");
   1198 	status |= test_int_to_float_convert("test (float)int convert");
   1199 
   1200 #ifdef __powerpc64__
   1201 	status |= test_long_to_double_convert("test (double)long convert");
   1202 #endif
   1203 	status |= test_float_arithmetic_ops();
   1204 	return status;
   1205 }
   1206