Home | History | Annotate | Download | only in x86
      1 #ifdef __x86_64__
      2 
      3 /* -----------------------------------------------------------------------
      4    x86-ffi64.c - Copyright (c) 2002  Bo Thorsen <bo (at) suse.de>
      5 
      6    x86-64 Foreign Function Interface
      7 
      8    Permission is hereby granted, free of charge, to any person obtaining
      9    a copy of this software and associated documentation files (the
     10    ``Software''), to deal in the Software without restriction, including
     11    without limitation the rights to use, copy, modify, merge, publish,
     12    distribute, sublicense, and/or sell copies of the Software, and to
     13    permit persons to whom the Software is furnished to do so, subject to
     14    the following conditions:
     15 
     16    The above copyright notice and this permission notice shall be included
     17    in all copies or substantial portions of the Software.
     18 
     19    THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
     20    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     22    IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     23    OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     24    ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     25    OTHER DEALINGS IN THE SOFTWARE.
     26    ----------------------------------------------------------------------- */
     27 
     28 #include <ffi.h>
     29 #include <ffi_common.h>
     30 
     31 #include <stdlib.h>
     32 #include <stdarg.h>
     33 
     34 #define MAX_GPR_REGS 6
     35 #define MAX_SSE_REGS 8
     36 
     37 typedef struct RegisterArgs {
     38 	/* Registers for argument passing.  */
     39 	UINT64		gpr[MAX_GPR_REGS];
     40 	__int128_t	sse[MAX_SSE_REGS];
     41 } RegisterArgs;
     42 
     43 extern void
     44 ffi_call_unix64(
     45 	void*			args,
     46 	unsigned long	bytes,
     47 	unsigned		flags,
     48 	void*			raddr,
     49 	void			(*fnaddr)(),
     50 	unsigned		ssecount);
     51 
/*	All references to register classes here are identical to the code in
	gcc/config/i386/i386.c. Do *not* change one without the other.  */
     54 
     55 /*	Register class used for passing given 64bit part of the argument.
     56 	These represent classes as documented by the PS ABI, with the exception
     57 	of SSESF, SSEDF classes, that are basically SSE class, just gcc will
     58 	use SF or DFmode move instead of DImode to avoid reformating penalties.
     59 
     60 	Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
     61 	whenever possible (upper half does contain padding).  */
     62 enum x86_64_reg_class
     63 {
     64 	X86_64_NO_CLASS,
     65 	X86_64_INTEGER_CLASS,
     66 	X86_64_INTEGERSI_CLASS,
     67 	X86_64_SSE_CLASS,
     68 	X86_64_SSESF_CLASS,
     69 	X86_64_SSEDF_CLASS,
     70 	X86_64_SSEUP_CLASS,
     71 	X86_64_X87_CLASS,
     72 	X86_64_X87UP_CLASS,
     73 	X86_64_COMPLEX_X87_CLASS,
     74 	X86_64_MEMORY_CLASS
     75 };
     76 
     77 #define MAX_CLASSES 4
     78 #define SSE_CLASS_P(X)	((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS)
     79 
     80 /*	x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
     81 	of this code is to classify each 8bytes of incoming argument by the register
     82 	class and assign registers accordingly.  */
     83 
     84 /*	Return the union class of CLASS1 and CLASS2.
     85 	See the x86-64 PS ABI for details.  */
     86 static enum x86_64_reg_class
     87 merge_classes(
     88 	enum x86_64_reg_class	class1,
     89 	enum x86_64_reg_class	class2)
     90 {
     91 	/*	Rule #1: If both classes are equal, this is the resulting class.  */
     92 	if (class1 == class2)
     93 		return class1;
     94 
     95 	/*	Rule #2: If one of the classes is NO_CLASS, the resulting class is
     96 		the other class.  */
     97 	if (class1 == X86_64_NO_CLASS)
     98 		return class2;
     99 
    100 	if (class2 == X86_64_NO_CLASS)
    101 		return class1;
    102 
    103 	/*	Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
    104 	if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    105 		return X86_64_MEMORY_CLASS;
    106 
    107 	/*	Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
    108 	if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
    109 		|| (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    110 		return X86_64_INTEGERSI_CLASS;
    111 
    112 	if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
    113 		|| class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    114 		return X86_64_INTEGER_CLASS;
    115 
    116 	/*	Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
    117 		MEMORY is used.  */
    118 	if (class1 == X86_64_X87_CLASS
    119 		|| class1 == X86_64_X87UP_CLASS
    120 		|| class1 == X86_64_COMPLEX_X87_CLASS
    121 		|| class2 == X86_64_X87_CLASS
    122 		|| class2 == X86_64_X87UP_CLASS
    123 		|| class2 == X86_64_COMPLEX_X87_CLASS)
    124 		return X86_64_MEMORY_CLASS;
    125 
    126 	/*	Rule #6: Otherwise class SSE is used.  */
    127 	return X86_64_SSE_CLASS;
    128 }
    129 
    130 /*	Classify the argument of type TYPE and mode MODE.
    131 	CLASSES will be filled by the register class used to pass each word
    132 	of the operand.  The number of words is returned.  In case the parameter
    133 	should be passed in memory, 0 is returned. As a special case for zero
    134 	sized containers, classes[0] will be NO_CLASS and 1 is returned.
    135 
    136 	See the x86-64 PS ABI for details.	*/
    137 
    138 static int
    139 classify_argument(
    140 	ffi_type*				type,
    141 	enum x86_64_reg_class	classes[],
    142 	size_t					byte_offset)
    143 {
    144 	switch (type->type)
    145 	{
    146 		case FFI_TYPE_UINT8:
    147 		case FFI_TYPE_SINT8:
    148 		case FFI_TYPE_UINT16:
    149 		case FFI_TYPE_SINT16:
    150 		case FFI_TYPE_UINT32:
    151 		case FFI_TYPE_SINT32:
    152 		case FFI_TYPE_UINT64:
    153 		case FFI_TYPE_SINT64:
    154 		case FFI_TYPE_POINTER:
    155 #if 0
    156 			if (byte_offset + type->size <= 4)
    157 				classes[0] = X86_64_INTEGERSI_CLASS;
    158 			else
    159 				classes[0] = X86_64_INTEGER_CLASS;
    160 
    161 			return 1;
    162 #else
    163 		{
    164 			int size = byte_offset + type->size;
    165 
    166 			if (size <= 4)
    167 			{
    168 				classes[0] = X86_64_INTEGERSI_CLASS;
    169 				return 1;
    170 			}
    171 			else if (size <= 8)
    172 			{
    173 				classes[0] = X86_64_INTEGER_CLASS;
    174 				return 1;
    175 			}
    176 			else if (size <= 12)
    177 			{
    178 				classes[0] = X86_64_INTEGER_CLASS;
    179 				classes[1] = X86_64_INTEGERSI_CLASS;
    180 				return 2;
    181 			}
    182 			else if (size <= 16)
    183 			{
    184 				classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
    185 				return 2;
    186 			}
    187 			else
    188 				FFI_ASSERT (0);
    189 		}
    190 #endif
    191 
    192 		case FFI_TYPE_FLOAT:
    193 			if (byte_offset == 0)
    194 				classes[0] = X86_64_SSESF_CLASS;
    195 			else
    196 				classes[0] = X86_64_SSE_CLASS;
    197 
    198 			return 1;
    199 
    200 		case FFI_TYPE_DOUBLE:
    201 			classes[0] = X86_64_SSEDF_CLASS;
    202 			return 1;
    203 
    204 		case FFI_TYPE_LONGDOUBLE:
    205 			classes[0] = X86_64_X87_CLASS;
    206 			classes[1] = X86_64_X87UP_CLASS;
    207 			return 2;
    208 
    209 		case FFI_TYPE_STRUCT:
    210 		{
    211 			ffi_type**				ptr;
    212 			int						i;
    213 			enum x86_64_reg_class	subclasses[MAX_CLASSES];
    214 			const int				UNITS_PER_WORD = 8;
    215 			int						words =
    216 				(type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    217 
    218 			/* If the struct is larger than 16 bytes, pass it on the stack.  */
    219 			if (type->size > 16)
    220 				return 0;
    221 
    222 			for (i = 0; i < words; i++)
    223 				classes[i] = X86_64_NO_CLASS;
    224 
    225 			/* Merge the fields of structure.  */
    226 			for (ptr = type->elements; *ptr != NULL; ptr++)
    227 			{
    228 				byte_offset = ALIGN(byte_offset, (*ptr)->alignment);
    229 
    230 				int	num = classify_argument(*ptr, subclasses, byte_offset % 8);
    231 
    232 				if (num == 0)
    233 					return 0;
    234 
    235 				int pos = byte_offset / 8;
    236 
    237 				for (i = 0; i < num; i++)
    238 				{
    239 					classes[i + pos] =
    240 						merge_classes(subclasses[i], classes[i + pos]);
    241 				}
    242 
    243 				byte_offset += (*ptr)->size;
    244 			}
    245 
    246 			if (words > 2)
    247 			{
    248 				/* When size > 16 bytes, if the first one isn't
    249 			           X86_64_SSE_CLASS or any other ones aren't
    250 			           X86_64_SSEUP_CLASS, everything should be passed in
    251 			           memory.  */
    252 				if (classes[0] != X86_64_SSE_CLASS)
    253 					return 0;
    254 
    255 				for (i = 1; i < words; i++)
    256 					if (classes[i] != X86_64_SSEUP_CLASS)
    257 						return 0;
    258 			}
    259 
    260 
    261 			/* Final merger cleanup.  */
    262 			for (i = 0; i < words; i++)
    263 			{
    264 				/*	If one class is MEMORY, everything should be passed in
    265 					memory.  */
    266 				if (classes[i] == X86_64_MEMORY_CLASS)
    267 					return 0;
    268 
    269 				/*	The X86_64_SSEUP_CLASS should be always preceded by
    270 					X86_64_SSE_CLASS.  */
    271 				if (classes[i] == X86_64_SSEUP_CLASS
    272 					&& classes[i - 1] != X86_64_SSE_CLASS
    273 					&& classes[i - 1] != X86_64_SSEUP_CLASS)
    274 				{
    275 					FFI_ASSERT(i != 0);
    276 					classes[i] = X86_64_SSE_CLASS;
    277 				}
    278 
    279 				/*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
    280 				if (classes[i] == X86_64_X87UP_CLASS
    281 					&& classes[i - 1] != X86_64_X87_CLASS)
    282 				{
    283 					FFI_ASSERT(i != 0);
    284 					classes[i] = X86_64_SSE_CLASS;
    285 				}
    286 			}
    287 
    288 			return words;
    289 		}
    290 
    291 		default:
    292 			FFI_ASSERT(0);
    293 	}
    294 
    295 	return 0; /* Never reached.  */
    296 }
    297 
    298 /*	Examine the argument and return set number of register required in each
    299 	class.  Return zero if parameter should be passed in memory, otherwise
    300 	the number of registers.  */
    301 static int
    302 examine_argument(
    303 	ffi_type*				type,
    304 	enum x86_64_reg_class	classes[MAX_CLASSES],
    305 	_Bool					in_return,
    306 	int*					pngpr,
    307 	int*					pnsse)
    308 {
    309 	int	n = classify_argument(type, classes, 0);
    310 	int ngpr = 0;
    311 	int	nsse = 0;
    312 	int	i;
    313 
    314 	if (n == 0)
    315 		return 0;
    316 
    317 	for (i = 0; i < n; ++i)
    318 	{
    319 		switch (classes[i])
    320 		{
    321 			case X86_64_INTEGER_CLASS:
    322 			case X86_64_INTEGERSI_CLASS:
    323 				ngpr++;
    324 				break;
    325 
    326 			case X86_64_SSE_CLASS:
    327 			case X86_64_SSESF_CLASS:
    328 			case X86_64_SSEDF_CLASS:
    329 				nsse++;
    330 				break;
    331 
    332 			case X86_64_NO_CLASS:
    333 			case X86_64_SSEUP_CLASS:
    334 				break;
    335 
    336 			case X86_64_X87_CLASS:
    337 			case X86_64_X87UP_CLASS:
    338 			case X86_64_COMPLEX_X87_CLASS:
    339 				return in_return != 0;
    340 
    341 			default:
    342 				abort();
    343 		}
    344 	}
    345 
    346 	*pngpr = ngpr;
    347 	*pnsse = nsse;
    348 
    349 	return n;
    350 }
    351 
    352 /* Perform machine dependent cif processing.  */
    353 ffi_status
    354 ffi_prep_cif_machdep(
    355 	ffi_cif*	cif)
    356 {
    357 	int						gprcount = 0;
    358 	int						ssecount = 0;
    359 	int						flags = cif->rtype->type;
    360 	int						i, avn, n, ngpr, nsse;
    361 	enum x86_64_reg_class	classes[MAX_CLASSES];
    362 	size_t					bytes;
    363 
    364 	if (flags != FFI_TYPE_VOID)
    365 	{
    366 		n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
    367 
    368 		if (n == 0)
    369 		{
    370 			/*	The return value is passed in memory.  A pointer to that
    371 				memory is the first argument.  Allocate a register for it.  */
    372 			gprcount++;
    373 
    374 			/* We don't have to do anything in asm for the return.  */
    375 			flags = FFI_TYPE_VOID;
    376 		}
    377 		else if (flags == FFI_TYPE_STRUCT)
    378 		{
    379 			/* Mark which registers the result appears in.  */
    380 			_Bool sse0 = SSE_CLASS_P(classes[0]);
    381 			_Bool sse1 = n == 2 && SSE_CLASS_P(classes[1]);
    382 
    383 			if (sse0 && !sse1)
    384 				flags |= 1 << 8;
    385 			else if (!sse0 && sse1)
    386 				flags |= 1 << 9;
    387 			else if (sse0 && sse1)
    388 				flags |= 1 << 10;
    389 
    390 			/* Mark the true size of the structure.  */
    391 			flags |= cif->rtype->size << 12;
    392 		}
    393 	}
    394 
    395 	/*	Go over all arguments and determine the way they should be passed.
    396 		If it's in a register and there is space for it, let that be so. If
    397 		not, add it's size to the stack byte count.  */
    398 	for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
    399 	{
    400 		if (examine_argument(cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
    401 			|| gprcount + ngpr > MAX_GPR_REGS
    402 			|| ssecount + nsse > MAX_SSE_REGS)
    403 		{
    404 			long align = cif->arg_types[i]->alignment;
    405 
    406 			if (align < 8)
    407 				align = 8;
    408 
    409 			bytes = ALIGN(bytes, align);
    410 			bytes += cif->arg_types[i]->size;
    411 		}
    412 		else
    413 		{
    414 			gprcount += ngpr;
    415 			ssecount += nsse;
    416 		}
    417 	}
    418 
    419 	if (ssecount)
    420 		flags |= 1 << 11;
    421 
    422 	cif->flags = flags;
    423 	cif->bytes = bytes;
    424 	cif->bytes = ALIGN(bytes,8);
    425 
    426 	return FFI_OK;
    427 }
    428 
    429 void
    430 ffi_call(
    431 	ffi_cif*	cif,
    432 	void		(*fn)(),
    433 	void*		rvalue,
    434 	void**		avalue)
    435 {
    436 	enum x86_64_reg_class	classes[MAX_CLASSES];
    437 	char*					stack;
    438 	char*					argp;
    439 	ffi_type**				arg_types;
    440 	int						gprcount, ssecount, ngpr, nsse, i, avn;
    441 	_Bool					ret_in_memory;
    442 	RegisterArgs*			reg_args;
    443 
    444 	/* Can't call 32-bit mode from 64-bit mode.  */
    445 	FFI_ASSERT(cif->abi == FFI_UNIX64);
    446 
    447 	/*	If the return value is a struct and we don't have a return value
    448 		address then we need to make one.  Note the setting of flags to
    449 		VOID above in ffi_prep_cif_machdep.  */
    450 	ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
    451 		&& (cif->flags & 0xff) == FFI_TYPE_VOID);
    452 
    453 	if (rvalue == NULL && ret_in_memory)
    454 		rvalue = alloca (cif->rtype->size);
    455 
    456 	/* Allocate the space for the arguments, plus 4 words of temp space.  */
    457 	stack = alloca(sizeof(RegisterArgs) + cif->bytes + 4 * 8);
    458 	reg_args = (RegisterArgs*)stack;
    459 	argp = stack + sizeof(RegisterArgs);
    460 
    461 	gprcount = ssecount = 0;
    462 
    463 	/*	If the return value is passed in memory, add the pointer as the
    464 		first integer argument.  */
    465 	if (ret_in_memory)
    466 		reg_args->gpr[gprcount++] = (long) rvalue;
    467 
    468 	avn = cif->nargs;
    469 	arg_types = cif->arg_types;
    470 
    471 	for (i = 0; i < avn; ++i)
    472 	{
    473 		size_t size = arg_types[i]->size;
    474 		int n;
    475 
    476 		n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
    477 
    478 		if (n == 0
    479 			|| gprcount + ngpr > MAX_GPR_REGS
    480 			|| ssecount + nsse > MAX_SSE_REGS)
    481 		{
    482 			long align = arg_types[i]->alignment;
    483 
    484 			/* Stack arguments are *always* at least 8 byte aligned.  */
    485 			if (align < 8)
    486 				align = 8;
    487 
    488 			/* Pass this argument in memory.  */
    489 			argp = (void *) ALIGN (argp, align);
    490 			memcpy (argp, avalue[i], size);
    491 			argp += size;
    492 		}
    493 		else
    494 		{	/* The argument is passed entirely in registers.  */
    495 			char *a = (char *) avalue[i];
    496 			int j;
    497 
    498 			for (j = 0; j < n; j++, a += 8, size -= 8)
    499 			{
    500 				switch (classes[j])
    501 				{
    502 					case X86_64_INTEGER_CLASS:
    503 					case X86_64_INTEGERSI_CLASS:
    504 						reg_args->gpr[gprcount] = 0;
    505 						switch (arg_types[i]->type) {
    506 						case FFI_TYPE_SINT8:
    507 						   {
    508 							int8_t shortval = *(int8_t*)a;
    509 							int64_t  actval = (int64_t)shortval;
    510 							reg_args->gpr[gprcount] = actval;
    511 							/*memcpy (&reg_args->gpr[gprcount], &actval, 8);*/
    512 							break;
    513 						   }
    514 
    515 						case FFI_TYPE_SINT16:
    516 						   {
    517 							int16_t shortval = *(int16_t*)a;
    518 							int64_t  actval = (int64_t)shortval;
    519 							memcpy (&reg_args->gpr[gprcount], &actval, 8);
    520 							break;
    521 						   }
    522 
    523 						case FFI_TYPE_SINT32:
    524 						   {
    525 							int32_t shortval = *(int32_t*)a;
    526 							int64_t  actval = (int64_t)shortval;
    527 							memcpy (&reg_args->gpr[gprcount], &actval, 8);
    528 							break;
    529 						   }
    530 
    531 						case FFI_TYPE_UINT8:
    532 						   {
    533 							u_int8_t shortval = *(u_int8_t*)a;
    534 							u_int64_t  actval = (u_int64_t)shortval;
    535 							/*memcpy (&reg_args->gpr[gprcount], &actval, 8);*/
    536 							reg_args->gpr[gprcount] = actval;
    537 							break;
    538 						   }
    539 
    540 						case FFI_TYPE_UINT16:
    541 						   {
    542 							u_int16_t shortval = *(u_int16_t*)a;
    543 							u_int64_t  actval = (u_int64_t)shortval;
    544 							memcpy (&reg_args->gpr[gprcount], &actval, 8);
    545 							break;
    546 						   }
    547 
    548 						case FFI_TYPE_UINT32:
    549 						   {
    550 							u_int32_t shortval = *(u_int32_t*)a;
    551 							u_int64_t  actval = (u_int64_t)shortval;
    552 							memcpy (&reg_args->gpr[gprcount], &actval, 8);
    553 							break;
    554 						   }
    555 
    556 						default:
    557 							//memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
    558 							reg_args->gpr[gprcount] = *(int64_t*)a;
    559 						}
    560 						gprcount++;
    561 						break;
    562 
    563 					case X86_64_SSE_CLASS:
    564 					case X86_64_SSEDF_CLASS:
    565 						reg_args->sse[ssecount++] = *(UINT64 *) a;
    566 						break;
    567 
    568 					case X86_64_SSESF_CLASS:
    569 						reg_args->sse[ssecount++] = *(UINT32 *) a;
    570 						break;
    571 
    572 					default:
    573 						abort();
    574 				}
    575 			}
    576 		}
    577 	}
    578 
    579 	ffi_call_unix64 (stack, cif->bytes + sizeof(RegisterArgs),
    580 		cif->flags, rvalue, fn, ssecount);
    581 }
    582 
    583 extern void ffi_closure_unix64(void);
    584 
    585 ffi_status
    586 ffi_prep_closure(
    587 	ffi_closure*	closure,
    588 	ffi_cif*		cif,
    589 	void			(*fun)(ffi_cif*, void*, void**, void*),
    590 	void*			user_data)
    591 {
    592 	if (cif->abi != FFI_UNIX64)
    593 		return FFI_BAD_ABI;
    594 
    595 	volatile unsigned short*	tramp =
    596 		(volatile unsigned short*)&closure->tramp[0];
    597 
    598 	tramp[0] = 0xbb49;		/* mov <code>, %r11	*/
    599 	*(void* volatile*)&tramp[1] = ffi_closure_unix64;
    600 	tramp[5] = 0xba49;		/* mov <data>, %r10	*/
    601 	*(void* volatile*)&tramp[6] = closure;
    602 
    603 	/*	Set the carry bit if the function uses any sse registers.
    604 		This is clc or stc, together with the first byte of the jmp.  */
    605 	tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
    606 	tramp[11] = 0xe3ff;			/* jmp *%r11    */
    607 
    608 	closure->cif = cif;
    609 	closure->fun = fun;
    610 	closure->user_data = user_data;
    611 
    612 	return FFI_OK;
    613 }
    614 
    615 #pragma clang diagnostic push
    616 #pragma clang diagnostic ignored "-Wmissing-prototypes"
    617 int
    618 ffi_closure_unix64_inner(
    619 	ffi_closure*	closure,
    620 	void*			rvalue,
    621 	RegisterArgs*	reg_args,
    622 	char*			argp)
    623 #pragma clang diagnostic pop
    624 {
    625 	ffi_cif*	cif = closure->cif;
    626 	void**		avalue = alloca(cif->nargs * sizeof(void *));
    627 	ffi_type**	arg_types;
    628 	long		i, avn;
    629 	int			gprcount = 0;
    630 	int			ssecount = 0;
    631 	int			ngpr, nsse;
    632 	int			ret;
    633 
    634 	ret = cif->rtype->type;
    635 
    636 	if (ret != FFI_TYPE_VOID)
    637     {
    638 		enum x86_64_reg_class classes[MAX_CLASSES];
    639 		int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
    640 
    641 		if (n == 0)
    642 		{
    643 			/* The return value goes in memory.  Arrange for the closure
    644 			return value to go directly back to the original caller.  */
    645 			rvalue = (void *) reg_args->gpr[gprcount++];
    646 
    647 			/* We don't have to do anything in asm for the return.  */
    648 			ret = FFI_TYPE_VOID;
    649 		}
    650 		else if (ret == FFI_TYPE_STRUCT && n == 2)
    651 		{
    652 			/* Mark which register the second word of the structure goes in.  */
    653 			_Bool sse0 = SSE_CLASS_P (classes[0]);
    654 			_Bool sse1 = SSE_CLASS_P (classes[1]);
    655 
    656 			if (!sse0 && sse1)
    657 				ret |= 1 << 8;
    658 			else if (sse0 && !sse1)
    659 				ret |= 1 << 9;
    660 		}
    661 	}
    662 
    663 	avn = cif->nargs;
    664 	arg_types = cif->arg_types;
    665 
    666 	for (i = 0; i < avn; ++i)
    667 	{
    668 		enum x86_64_reg_class classes[MAX_CLASSES];
    669 		int n;
    670 
    671 		n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
    672 
    673 		if (n == 0
    674 			|| gprcount + ngpr > MAX_GPR_REGS
    675 			|| ssecount + nsse > MAX_SSE_REGS)
    676 		{
    677 			long align = arg_types[i]->alignment;
    678 
    679 			/* Stack arguments are *always* at least 8 byte aligned.  */
    680 			if (align < 8)
    681 				align = 8;
    682 
    683 			/* Pass this argument in memory.  */
    684 			argp = (void *) ALIGN (argp, align);
    685 			avalue[i] = argp;
    686 			argp += arg_types[i]->size;
    687 		}
    688 
    689 #if !defined(X86_DARWIN)
    690 		/*	If the argument is in a single register, or two consecutive
    691 			registers, then we can use that address directly.  */
    692 		else if (n == 1 || (n == 2 &&
    693 		   SSE_CLASS_P (classes[0]) == SSE_CLASS_P (classes[1])))
    694 		{
    695 			// The argument is in a single register.
    696 			if (SSE_CLASS_P (classes[0]))
    697 			{
    698 				avalue[i] = &reg_args->sse[ssecount];
    699 				ssecount += n;
    700 			}
    701 			else
    702 			{
    703 				avalue[i] = &reg_args->gpr[gprcount];
    704 				gprcount += n;
    705 			}
    706 		}
    707 #endif
    708 
    709 		/* Otherwise, allocate space to make them consecutive.  */
    710 		else
    711 		{
    712 			char *a = alloca (16);
    713 			int j;
    714 
    715 			avalue[i] = a;
    716 
    717 			for (j = 0; j < n; j++, a += 8)
    718 			{
    719 				if (SSE_CLASS_P (classes[j]))
    720 					memcpy (a, &reg_args->sse[ssecount++], 8);
    721 				else
    722 					memcpy (a, &reg_args->gpr[gprcount++], 8);
    723 			}
    724 		}
    725 	}
    726 
    727 	/* Invoke the closure.  */
    728 	closure->fun (cif, rvalue, avalue, closure->user_data);
    729 
    730 	/* Tell assembly how to perform return type promotions.  */
    731 	return ret;
    732 }
    733 
    734 #endif /* __x86_64__ */
    735