Home | History | Annotate | Download | only in x86
      1 /* -----------------------------------------------------------------------
      2    ffi.c - Copyright (c) 2002, 2007  Bo Thorsen <bo (at) suse.de>
      3            Copyright (c) 2008  Red Hat, Inc.
      4 
      5    x86-64 Foreign Function Interface
      6 
      7    Permission is hereby granted, free of charge, to any person obtaining
      8    a copy of this software and associated documentation files (the
      9    ``Software''), to deal in the Software without restriction, including
     10    without limitation the rights to use, copy, modify, merge, publish,
     11    distribute, sublicense, and/or sell copies of the Software, and to
     12    permit persons to whom the Software is furnished to do so, subject to
     13    the following conditions:
     14 
     15    The above copyright notice and this permission notice shall be included
     16    in all copies or substantial portions of the Software.
     17 
     18    THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
     19    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     21    NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
     22    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
     23    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     24    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     25    DEALINGS IN THE SOFTWARE.
     26    ----------------------------------------------------------------------- */
     27 
     28 #include <ffi.h>
     29 #include <ffi_common.h>
     30 
     31 #include <stdlib.h>
     32 #include <stdarg.h>
     33 
     34 #ifdef __x86_64__
     35 
/* Number of general-purpose register slots kept in struct register_args.  */
#define MAX_GPR_REGS 6
/* Number of SSE register slots kept in struct register_args.  */
#define MAX_SSE_REGS 8

/* Image of the argument registers.  ffi_call builds one at the start of
   its argument area before invoking ffi_call_unix64, and
   ffi_closure_unix64_inner receives one to recover incoming arguments.  */
struct register_args
{
  /* Registers for argument passing.  */
  /* One 64-bit slot per general-purpose register.  */
  UINT64 gpr[MAX_GPR_REGS];
  /* One 128-bit slot per SSE register, so consecutive entries are
     16 bytes apart in memory.  */
  __int128_t sse[MAX_SSE_REGS];
};
     45 
/* Entry point that performs the actual call; it is defined outside this
   file.  As invoked by ffi_call: ARGS is the argument area, which starts
   with a struct register_args followed by the stack arguments; BYTES is
   the size of that area; FLAGS derives from cif->flags; RADDR is the
   address for the return value; FNADDR is the function to call; SSECOUNT
   is the number of SSE register slots that ffi_call filled in.  */
extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
			     void *raddr, void (*fnaddr)(void), unsigned ssecount);
     48 
     49 /* All reference to register classes here is identical to the code in
     50    gcc/config/i386/i386.c. Do *not* change one without the other.  */
     51 
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class; gcc
   just uses an SFmode or DFmode move instead of DImode to avoid
   reformatting penalties.

   Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    /* Nothing classified yet; consumes no register.  */
    X86_64_NO_CLASS,
    /* Integer data; takes one general-purpose register.  */
    X86_64_INTEGER_CLASS,
    /* Integer data ending within the first 4 bytes of the eightbyte;
       takes one general-purpose register.  */
    X86_64_INTEGERSI_CLASS,
    /* Floating-point data; takes one SSE register.  */
    X86_64_SSE_CLASS,
    /* A float at the start of the eightbyte; takes one SSE register.  */
    X86_64_SSESF_CLASS,
    /* A double; takes one SSE register.  */
    X86_64_SSEDF_CLASS,
    /* Upper part of an SSE value; consumes no additional register.  */
    X86_64_SSEUP_CLASS,
    /* First eightbyte of a long double.  */
    X86_64_X87_CLASS,
    /* Second eightbyte of a long double.  */
    X86_64_X87UP_CLASS,
    /* Not produced by classify_argument in this file; handled like the
       other x87 classes.  */
    X86_64_COMPLEX_X87_CLASS,
    /* The value has to be passed in memory.  */
    X86_64_MEMORY_CLASS
  };
     73 
     74 #define MAX_CLASSES 4
     75 
     76 #define SSE_CLASS_P(X)	((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS)
     77 
     78 /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
     79    of this code is to classify each 8bytes of incoming argument by the register
     80    class and assign registers accordingly.  */
     81 
     82 /* Return the union class of CLASS1 and CLASS2.
     83    See the x86-64 PS ABI for details.  */
     84 
     85 static enum x86_64_reg_class
     86 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
     87 {
     88   /* Rule #1: If both classes are equal, this is the resulting class.  */
     89   if (class1 == class2)
     90     return class1;
     91 
     92   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     93      the other class.  */
     94   if (class1 == X86_64_NO_CLASS)
     95     return class2;
     96   if (class2 == X86_64_NO_CLASS)
     97     return class1;
     98 
     99   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
    100   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    101     return X86_64_MEMORY_CLASS;
    102 
    103   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
    104   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
    105       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    106     return X86_64_INTEGERSI_CLASS;
    107   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
    108       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    109     return X86_64_INTEGER_CLASS;
    110 
    111   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
    112      MEMORY is used.  */
    113   if (class1 == X86_64_X87_CLASS
    114       || class1 == X86_64_X87UP_CLASS
    115       || class1 == X86_64_COMPLEX_X87_CLASS
    116       || class2 == X86_64_X87_CLASS
    117       || class2 == X86_64_X87UP_CLASS
    118       || class2 == X86_64_COMPLEX_X87_CLASS)
    119     return X86_64_MEMORY_CLASS;
    120 
    121   /* Rule #6: Otherwise class SSE is used.  */
    122   return X86_64_SSE_CLASS;
    123 }
    124 
    125 /* Classify the argument of type TYPE and mode MODE.
    126    CLASSES will be filled by the register class used to pass each word
    127    of the operand.  The number of words is returned.  In case the parameter
    128    should be passed in memory, 0 is returned. As a special case for zero
    129    sized containers, classes[0] will be NO_CLASS and 1 is returned.
    130 
    131    See the x86-64 PS ABI for details.
    132 */
    133 static int
    134 classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
    135 		   size_t byte_offset)
    136 {
    137   switch (type->type)
    138     {
    139     case FFI_TYPE_UINT8:
    140     case FFI_TYPE_SINT8:
    141     case FFI_TYPE_UINT16:
    142     case FFI_TYPE_SINT16:
    143     case FFI_TYPE_UINT32:
    144     case FFI_TYPE_SINT32:
    145     case FFI_TYPE_UINT64:
    146     case FFI_TYPE_SINT64:
    147     case FFI_TYPE_POINTER:
    148       if (byte_offset + type->size <= 4)
    149 	classes[0] = X86_64_INTEGERSI_CLASS;
    150       else
    151 	classes[0] = X86_64_INTEGER_CLASS;
    152       return 1;
    153     case FFI_TYPE_FLOAT:
    154       if (byte_offset == 0)
    155 	classes[0] = X86_64_SSESF_CLASS;
    156       else
    157 	classes[0] = X86_64_SSE_CLASS;
    158       return 1;
    159     case FFI_TYPE_DOUBLE:
    160       classes[0] = X86_64_SSEDF_CLASS;
    161       return 1;
    162     case FFI_TYPE_LONGDOUBLE:
    163       classes[0] = X86_64_X87_CLASS;
    164       classes[1] = X86_64_X87UP_CLASS;
    165       return 2;
    166     case FFI_TYPE_STRUCT:
    167       {
    168 	const int UNITS_PER_WORD = 8;
    169 	int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    170 	ffi_type **ptr;
    171 	int i;
    172 	enum x86_64_reg_class subclasses[MAX_CLASSES];
    173 
    174 	/* If the struct is larger than 16 bytes, pass it on the stack.  */
    175 	if (type->size > 16)
    176 	  return 0;
    177 
    178 	for (i = 0; i < words; i++)
    179 	  classes[i] = X86_64_NO_CLASS;
    180 
    181 	/* Merge the fields of structure.  */
    182 	for (ptr = type->elements; *ptr != NULL; ptr++)
    183 	  {
    184 	    int num;
    185 
    186 	    byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
    187 
    188 	    num = classify_argument (*ptr, subclasses, byte_offset % 8);
    189 	    if (num == 0)
    190 	      return 0;
    191 	    for (i = 0; i < num; i++)
    192 	      {
    193 		int pos = byte_offset / 8;
    194 		classes[i + pos] =
    195 		  merge_classes (subclasses[i], classes[i + pos]);
    196 	      }
    197 
    198 	    byte_offset += (*ptr)->size;
    199 	  }
    200 
    201 	/* Final merger cleanup.  */
    202 	for (i = 0; i < words; i++)
    203 	  {
    204 	    /* If one class is MEMORY, everything should be passed in
    205 	       memory.  */
    206 	    if (classes[i] == X86_64_MEMORY_CLASS)
    207 	      return 0;
    208 
    209 	    /* The X86_64_SSEUP_CLASS should be always preceded by
    210 	       X86_64_SSE_CLASS.  */
    211 	    if (classes[i] == X86_64_SSEUP_CLASS
    212 		&& (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
    213 	      classes[i] = X86_64_SSE_CLASS;
    214 
    215 	    /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
    216 	    if (classes[i] == X86_64_X87UP_CLASS
    217 		&& (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
    218 	      classes[i] = X86_64_SSE_CLASS;
    219 	  }
    220 	return words;
    221       }
    222 
    223     default:
    224       FFI_ASSERT(0);
    225     }
    226   return 0; /* Never reached.  */
    227 }
    228 
    229 /* Examine the argument and return set number of register required in each
    230    class.  Return zero iff parameter should be passed in memory, otherwise
    231    the number of registers.  */
    232 
    233 static int
    234 examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
    235 		  _Bool in_return, int *pngpr, int *pnsse)
    236 {
    237   int i, n, ngpr, nsse;
    238 
    239   n = classify_argument (type, classes, 0);
    240   if (n == 0)
    241     return 0;
    242 
    243   ngpr = nsse = 0;
    244   for (i = 0; i < n; ++i)
    245     switch (classes[i])
    246       {
    247       case X86_64_INTEGER_CLASS:
    248       case X86_64_INTEGERSI_CLASS:
    249 	ngpr++;
    250 	break;
    251       case X86_64_SSE_CLASS:
    252       case X86_64_SSESF_CLASS:
    253       case X86_64_SSEDF_CLASS:
    254 	nsse++;
    255 	break;
    256       case X86_64_NO_CLASS:
    257       case X86_64_SSEUP_CLASS:
    258 	break;
    259       case X86_64_X87_CLASS:
    260       case X86_64_X87UP_CLASS:
    261       case X86_64_COMPLEX_X87_CLASS:
    262 	return in_return != 0;
    263       default:
    264 	abort ();
    265       }
    266 
    267   *pngpr = ngpr;
    268   *pnsse = nsse;
    269 
    270   return n;
    271 }
    272 
    273 /* Perform machine dependent cif processing.  */
    274 
    275 ffi_status
    276 ffi_prep_cif_machdep (ffi_cif *cif)
    277 {
    278   int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
    279   enum x86_64_reg_class classes[MAX_CLASSES];
    280   size_t bytes;
    281 
    282   gprcount = ssecount = 0;
    283 
    284   flags = cif->rtype->type;
    285   if (flags != FFI_TYPE_VOID)
    286     {
    287       n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
    288       if (n == 0)
    289 	{
    290 	  /* The return value is passed in memory.  A pointer to that
    291 	     memory is the first argument.  Allocate a register for it.  */
    292 	  gprcount++;
    293 	  /* We don't have to do anything in asm for the return.  */
    294 	  flags = FFI_TYPE_VOID;
    295 	}
    296       else if (flags == FFI_TYPE_STRUCT)
    297 	{
    298 	  /* Mark which registers the result appears in.  */
    299 	  _Bool sse0 = SSE_CLASS_P (classes[0]);
    300 	  _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
    301 	  if (sse0 && !sse1)
    302 	    flags |= 1 << 8;
    303 	  else if (!sse0 && sse1)
    304 	    flags |= 1 << 9;
    305 	  else if (sse0 && sse1)
    306 	    flags |= 1 << 10;
    307 	  /* Mark the true size of the structure.  */
    308 	  flags |= cif->rtype->size << 12;
    309 	}
    310     }
    311 
    312   /* Go over all arguments and determine the way they should be passed.
    313      If it's in a register and there is space for it, let that be so. If
    314      not, add it's size to the stack byte count.  */
    315   for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
    316     {
    317       if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
    318 	  || gprcount + ngpr > MAX_GPR_REGS
    319 	  || ssecount + nsse > MAX_SSE_REGS)
    320 	{
    321 	  long align = cif->arg_types[i]->alignment;
    322 
    323 	  if (align < 8)
    324 	    align = 8;
    325 
    326 	  bytes = ALIGN(bytes, align);
    327 	  bytes += cif->arg_types[i]->size;
    328 	}
    329       else
    330 	{
    331 	  gprcount += ngpr;
    332 	  ssecount += nsse;
    333 	}
    334     }
    335   if (ssecount)
    336     flags |= 1 << 11;
    337   cif->flags = flags;
    338   cif->bytes = bytes;
    339 
    340   return FFI_OK;
    341 }
    342 
    343 void
    344 ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
    345 {
    346   enum x86_64_reg_class classes[MAX_CLASSES];
    347   char *stack, *argp;
    348   ffi_type **arg_types;
    349   int gprcount, ssecount, ngpr, nsse, i, avn;
    350   _Bool ret_in_memory;
    351   struct register_args *reg_args;
    352 
    353   /* Can't call 32-bit mode from 64-bit mode.  */
    354   FFI_ASSERT (cif->abi == FFI_UNIX64);
    355 
    356   /* If the return value is a struct and we don't have a return value
    357      address then we need to make one.  Note the setting of flags to
    358      VOID above in ffi_prep_cif_machdep.  */
    359   ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
    360 		   && (cif->flags & 0xff) == FFI_TYPE_VOID);
    361   if (rvalue == NULL && ret_in_memory)
    362     rvalue = alloca (cif->rtype->size);
    363 
    364   /* Allocate the space for the arguments, plus 4 words of temp space.  */
    365   stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
    366   reg_args = (struct register_args *) stack;
    367   argp = stack + sizeof (struct register_args);
    368 
    369   gprcount = ssecount = 0;
    370 
    371   /* If the return value is passed in memory, add the pointer as the
    372      first integer argument.  */
    373   if (ret_in_memory)
    374     reg_args->gpr[gprcount++] = (long) rvalue;
    375 
    376   avn = cif->nargs;
    377   arg_types = cif->arg_types;
    378 
    379   for (i = 0; i < avn; ++i)
    380     {
    381       size_t size = arg_types[i]->size;
    382       int n;
    383 
    384       n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
    385       if (n == 0
    386 	  || gprcount + ngpr > MAX_GPR_REGS
    387 	  || ssecount + nsse > MAX_SSE_REGS)
    388 	{
    389 	  long align = arg_types[i]->alignment;
    390 
    391 	  /* Stack arguments are *always* at least 8 byte aligned.  */
    392 	  if (align < 8)
    393 	    align = 8;
    394 
    395 	  /* Pass this argument in memory.  */
    396 	  argp = (void *) ALIGN (argp, align);
    397 	  memcpy (argp, avalue[i], size);
    398 	  argp += size;
    399 	}
    400       else
    401 	{
    402 	  /* The argument is passed entirely in registers.  */
    403 	  char *a = (char *) avalue[i];
    404 	  int j;
    405 
    406 	  for (j = 0; j < n; j++, a += 8, size -= 8)
    407 	    {
    408 	      switch (classes[j])
    409 		{
    410 		case X86_64_INTEGER_CLASS:
    411 		case X86_64_INTEGERSI_CLASS:
    412 		  reg_args->gpr[gprcount] = 0;
    413 		  memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
    414 		  gprcount++;
    415 		  break;
    416 		case X86_64_SSE_CLASS:
    417 		case X86_64_SSEDF_CLASS:
    418 		  reg_args->sse[ssecount++] = *(UINT64 *) a;
    419 		  break;
    420 		case X86_64_SSESF_CLASS:
    421 		  reg_args->sse[ssecount++] = *(UINT32 *) a;
    422 		  break;
    423 		default:
    424 		  abort();
    425 		}
    426 	    }
    427 	}
    428     }
    429 
    430   ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
    431 		   cif->flags, rvalue, fn, ssecount);
    432 }
    433 
    434 
/* Entry point that the trampoline jumps to; defined outside this file.  */
extern void ffi_closure_unix64(void);

/* Initialise CLOSURE: write its trampoline and record CIF, FUN and
   USER_DATA in it.  CODELOC is the address that the trampoline loads
   into %r10.  Always returns FFI_OK.

   TRAMP is addressed in 16-bit units, so the 24 trampoline bytes are:
     bytes  0-1   opcode of mov <code>, %r11
     bytes  2-9   address of ffi_closure_unix64
     bytes 10-11  opcode of mov <data>, %r10
     bytes 12-19  CODELOC
     bytes 20-21  clc or stc, then the first byte of the jmp
     bytes 22-23  remainder of jmp *%r11  */
ffi_status
ffi_prep_closure_loc (ffi_closure* closure,
		      ffi_cif* cif,
		      void (*fun)(ffi_cif*, void*, void**, void*),
		      void *user_data,
		      void *codeloc)
{
  volatile unsigned short *tramp;

  tramp = (volatile unsigned short *) &closure->tramp[0];

  tramp[0] = 0xbb49;		/* mov <code>, %r11	*/
  /* The 8-byte immediate starts at byte 2, i.e. tramp[1].  */
  *(void * volatile *) &tramp[1] = ffi_closure_unix64;
  tramp[5] = 0xba49;		/* mov <data>, %r10	*/
  /* The 8-byte immediate starts at byte 12, i.e. tramp[6].  */
  *(void * volatile *) &tramp[6] = codeloc;

  /* Set the carry bit iff the function uses any sse registers.
     This is clc or stc, together with the first byte of the jmp.
     Bit 11 of cif->flags is the marker ffi_prep_cif_machdep sets when
     at least one argument is passed in an SSE register.  */
  tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;

  tramp[11] = 0xe3ff;			/* jmp *%r11    */

  closure->cif = cif;
  closure->fun = fun;
  closure->user_data = user_data;

  return FFI_OK;
}
    465 
    466 int
    467 ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
    468 			 struct register_args *reg_args, char *argp)
    469 {
    470   ffi_cif *cif;
    471   void **avalue;
    472   ffi_type **arg_types;
    473   long i, avn;
    474   int gprcount, ssecount, ngpr, nsse;
    475   int ret;
    476 
    477   cif = closure->cif;
    478   avalue = alloca(cif->nargs * sizeof(void *));
    479   gprcount = ssecount = 0;
    480 
    481   ret = cif->rtype->type;
    482   if (ret != FFI_TYPE_VOID)
    483     {
    484       enum x86_64_reg_class classes[MAX_CLASSES];
    485       int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
    486       if (n == 0)
    487 	{
    488 	  /* The return value goes in memory.  Arrange for the closure
    489 	     return value to go directly back to the original caller.  */
    490 	  rvalue = (void *) reg_args->gpr[gprcount++];
    491 	  /* We don't have to do anything in asm for the return.  */
    492 	  ret = FFI_TYPE_VOID;
    493 	}
    494       else if (ret == FFI_TYPE_STRUCT && n == 2)
    495 	{
    496 	  /* Mark which register the second word of the structure goes in.  */
    497 	  _Bool sse0 = SSE_CLASS_P (classes[0]);
    498 	  _Bool sse1 = SSE_CLASS_P (classes[1]);
    499 	  if (!sse0 && sse1)
    500 	    ret |= 1 << 8;
    501 	  else if (sse0 && !sse1)
    502 	    ret |= 1 << 9;
    503 	}
    504     }
    505 
    506   avn = cif->nargs;
    507   arg_types = cif->arg_types;
    508 
    509   for (i = 0; i < avn; ++i)
    510     {
    511       enum x86_64_reg_class classes[MAX_CLASSES];
    512       int n;
    513 
    514       n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
    515       if (n == 0
    516 	  || gprcount + ngpr > MAX_GPR_REGS
    517 	  || ssecount + nsse > MAX_SSE_REGS)
    518 	{
    519 	  long align = arg_types[i]->alignment;
    520 
    521 	  /* Stack arguments are *always* at least 8 byte aligned.  */
    522 	  if (align < 8)
    523 	    align = 8;
    524 
    525 	  /* Pass this argument in memory.  */
    526 	  argp = (void *) ALIGN (argp, align);
    527 	  avalue[i] = argp;
    528 	  argp += arg_types[i]->size;
    529 	}
    530       /* If the argument is in a single register, or two consecutive
    531 	 registers, then we can use that address directly.  */
    532       else if (n == 1
    533 	       || (n == 2
    534 		   && SSE_CLASS_P (classes[0]) == SSE_CLASS_P (classes[1])))
    535 	{
    536 	  /* The argument is in a single register.  */
    537 	  if (SSE_CLASS_P (classes[0]))
    538 	    {
    539 	      avalue[i] = &reg_args->sse[ssecount];
    540 	      ssecount += n;
    541 	    }
    542 	  else
    543 	    {
    544 	      avalue[i] = &reg_args->gpr[gprcount];
    545 	      gprcount += n;
    546 	    }
    547 	}
    548       /* Otherwise, allocate space to make them consecutive.  */
    549       else
    550 	{
    551 	  char *a = alloca (16);
    552 	  int j;
    553 
    554 	  avalue[i] = a;
    555 	  for (j = 0; j < n; j++, a += 8)
    556 	    {
    557 	      if (SSE_CLASS_P (classes[j]))
    558 		memcpy (a, &reg_args->sse[ssecount++], 8);
    559 	      else
    560 		memcpy (a, &reg_args->gpr[gprcount++], 8);
    561 	    }
    562 	}
    563     }
    564 
    565   /* Invoke the closure.  */
    566   closure->fun (cif, rvalue, avalue, closure->user_data);
    567 
    568   /* Tell assembly how to perform return type promotions.  */
    569   return ret;
    570 }
    571 
    572 #endif /* __x86_64__ */
    573