Home | History | Annotate | Download | only in pa
      1 /* -----------------------------------------------------------------------
      2    ffi.c - (c) 2011 Anthony Green
      3            (c) 2008 Red Hat, Inc.
      4 	   (c) 2006 Free Software Foundation, Inc.
      5            (c) 2003-2004 Randolph Chung <tausq (at) debian.org>
      6 
      7    HPPA Foreign Function Interface
      8    HP-UX PA ABI support
      9 
     10    Permission is hereby granted, free of charge, to any person obtaining
     11    a copy of this software and associated documentation files (the
     12    ``Software''), to deal in the Software without restriction, including
     13    without limitation the rights to use, copy, modify, merge, publish,
     14    distribute, sublicense, and/or sell copies of the Software, and to
     15    permit persons to whom the Software is furnished to do so, subject to
     16    the following conditions:
     17 
     18    The above copyright notice and this permission notice shall be included
     19    in all copies or substantial portions of the Software.
     20 
     21    THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
     22    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     23    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     24    NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
     25    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
     26    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     27    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     28    DEALINGS IN THE SOFTWARE.
     29    ----------------------------------------------------------------------- */
     30 
     31 #include <ffi.h>
     32 #include <ffi_common.h>
     33 
     34 #include <stdlib.h>
     35 #include <stdio.h>
     36 
     37 #define ROUND_UP(v, a)  (((size_t)(v) + (a) - 1) & ~((a) - 1))
        /* ROUND_UP (above) rounds V up to the next multiple of A and yields a
           size_t.  The mask arithmetic is only correct when A is a power of
           two.  */
     38 
        /* Frame size, in bytes, that ffi_size_stack_pa32 stores in cif->bytes
           when the arguments fit in the default frame; also the granularity
           by which larger frames grow.  */
     39 #define MIN_STACK_SIZE  64
        /* Slot number of argument word 0.  Slots are 32-bit words counted
           downwards from the stack pointer passed to the functions below, so
           slot 9 is the word at SP-36.  */
     40 #define FIRST_ARG_SLOT  9
        /* debug() only prints messages whose level is <= DEBUG_LEVEL.  */
     41 #define DEBUG_LEVEL   0
     42 
        /* Inline-assembly helpers that move one value between memory at ADDR
           and the floating-point register named FPREG (written without
           quotes, e.g. fr4).  fldw/fstw transfer a 32-bit word and address
           the register with an "L" suffix; fldd/fstd transfer a 64-bit
           doubleword.  The two load macros list FPREG as clobbered.
           NOTE(review): fstd also appends "L" to the register name while
           fldd does not -- confirm this is what the assembler expects.  */
     43 #define fldw(addr, fpreg) \
     44   __asm__ volatile ("fldw 0(%0), %%" #fpreg "L" : : "r"(addr) : #fpreg)
     45 #define fstw(fpreg, addr) \
     46   __asm__ volatile ("fstw %%" #fpreg "L, 0(%0)" : : "r"(addr))
     47 #define fldd(addr, fpreg) \
     48   __asm__ volatile ("fldd 0(%0), %%" #fpreg : : "r"(addr) : #fpreg)
     49 #define fstd(fpreg, addr) \
     50   __asm__ volatile ("fstd %%" #fpreg "L, 0(%0)" : : "r"(addr))
     51 
        /* printf-style tracing: the arguments after LVL are handed to printf
           when LVL <= DEBUG_LEVEL.  Written with the GNU named variadic
           parameter form (x...).  */
     52 #define debug(lvl, x...) do { if (lvl <= DEBUG_LEVEL) { printf(x); } } while (0)
     53 
     54 static inline int ffi_struct_type(ffi_type *t)
     55 {
     56   size_t sz = t->size;
     57 
     58   /* Small structure results are passed in registers,
     59      larger ones are passed by pointer.  Note that
     60      small structures of size 2, 4 and 8 differ from
     61      the corresponding integer types in that they have
     62      different alignment requirements.  */
     63 
     64   if (sz <= 1)
     65     return FFI_TYPE_UINT8;
     66   else if (sz == 2)
     67     return FFI_TYPE_SMALL_STRUCT2;
     68   else if (sz == 3)
     69     return FFI_TYPE_SMALL_STRUCT3;
     70   else if (sz == 4)
     71     return FFI_TYPE_SMALL_STRUCT4;
     72   else if (sz == 5)
     73     return FFI_TYPE_SMALL_STRUCT5;
     74   else if (sz == 6)
     75     return FFI_TYPE_SMALL_STRUCT6;
     76   else if (sz == 7)
     77     return FFI_TYPE_SMALL_STRUCT7;
     78   else if (sz <= 8)
     79     return FFI_TYPE_SMALL_STRUCT8;
     80   else
     81     return FFI_TYPE_STRUCT; /* else, we pass it by pointer.  */
     82 }
     83 
     84 /* PA has a downward growing stack, which looks like this:
     85 
     86    Offset
     87 	[ Variable args ]
     88    SP-(4*(N+9))         arg word N
     89    ...
     90    SP-52                arg word 4
     91 	[ Fixed args ]
     92    SP-48                arg word 3
     93    SP-44                arg word 2
     94    SP-40                arg word 1
     95    SP-36                arg word 0
     96 	[ Frame marker ]
     97    ...
     98    SP-20                RP
     99    SP-4                 previous SP
    100 
    101    The first four argument words on the stack are reserved for use by
    102    the callee.  Instead, the general and floating registers replace
    103    the first four argument slots.  Non FP arguments are passed solely
    104    in the general registers.  FP arguments are passed in both general
    105    and floating registers when using libffi.
    106 
    107    Non-FP 32-bit args are passed in gr26, gr25, gr24 and gr23.
    108    Non-FP 64-bit args are passed in register pairs, starting
    109    on an odd numbered register (i.e. r25+r26 and r23+r24).
    110    FP 32-bit arguments are passed in fr4L, fr5L, fr6L and fr7L.
    111    FP 64-bit arguments are passed in fr5 and fr7.
    112 
    113    The registers are allocated in the same manner as stack slots.
    114    This allows the callee to save its arguments on the stack if
    115    necessary:
    116 
    117    arg word 3 -> gr23 or fr7L
    118    arg word 2 -> gr24 or fr6L or fr7R
    119    arg word 1 -> gr25 or fr5L
    120    arg word 0 -> gr26 or fr4L or fr5R
    121 
    122    Note that fr4R and fr6R are never used for arguments (i.e.,
    123    doubles are not passed in fr4 or fr6).
    124 
    125    The rest of the arguments are passed on the stack starting at SP-52,
    126    but 64-bit arguments need to be aligned to an 8-byte boundary
    127 
    128    This means we can have holes either in the register allocation,
    129    or in the stack.  */
    130 
    131 /* ffi_prep_args is called by the assembly routine once stack space
    132    has been allocated for the function's arguments
    133 
    134    The following code will put everything into the stack frame
    135    (which was allocated by the asm routine), and on return
    136    the asm routine will load the arguments that should be
    137    passed by register into the appropriate registers
    138 
    139    NOTE: We load floating point args in this function... that means we
    140    assume gcc will not mess with fp regs in here.

           STACK is the address argument slots are measured from: the value
           for a slot is written at (stack - slot), i.e. at decreasing
           addresses, and the first argument uses slot FIRST_ARG_SLOT.
           ECIF supplies the call description (ecif->cif) and the array of
           pointers to the argument values (ecif->avalue).
           BYTES is the size of the reserved area; it is only used by the
           level-5 debug dump and by the final sanity assertion.

           Per-type handling:
             - 8- and 16-bit integers are sign- or zero-extended to a word.
             - 32-bit integers and pointers are copied as one word.
             - 64-bit integers and doubles are stored in an aligned pair of
               words.
             - floats and doubles that land in the first four argument
               words are additionally loaded into an FP register.
             - structures of up to 4 bytes are right-justified in one word
               and structures of 5..8 bytes in an aligned pair of words;
               larger structures (and, under PA_HPUX, long doubles) are
               passed as the address of the caller's object, no copy is
               made here.  */
    141 
    142 void ffi_prep_args_pa32(UINT32 *stack, extended_cif *ecif, unsigned bytes)
    143 {
    144   register unsigned int i;
    145   register ffi_type **p_arg;
    146   register void **p_argv;
          /* Slot that the next argument will be written to.  */
    147   unsigned int slot = FIRST_ARG_SLOT;
    148   char *dest_cpy;
    149   size_t len;
    150 
    151   debug(1, "%s: stack = %p, ecif = %p, bytes = %u\n", __FUNCTION__, stack,
    152 	ecif, bytes);
    153 
          /* p_arg walks the type descriptors, p_argv the matching value
             pointers; both advance once per loop iteration.  */
    154   p_arg = ecif->cif->arg_types;
    155   p_argv = ecif->avalue;
    156 
    157   for (i = 0; i < ecif->cif->nargs; i++)
    158     {
    159       int type = (*p_arg)->type;
    160 
    161       switch (type)
    162 	{
    163 	case FFI_TYPE_SINT8:
    164 	  *(SINT32 *)(stack - slot) = *(SINT8 *)(*p_argv);
    165 	  break;
    166 
    167 	case FFI_TYPE_UINT8:
    168 	  *(UINT32 *)(stack - slot) = *(UINT8 *)(*p_argv);
    169 	  break;
    170 
    171 	case FFI_TYPE_SINT16:
    172 	  *(SINT32 *)(stack - slot) = *(SINT16 *)(*p_argv);
    173 	  break;
    174 
    175 	case FFI_TYPE_UINT16:
    176 	  *(UINT32 *)(stack - slot) = *(UINT16 *)(*p_argv);
    177 	  break;
    178 
    179 	case FFI_TYPE_UINT32:
    180 	case FFI_TYPE_SINT32:
    181 	case FFI_TYPE_POINTER:
    182 	  debug(3, "Storing UINT32 %u in slot %u\n", *(UINT32 *)(*p_argv),
    183 		slot);
    184 	  *(UINT32 *)(stack - slot) = *(UINT32 *)(*p_argv);
    185 	  break;
    186 
    187 	case FFI_TYPE_UINT64:
    188 	case FFI_TYPE_SINT64:
    189 	  /* Align slot for 64-bit type.  */
        	  /* An odd slot advances by one and an even slot by two, so
        	     slot is even afterwards.  The 8-byte store at
        	     (stack - slot) covers slots slot-1 and slot; when slot
        	     was even beforehand one word is left unused.  */
    190 	  slot += (slot & 1) ? 1 : 2;
    191 	  *(UINT64 *)(stack - slot) = *(UINT64 *)(*p_argv);
    192 	  break;
    193 
    194 	case FFI_TYPE_FLOAT:
    195 	  /* First 4 args go in fr4L - fr7L.  */
    196 	  debug(3, "Storing UINT32(float) in slot %u\n", slot);
    197 	  *(UINT32 *)(stack - slot) = *(UINT32 *)(*p_argv);
        	  /* The value has just been written to its slot; reload it
        	     from there into the FP register that corresponds to the
        	     argument word.  Slots past the fourth word get no
        	     register.  */
    198 	  switch (slot - FIRST_ARG_SLOT)
    199 	    {
    200 	    /* First 4 args go in fr4L - fr7L.  */
    201 	    case 0: fldw(stack - slot, fr4); break;
    202 	    case 1: fldw(stack - slot, fr5); break;
    203 	    case 2: fldw(stack - slot, fr6); break;
    204 	    case 3: fldw(stack - slot, fr7); break;
    205 	    }
    206 	  break;
    207 
    208 	case FFI_TYPE_DOUBLE:
    209 	  /* Align slot for 64-bit type.  */
    210 	  slot += (slot & 1) ? 1 : 2;
    211 	  debug(3, "Storing UINT64(double) at slot %u\n", slot);
    212 	  *(UINT64 *)(stack - slot) = *(UINT64 *)(*p_argv);
        	  /* After alignment only word offsets 1 and 3 can fall inside
        	     the first four argument words.  */
    213 	  switch (slot - FIRST_ARG_SLOT)
    214 	    {
    215 	      /* First 2 args go in fr5, fr7.  */
    216 	      case 1: fldd(stack - slot, fr5); break;
    217 	      case 3: fldd(stack - slot, fr7); break;
    218 	    }
    219 	  break;
    220 
    221 #ifdef PA_HPUX
    222 	case FFI_TYPE_LONGDOUBLE:
    223 	  /* Long doubles are passed in the same manner as structures
    224 	     larger than 8 bytes.  */
    225 	  *(UINT32 *)(stack - slot) = (UINT32)(*p_argv);
    226 	  break;
    227 #endif
    228 
    229 	case FFI_TYPE_STRUCT:
    230 
    231 	  /* Structs smaller or equal than 4 bytes are passed in one
    232 	     register. Structs smaller or equal 8 bytes are passed in two
    233 	     registers. Larger structures are passed by pointer.  */
    234 
    235 	  len = (*p_arg)->size;
    236 	  if (len <= 4)
    237 	    {
        	      /* Right-justify: the copy ends at the last byte of the
        	         word.  */
    238 	      dest_cpy = (char *)(stack - slot) + 4 - len;
    239 	      memcpy(dest_cpy, (char *)*p_argv, len);
    240 	    }
    241 	  else if (len <= 8)
    242 	    {
        	      /* Same slot alignment as for 64-bit scalars; the copy
        	         ends at the last byte of the two-word pair.  */
    243 	      slot += (slot & 1) ? 1 : 2;
    244 	      dest_cpy = (char *)(stack - slot) + 8 - len;
    245 	      memcpy(dest_cpy, (char *)*p_argv, len);
    246 	    }
    247 	  else
        	    /* Store the address of the caller's object.  */
    248 	    *(UINT32 *)(stack - slot) = (UINT32)(*p_argv);
    249 	  break;
    250 
    251 	default:
    252 	  FFI_ASSERT(0);
    253 	}
    254 
              /* Move on to the next free slot and the next argument.  */
    255       slot++;
    256       p_arg++;
    257       p_argv++;
    258     }
    259 
    260   /* Make sure we didn't mess up and scribble on the stack.  */
    261   {
    262     unsigned int n;
    263 
            /* Level-5 trace: print every word of the BYTES-sized area, four
               words per output line.  */
    264     debug(5, "Stack setup:\n");
    265     for (n = 0; n < (bytes + 3) / 4; n++)
    266       {
    267 	if ((n%4) == 0) { debug(5, "\n%08x: ", (unsigned int)(stack - n)); }
    268 	debug(5, "%08x ", *(stack - n));
    269       }
    270     debug(5, "\n");
    271   }
    272 
          /* The last slot written must still lie inside the reserved area.  */
    273   FFI_ASSERT(slot * 4 <= bytes);
    274 
    275   return;
    276 }
    277 
    278 static void ffi_size_stack_pa32(ffi_cif *cif)
    279 {
    280   ffi_type **ptr;
    281   int i;
    282   int z = 0; /* # stack slots */
    283 
    284   for (ptr = cif->arg_types, i = 0; i < cif->nargs; ptr++, i++)
    285     {
    286       int type = (*ptr)->type;
    287 
    288       switch (type)
    289 	{
    290 	case FFI_TYPE_DOUBLE:
    291 	case FFI_TYPE_UINT64:
    292 	case FFI_TYPE_SINT64:
    293 	  z += 2 + (z & 1); /* must start on even regs, so we may waste one */
    294 	  break;
    295 
    296 #ifdef PA_HPUX
    297 	case FFI_TYPE_LONGDOUBLE:
    298 #endif
    299 	case FFI_TYPE_STRUCT:
    300 	  z += 1; /* pass by ptr, callee will copy */
    301 	  break;
    302 
    303 	default: /* <= 32-bit values */
    304 	  z++;
    305 	}
    306     }
    307 
    308   /* We can fit up to 6 args in the default 64-byte stack frame,
    309      if we need more, we need more stack.  */
    310   if (z <= 6)
    311     cif->bytes = MIN_STACK_SIZE; /* min stack size */
    312   else
    313     cif->bytes = 64 + ROUND_UP((z - 6) * sizeof(UINT32), MIN_STACK_SIZE);
    314 
    315   debug(3, "Calculated stack size is %u bytes\n", cif->bytes);
    316 }
    317 
    318 /* Perform machine dependent cif processing.  */
    319 ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
    320 {
    321   /* Set the return type flag */
    322   switch (cif->rtype->type)
    323     {
    324     case FFI_TYPE_VOID:
    325     case FFI_TYPE_FLOAT:
    326     case FFI_TYPE_DOUBLE:
    327       cif->flags = (unsigned) cif->rtype->type;
    328       break;
    329 
    330 #ifdef PA_HPUX
    331     case FFI_TYPE_LONGDOUBLE:
    332       /* Long doubles are treated like a structure.  */
    333       cif->flags = FFI_TYPE_STRUCT;
    334       break;
    335 #endif
    336 
    337     case FFI_TYPE_STRUCT:
    338       /* For the return type we have to check the size of the structures.
    339 	 If the size is smaller or equal 4 bytes, the result is given back
    340 	 in one register. If the size is smaller or equal 8 bytes than we
    341 	 return the result in two registers. But if the size is bigger than
    342 	 8 bytes, we work with pointers.  */
    343       cif->flags = ffi_struct_type(cif->rtype);
    344       break;
    345 
    346     case FFI_TYPE_UINT64:
    347     case FFI_TYPE_SINT64:
    348       cif->flags = FFI_TYPE_UINT64;
    349       break;
    350 
    351     default:
    352       cif->flags = FFI_TYPE_INT;
    353       break;
    354     }
    355 
    356   /* Lucky us, because of the unique PA ABI we get to do our
    357      own stack sizing.  */
    358   switch (cif->abi)
    359     {
    360     case FFI_PA32:
    361       ffi_size_stack_pa32(cif);
    362       break;
    363 
    364     default:
    365       FFI_ASSERT(0);
    366       break;
    367     }
    368 
    369   return FFI_OK;
    370 }
    371 
    372 extern void ffi_call_pa32(void (*)(UINT32 *, extended_cif *, unsigned),
    373 			  extended_cif *, unsigned, unsigned, unsigned *,
    374 			  void (*fn)(void));
    375 
    376 void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
    377 {
    378   extended_cif ecif;
    379 
    380   ecif.cif = cif;
    381   ecif.avalue = avalue;
    382 
    383   /* If the return value is a struct and we don't have a return
    384      value address then we need to make one.  */
    385 
    386   if (rvalue == NULL
    387 #ifdef PA_HPUX
    388       && (cif->rtype->type == FFI_TYPE_STRUCT
    389 	  || cif->rtype->type == FFI_TYPE_LONGDOUBLE))
    390 #else
    391       && cif->rtype->type == FFI_TYPE_STRUCT)
    392 #endif
    393     {
    394       ecif.rvalue = alloca(cif->rtype->size);
    395     }
    396   else
    397     ecif.rvalue = rvalue;
    398 
    399 
    400   switch (cif->abi)
    401     {
    402     case FFI_PA32:
    403       debug(3, "Calling ffi_call_pa32: ecif=%p, bytes=%u, flags=%u, rvalue=%p, fn=%p\n", &ecif, cif->bytes, cif->flags, ecif.rvalue, (void *)fn);
    404       ffi_call_pa32(ffi_prep_args_pa32, &ecif, cif->bytes,
    405 		     cif->flags, ecif.rvalue, fn);
    406       break;
    407 
    408     default:
    409       FFI_ASSERT(0);
    410       break;
    411     }
    412 }
    413 
    414 #if FFI_CLOSURES
    415 /* This is more-or-less an inverse of ffi_call -- we have arguments on
    416    the stack, and we need to fill them into a cif structure and invoke
    417    the user function. This really ought to be in asm to make sure
    418    the compiler doesn't do things we don't expect.

           CLOSURE supplies the call description (closure->cif), the user
           function (closure->fun) and its user_data.
           STACK is the address argument slots are measured from: the
           incoming argument words are read at (stack - slot), starting with
           slot FIRST_ARG_SLOT.  Results that are returned in general
           registers are written back to the words at
           (stack - FIRST_ARG_SLOT) and (stack - FIRST_ARG_SLOT - 1).
           NOTE(review): presumably the assembly stub that calls this
           function reloads the return registers from those words -- confirm.

           Always returns FFI_OK.  */
    419 ffi_status ffi_closure_inner_pa32(ffi_closure *closure, UINT32 *stack)
    420 {
    421   ffi_cif *cif;
    422   void **avalue;
    423   void *rvalue;
    424   UINT32 ret[2]; /* function can return up to 64-bits in registers */
    425   ffi_type **p_arg;
    426   char *tmp;
    427   int i, avn;
    428   unsigned int slot = FIRST_ARG_SLOT;
          /* Local variable bound to hardware register r28; it is read below
             to obtain the address a large structure result is written to.
             NOTE(review): this relies on r28 still holding the value it had
             on entry when it is read -- confirm against the assembly
             caller.  */
    429   register UINT32 r28 asm("r28");
    430 
    431   cif = closure->cif;
    432 
    433   /* If returning via structure, callee will write to our pointer.  */
    434   if (cif->flags == FFI_TYPE_STRUCT)
    435     rvalue = (void *)r28;
    436   else
    437     rvalue = &ret[0];
    438 
          /* One pointer per argument; the array is handed to the user
             function.  */
    439   avalue = (void **)alloca(cif->nargs * FFI_SIZEOF_ARG);
    440   avn = cif->nargs;
    441   p_arg = cif->arg_types;
    442 
    443   for (i = 0; i < avn; i++)
    444     {
    445       int type = (*p_arg)->type;
    446 
    447       switch (type)
    448 	{
    449 	case FFI_TYPE_SINT8:
    450 	case FFI_TYPE_UINT8:
    451 	case FFI_TYPE_SINT16:
    452 	case FFI_TYPE_UINT16:
    453 	case FFI_TYPE_SINT32:
    454 	case FFI_TYPE_UINT32:
    455 	case FFI_TYPE_POINTER:
        	  /* Point at the last SIZE bytes of the argument word.  */
    456 	  avalue[i] = (char *)(stack - slot) + sizeof(UINT32) - (*p_arg)->size;
    457 	  break;
    458 
    459 	case FFI_TYPE_SINT64:
    460 	case FFI_TYPE_UINT64:
        	  /* 64-bit values sit in an aligned pair of words: an odd
        	     slot advances by one, an even slot by two.  */
    461 	  slot += (slot & 1) ? 1 : 2;
    462 	  avalue[i] = (void *)(stack - slot);
    463 	  break;
    464 
    465 	case FFI_TYPE_FLOAT:
    466 #ifdef PA_LINUX
    467 	  /* The closure call is indirect.  In Linux, floating point
    468 	     arguments in indirect calls with a prototype are passed
    469 	     in the floating point registers instead of the general
    470 	     registers.  So, we need to replace what was previously
    471 	     stored in the current slot with the value in the
    472 	     corresponding floating point register.  */
    473 	  switch (slot - FIRST_ARG_SLOT)
    474 	    {
    475 	    case 0: fstw(fr4, (void *)(stack - slot)); break;
    476 	    case 1: fstw(fr5, (void *)(stack - slot)); break;
    477 	    case 2: fstw(fr6, (void *)(stack - slot)); break;
    478 	    case 3: fstw(fr7, (void *)(stack - slot)); break;
    479 	    }
    480 #endif
    481 	  avalue[i] = (void *)(stack - slot);
    482 	  break;
    483 
    484 	case FFI_TYPE_DOUBLE:
    485 	  slot += (slot & 1) ? 1 : 2;
    486 #ifdef PA_LINUX
    487 	  /* See previous comment for FFI_TYPE_FLOAT.  */
    488 	  switch (slot - FIRST_ARG_SLOT)
    489 	    {
    490 	    case 1: fstd(fr5, (void *)(stack - slot)); break;
    491 	    case 3: fstd(fr7, (void *)(stack - slot)); break;
    492 	    }
    493 #endif
    494 	  avalue[i] = (void *)(stack - slot);
    495 	  break;
    496 
    497 #ifdef PA_HPUX
    498 	case FFI_TYPE_LONGDOUBLE:
    499 	  /* Long doubles are treated like a big structure.  */
        	  /* The argument word holds the address of the value.  */
    500 	  avalue[i] = (void *) *(stack - slot);
    501 	  break;
    502 #endif
    503 
    504 	case FFI_TYPE_STRUCT:
    505 	  /* Structs smaller or equal than 4 bytes are passed in one
    506 	     register. Structs smaller or equal 8 bytes are passed in two
    507 	     registers. Larger structures are passed by pointer.  */
    508 	  if((*p_arg)->size <= 4)
    509 	    {
        	      /* Right-justified in one word.  The arithmetic on
        	         void * relies on the GNU extension that treats it
        	         like char *.  */
    510 	      avalue[i] = (void *)(stack - slot) + sizeof(UINT32) -
    511 		(*p_arg)->size;
    512 	    }
    513 	  else if ((*p_arg)->size <= 8)
    514 	    {
        	      /* Right-justified in an aligned pair of words.  */
    515 	      slot += (slot & 1) ? 1 : 2;
    516 	      avalue[i] = (void *)(stack - slot) + sizeof(UINT64) -
    517 		(*p_arg)->size;
    518 	    }
    519 	  else
        	    /* The argument word holds the address of the structure.  */
    520 	    avalue[i] = (void *) *(stack - slot);
    521 	  break;
    522 
    523 	default:
    524 	  FFI_ASSERT(0);
    525 	}
    526 
    527       slot++;
    528       p_arg++;
    529     }
    530 
    531   /* Invoke the closure.  */
    532   (closure->fun) (cif, rvalue, avalue, closure->user_data);
    533 
    534   debug(3, "after calling function, ret[0] = %08x, ret[1] = %08x\n", ret[0],
    535 	ret[1]);
    536 
    537   /* Store the result according to the return-type code in cif->flags.  */
    538   switch (cif->flags)
    539     {
          /* For 8- and 16-bit results only the most significant byte or
             halfword of ret[0] is kept; the cast then sign- or zero-extends
             it to a full word.  */
    540     case FFI_TYPE_UINT8:
    541       *(stack - FIRST_ARG_SLOT) = (UINT8)(ret[0] >> 24);
    542       break;
    543     case FFI_TYPE_SINT8:
    544       *(stack - FIRST_ARG_SLOT) = (SINT8)(ret[0] >> 24);
    545       break;
    546     case FFI_TYPE_UINT16:
    547       *(stack - FIRST_ARG_SLOT) = (UINT16)(ret[0] >> 16);
    548       break;
    549     case FFI_TYPE_SINT16:
    550       *(stack - FIRST_ARG_SLOT) = (SINT16)(ret[0] >> 16);
    551       break;
    552     case FFI_TYPE_INT:
    553     case FFI_TYPE_SINT32:
    554     case FFI_TYPE_UINT32:
    555       *(stack - FIRST_ARG_SLOT) = ret[0];
    556       break;
    557     case FFI_TYPE_SINT64:
    558     case FFI_TYPE_UINT64:
    559       *(stack - FIRST_ARG_SLOT) = ret[0];
    560       *(stack - FIRST_ARG_SLOT - 1) = ret[1];
    561       break;
    562 
          /* Floating-point results are loaded from the result buffer
             straight into fr4.  */
    563     case FFI_TYPE_DOUBLE:
    564       fldd(rvalue, fr4);
    565       break;
    566 
    567     case FFI_TYPE_FLOAT:
    568       fldw(rvalue, fr4);
    569       break;
    570 
    571     case FFI_TYPE_STRUCT:
    572       /* Don't need a return value, done by caller.  */
    573       break;
    574 
    575     case FFI_TYPE_SMALL_STRUCT2:
    576     case FFI_TYPE_SMALL_STRUCT3:
    577     case FFI_TYPE_SMALL_STRUCT4:
              /* Right-justify the structure in the first result word.  */
    578       tmp = (void*)(stack -  FIRST_ARG_SLOT);
    579       tmp += 4 - cif->rtype->size;
    580       memcpy((void*)tmp, &ret[0], cif->rtype->size);
    581       break;
    582 
    583     case FFI_TYPE_SMALL_STRUCT5:
    584     case FFI_TYPE_SMALL_STRUCT6:
    585     case FFI_TYPE_SMALL_STRUCT7:
    586     case FFI_TYPE_SMALL_STRUCT8:
    587       {
    588 	unsigned int ret2[2];
    589 	int off;
    590 
    591 	/* Right justify ret[0] and ret[1] */
        	/* off is the number of leading pad bytes, 8 minus the
        	   structure size.  */
    592 	switch (cif->flags)
    593 	  {
    594 	    case FFI_TYPE_SMALL_STRUCT5: off = 3; break;
    595 	    case FFI_TYPE_SMALL_STRUCT6: off = 2; break;
    596 	    case FFI_TYPE_SMALL_STRUCT7: off = 1; break;
    597 	    default: off = 0; break;
    598 	  }
    599 
        	/* Zero the 8-byte staging area, then copy the structure
        	   bytes so that they end at its last byte.  */
    600 	memset (ret2, 0, sizeof (ret2));
    601 	memcpy ((char *)ret2 + off, ret, 8 - off);
    602 
    603 	*(stack - FIRST_ARG_SLOT) = ret2[0];
    604 	*(stack - FIRST_ARG_SLOT - 1) = ret2[1];
    605       }
    606       break;
    607 
    608     case FFI_TYPE_POINTER:
    609     case FFI_TYPE_VOID:
    610       break;
    611 
    612     default:
    613       debug(0, "assert with cif->flags: %d\n",cif->flags);
    614       FFI_ASSERT(0);
    615       break;
    616     }
    617   return FFI_OK;
    618 }
    619 
    620 /* Fill in a closure to refer to the specified fun and user_data.
    621    cif specifies the argument and result types for fun.
    622    The cif must already be prep'ed.

           The function writes a trampoline into closure->tramp, flushes it
           from the data and instruction caches with inline assembly and then
           records cif, fun and user_data in the closure.  The codeloc
           argument is not used by this implementation.

           Returns FFI_BAD_ABI, without touching the closure, when cif->abi
           is not FFI_PA32, and FFI_OK otherwise.  */
    623 
        /* Handler whose address is stored in the last trampoline word.  */
    624 extern void ffi_closure_pa32(void);
    625 
    626 ffi_status
    627 ffi_prep_closure_loc (ffi_closure* closure,
    628 		      ffi_cif* cif,
    629 		      void (*fun)(ffi_cif*,void*,void**,void*),
    630 		      void *user_data,
    631 		      void *codeloc)
    632 {
          /* The trampoline is written as a sequence of 32-bit words.  */
    633   UINT32 *tramp = (UINT32 *)(closure->tramp);
    634 #ifdef PA_HPUX
          /* Output operand for the cache-flush assembly below.  */
    635   UINT32 *tmp;
    636 #endif
    637 
    638   if (cif->abi != FFI_PA32)
    639     return FFI_BAD_ABI;
    640 
    641   /* Make a small trampoline that will branch to our
    642      handler function. Use PC-relative addressing.  */
    643 
    644 #ifdef PA_LINUX
          /* Eight words: seven instruction encodings followed by the address
             of ffi_closure_pa32 with bit 1 cleared.  */
    645   tramp[0] = 0xeaa00000; /* b,l .+8,%r21        ; %r21 <- pc+8 */
    646   tramp[1] = 0xd6a01c1e; /* depi 0,31,2,%r21    ; mask priv bits */
    647   tramp[2] = 0x4aa10028; /* ldw 20(%r21),%r1    ; load plabel */
    648   tramp[3] = 0x36b53ff1; /* ldo -8(%r21),%r21   ; get closure addr */
    649   tramp[4] = 0x0c201096; /* ldw 0(%r1),%r22     ; address of handler */
    650   tramp[5] = 0xeac0c000; /* bv%r0(%r22)         ; branch to handler */
    651   tramp[6] = 0x0c281093; /* ldw 4(%r1),%r19     ; GP of handler */
    652   tramp[7] = ((UINT32)(ffi_closure_pa32) & ~2);
    653 
    654   /* Flush d/icache -- have to flush up 2 two lines because of
    655      alignment.  */
          /* Operand %0 is the trampoline address rounded down to a multiple
             of 32 and operand %1 is the 32-byte stride, so the flushes hit
             offsets 0 and 32 from that address.  */
    656   __asm__ volatile(
    657 		   "fdc 0(%0)\n\t"
    658 		   "fdc %1(%0)\n\t"
    659 		   "fic 0(%%sr4, %0)\n\t"
    660 		   "fic %1(%%sr4, %0)\n\t"
    661 		   "sync\n\t"
    662 		   "nop\n\t"
    663 		   "nop\n\t"
    664 		   "nop\n\t"
    665 		   "nop\n\t"
    666 		   "nop\n\t"
    667 		   "nop\n\t"
    668 		   "nop\n"
    669 		   :
    670 		   : "r"((unsigned long)tramp & ~31),
    671 		     "r"(32 /* stride */)
    672 		   : "memory");
    673 #endif
    674 
    675 #ifdef PA_HPUX
          /* Ten words: nine instruction encodings followed by the address of
             ffi_closure_pa32 with bit 1 cleared.  */
    676   tramp[0] = 0xeaa00000; /* b,l .+8,%r21        ; %r21 <- pc+8  */
    677   tramp[1] = 0xd6a01c1e; /* depi 0,31,2,%r21    ; mask priv bits  */
    678   tramp[2] = 0x4aa10038; /* ldw 28(%r21),%r1    ; load plabel  */
    679   tramp[3] = 0x36b53ff1; /* ldo -8(%r21),%r21   ; get closure addr  */
    680   tramp[4] = 0x0c201096; /* ldw 0(%r1),%r22     ; address of handler  */
    681   tramp[5] = 0x02c010b4; /* ldsid (%r22),%r20   ; load space id  */
    682   tramp[6] = 0x00141820; /* mtsp %r20,%sr0      ; into %sr0  */
    683   tramp[7] = 0xe2c00000; /* be 0(%sr0,%r22)     ; branch to handler  */
    684   tramp[8] = 0x0c281093; /* ldw 4(%r1),%r19     ; GP of handler  */
    685   tramp[9] = ((UINT32)(ffi_closure_pa32) & ~2);
    686 
    687   /* Flush d/icache -- have to flush three lines because of alignment.  */
          /* Operand %0 is a scratch register (tmp), %1 the trampoline address
             rounded down to a multiple of 32 and %2 the 32-byte stride.  The
             scratch register is set to %1 before each group of three
             flushes.  */
    688   __asm__ volatile(
    689 		   "copy %1,%0\n\t"
    690 		   "fdc,m %2(%0)\n\t"
    691 		   "fdc,m %2(%0)\n\t"
    692 		   "fdc,m %2(%0)\n\t"
    693 		   "ldsid (%1),%0\n\t"
    694 		   "mtsp %0,%%sr0\n\t"
    695 		   "copy %1,%0\n\t"
    696 		   "fic,m %2(%%sr0,%0)\n\t"
    697 		   "fic,m %2(%%sr0,%0)\n\t"
    698 		   "fic,m %2(%%sr0,%0)\n\t"
    699 		   "sync\n\t"
    700 		   "nop\n\t"
    701 		   "nop\n\t"
    702 		   "nop\n\t"
    703 		   "nop\n\t"
    704 		   "nop\n\t"
    705 		   "nop\n\t"
    706 		   "nop\n"
    707 		   : "=&r" ((unsigned long)tmp)
    708 		   : "r" ((unsigned long)tramp & ~31),
    709 		     "r" (32/* stride */)
    710 		   : "memory");
    711 #endif
    712 
          /* Record what the handler needs once the trampoline is invoked.  */
    713   closure->cif  = cif;
    714   closure->user_data = user_data;
    715   closure->fun  = fun;
    716 
    717   return FFI_OK;
    718 }
    719 #endif
    720