Initial checkin

2025-07-04 23:36:00 +02:00 · 2019-08-23 11:56:54 -07:00 · 2019-08-23 11:56:54 -07:00 · 078564ac9e
commit 078564ac9e
parent c74712dad9
3242 changed files with 1616395 additions and 0 deletions
--- a/BeefySysLib/third_party/libffi/ios/src/arm/ffi.c
+++ b/BeefySysLib/third_party/libffi/ios/src/arm/ffi.c
@ -0,0 +1,761 @@
+#ifdef __arm__
+
+/* -----------------------------------------------------------------------
+   ffi.c - Copyright (c) 2011 Timothy Wall
+           Copyright (c) 2011 Plausible Labs Cooperative, Inc.
+           Copyright (c) 2011 Anthony Green
+	   Copyright (c) 2011 Free Software Foundation
+           Copyright (c) 1998, 2008, 2011  Red Hat, Inc.
+	   
+   ARM Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+
+/* Forward declares. */
+static int vfp_type_p (ffi_type *);
+static void layout_vfp_args (ffi_cif *);
+
+/* ffi_prep_args is called by the assembly routine once stack space
+   has been allocated for the function's arguments
+   
+   The vfp_space parameter is the load area for VFP regs, the return
+   value is cif->vfp_used (word bitset of VFP regs used for passing
+   arguments). These are only used for the VFP hard-float ABI.
+
+   Arguments are taken in order from ecif->avalue and described by
+   ecif->cif->arg_types.  Under FFI_VFP, an argument accepted by
+   vfp_type_p is written into vfp_space at the register index recorded
+   in cif->vfp_args; every other argument is copied to STACK, aligned
+   according to its ffi_type and widened to a whole int when narrower.
+   When cif->flags is FFI_TYPE_STRUCT the first stack word receives
+   ecif->rvalue before any argument is stored.
+*/
+int ffi_prep_args(char *stack, extended_cif *ecif, float *vfp_space)
+{
+  register unsigned int i, vi = 0; /* vi: next index into cif->vfp_args */
+  register void **p_argv;
+  register char *argp;
+  register ffi_type **p_arg;
+
+  argp = stack;
+
+  if ( ecif->cif->flags == FFI_TYPE_STRUCT ) {
+    *(void **) argp = ecif->rvalue;
+    argp += 4;
+  }
+
+  p_argv = ecif->avalue;
+
+  for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
+       (i != 0);
+       i--, p_arg++)
+    {
+      size_t z;
+      size_t alignment;
+
+      /* Allocated in VFP registers. */
+      if (ecif->cif->abi == FFI_VFP
+	  && vi < ecif->cif->vfp_nargs && vfp_type_p (*p_arg))
+	{
+	  float* vfp_slot = vfp_space + ecif->cif->vfp_args[vi++];
+	  if ((*p_arg)->type == FFI_TYPE_FLOAT)
+	    *((float*)vfp_slot) = *((float*)*p_argv);
+	  else if ((*p_arg)->type == FFI_TYPE_DOUBLE)
+	    *((double*)vfp_slot) = *((double*)*p_argv);
+	  else
+	    memcpy(vfp_slot, *p_argv, (*p_arg)->size);
+	  p_argv++;
+	  continue;
+	}
+
+      /* Align if necessary */
+      alignment = (*p_arg)->alignment;
+#ifdef _WIN32_WCE
+      if (alignment > 4)
+	alignment = 4;
+#endif
+      if ((alignment - 1) & (unsigned) argp) {
+	argp = (char *) ALIGN(argp, alignment);
+      }
+
+      if ((*p_arg)->type == FFI_TYPE_STRUCT)
+	argp = (char *) ALIGN(argp, 4);
+
+	  z = (*p_arg)->size; /* NOTE: despite the indentation this runs for every argument; only the ALIGN above is controlled by the struct test */
+	  if (z < sizeof(int))
+	    {
+	      z = sizeof(int); /* narrow values still consume a whole int slot */
+	      switch ((*p_arg)->type)
+		{
+		case FFI_TYPE_SINT8:
+		  *(signed int *) argp = (signed int)*(SINT8 *)(* p_argv);
+		  break;
+		  
+		case FFI_TYPE_UINT8:
+		  *(unsigned int *) argp = (unsigned int)*(UINT8 *)(* p_argv);
+		  break;
+		  
+		case FFI_TYPE_SINT16:
+		  *(signed int *) argp = (signed int)*(SINT16 *)(* p_argv);
+		  break;
+		  
+		case FFI_TYPE_UINT16:
+		  *(unsigned int *) argp = (unsigned int)*(UINT16 *)(* p_argv);
+		  break;
+		  
+		case FFI_TYPE_STRUCT:
+		  memcpy(argp, *p_argv, (*p_arg)->size);
+		  break;
+
+		default:
+		  FFI_ASSERT(0);
+		}
+	    }
+	  else if (z == sizeof(int))
+	    {
+	      *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv);
+	    }
+	  else
+	    {
+	      memcpy(argp, *p_argv, z);
+	    }
+	  p_argv++;
+	  argp += z;
+    }
+
+  /* Indicate the VFP registers used. */
+  return ecif->cif->vfp_used;
+}
+
+/* Machine-dependent part of cif preparation.
+
+   Pads cif->bytes to a multiple of 8, derives cif->flags from the return
+   type and, for the FFI_VFP ABI, lays out the VFP register arguments.
+   Always returns FFI_OK.  */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+  const int is_vfp = (cif->abi == FFI_VFP);
+  int vfp_code;
+
+  /* An 8-byte multiple is not required everywhere, but it is on some
+     platforms and is harmless on the others.  */
+  cif->bytes = (cif->bytes + 7) & ~7;
+
+  /* Choose the return type flag.  */
+  switch (cif->rtype->type)
+    {
+    case FFI_TYPE_STRUCT:
+      /* vfp_type_p is only consulted under the hard-float ABI.  */
+      vfp_code = is_vfp ? vfp_type_p (cif->rtype) : 0;
+      if (vfp_code != 0)
+	{
+	  /* Composite returned in VFP registers: either
+	     FFI_TYPE_STRUCT_VFP_FLOAT or FFI_TYPE_STRUCT_VFP_DOUBLE.  */
+	  cif->flags = (unsigned) vfp_code;
+	}
+      else if (cif->rtype->size > 4)
+	{
+	  /* Larger than 4 bytes: stored in memory at an address
+	     passed in r0.  */
+	  cif->flags = (unsigned)FFI_TYPE_STRUCT;
+	}
+      else
+	{
+	  /* At most 4 bytes: returned in r0.  */
+	  cif->flags = (unsigned)FFI_TYPE_INT;
+	}
+      break;
+
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+      cif->flags = (unsigned) FFI_TYPE_SINT64;
+      break;
+
+    case FFI_TYPE_DOUBLE:
+    case FFI_TYPE_FLOAT:
+    case FFI_TYPE_VOID:
+      cif->flags = (unsigned) cif->rtype->type;
+      break;
+
+    default:
+      cif->flags = FFI_TYPE_INT;
+      break;
+    }
+
+  /* The hard-float register assignment is more involved than the base
+     convention, so it is computed once here rather than on every call
+     in ffi_prep_args().  */
+  if (is_vfp)
+    layout_vfp_args (cif);
+
+  return FFI_OK;
+}
+
+/* Machine-dependent cif preparation for variadic calls.
+
+   A cif that asks for FFI_VFP is switched to FFI_SYSV first, because VFP
+   variadic calls use the SYSV ABI; the regular preparation then runs.
+   NFIXEDARGS and NTOTALARGS are accepted but not consulted here.  */
+ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
+				    unsigned int nfixedargs,
+				    unsigned int ntotalargs)
+{
+  const int wants_vfp = (cif->abi == FFI_VFP);
+
+  if (wants_vfp)
+    {
+      cif->abi = FFI_SYSV;
+    }
+
+  return ffi_prep_cif_machdep(cif);
+}
+
+/* Prototypes for assembly functions, in sysv.S */
+extern void ffi_call_SYSV (void (*fn)(void), extended_cif *, unsigned, unsigned, unsigned *);
+extern void ffi_call_VFP (void (*fn)(void), extended_cif *, unsigned, unsigned, unsigned *);
+
+/* Call FN through the prepared CIF.
+
+   AVALUE holds one pointer per argument.  RVALUE receives the result and
+   may be NULL when the caller does not want it: a struct returned in
+   memory then lands in a temporary, and small or VFP-register structs are
+   simply not copied back.  An ABI this build does not support trips
+   FFI_ASSERT.  */
+void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  extended_cif ecif;
+  unsigned int temp = 0;
+
+  /* Struct of at most 4 bytes, returned in r0 (flags set by
+     ffi_prep_cif_machdep).  */
+  int small_struct = (cif->flags == FFI_TYPE_INT 
+		      && cif->rtype->type == FFI_TYPE_STRUCT);
+  /* Struct returned in VFP registers.  */
+  int vfp_struct = (cif->flags == FFI_TYPE_STRUCT_VFP_FLOAT
+		    || cif->flags == FFI_TYPE_STRUCT_VFP_DOUBLE);
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+
+  /* If the return value is a struct and we don't have a return	*/
+  /* value address then we need to make one		        */
+
+  if ((rvalue == NULL) && 
+      (cif->flags == FFI_TYPE_STRUCT))
+    {
+      ecif.rvalue = alloca(cif->rtype->size);
+    }
+  else if (small_struct)
+    ecif.rvalue = &temp;
+  else if (vfp_struct)
+    {
+      /* Largest case is double x 4. */
+      ecif.rvalue = alloca(32);
+    }
+  else
+    ecif.rvalue = rvalue;
+
+  switch (cif->abi) 
+    {
+    case FFI_SYSV:
+      ffi_call_SYSV (fn, &ecif, cif->bytes, cif->flags, ecif.rvalue);
+      break;
+
+    case FFI_VFP:
+#ifdef __ARM_EABI__
+      ffi_call_VFP (fn, &ecif, cif->bytes, cif->flags, ecif.rvalue);
+      break;
+#endif
+      /* Without __ARM_EABI__ this case deliberately falls through.  */
+
+    default:
+      FFI_ASSERT(0);
+      break;
+    }
+
+  /* Copy a struct result out of the temporary, but only when the caller
+     supplied somewhere to put it; memcpy to a NULL destination is
+     undefined behaviour.  */
+  if (rvalue != NULL)
+    {
+      if (small_struct)
+	memcpy (rvalue, &temp, cif->rtype->size);
+      else if (vfp_struct)
+	memcpy (rvalue, ecif.rvalue, cif->rtype->size);
+    }
+}
+
+/** private members **/
+
+static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
+					 void** args, ffi_cif* cif, float *vfp_stack);
+
+void ffi_closure_SYSV (ffi_closure *);
+
+void ffi_closure_VFP (ffi_closure *);
+
+/* This function is jumped to by the trampoline.
+
+   CLOSURE is the closure being invoked, RESPP points at the slot holding
+   the result address, ARGS is the incoming stack argument area and
+   VFP_ARGS the saved VFP argument registers (used only under the VFP
+   hard-float ABI).  Returns cif->flags.
+
+   Written with a prototype instead of old-style (K&R) parameter
+   declarations, which the compiler cannot type-check and which C23
+   removes.  */
+
+unsigned int
+ffi_closure_SYSV_inner (ffi_closure *closure, void **respp,
+			void *args, void *vfp_args)
+{
+  ffi_cif       *cif;
+  void         **arg_area;
+
+  cif         = closure->cif;
+  /* One pointer slot per declared argument.  */
+  arg_area    = (void**) alloca (cif->nargs * sizeof (void*));  
+
+  /* this call will initialize ARG_AREA, such that each
+   * element in that array points to the corresponding 
+   * value on the stack; and if the function returns
+   * a structure, it will re-set RESP to point to the
+   * structure return address.  */
+
+  ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif, vfp_args);
+
+  (closure->fun) (cif, *respp, arg_area, closure->user_data);
+
+  return cif->flags;
+}
+
+/*@-exportheader@*/
+static void 
+ffi_prep_incoming_args_SYSV(char *stack, void **rvalue,
+			    void **avalue, ffi_cif *cif,
+			    /* Used only under VFP hard-float ABI. */
+			    float *vfp_stack)
+/*@=exportheader@*/
+{
+  /* Fills AVALUE with one pointer per argument of CIF.  Arguments that
+     were passed in VFP registers point into VFP_STACK; all others point
+     into STACK.  For a struct returned in memory the first stack word is
+     the result address, which is stored through RVALUE.  */
+  char *cursor = stack;
+  unsigned int n;
+  unsigned int vfp_idx = 0;
+
+  if (cif->flags == FFI_TYPE_STRUCT)
+    {
+      *rvalue = *(void **) cursor;
+      cursor += 4;
+    }
+
+  for (n = 0; n < cif->nargs; n++)
+    {
+      ffi_type *ty = cif->arg_types[n];
+      size_t boundary;
+
+      if (cif->abi == FFI_VFP
+	  && vfp_idx < cif->vfp_nargs && vfp_type_p (ty))
+	{
+	  avalue[n] = (void*)(vfp_stack + cif->vfp_args[vfp_idx++]);
+	  continue;
+	}
+
+      /* Every stack argument is aligned to at least a word.  */
+      boundary = ty->alignment;
+      if (boundary < 4)
+	boundary = 4;
+#ifdef _WIN32_WCE
+      else
+	if (boundary > 4)
+	  boundary = 4;
+#endif
+      if ((boundary - 1) & (unsigned) cursor)
+	cursor = (char *) ALIGN(cursor, boundary);
+
+      /* Little endian: the value starts at the slot's lowest address.  */
+      avalue[n] = (void*) cursor;
+      cursor += ty->size;
+    }
+}
+
+/* How to make a trampoline.  */
+
+extern unsigned int ffi_arm_trampoline[3];
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+
+#include <mach/mach.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+extern void *ffi_closure_trampoline_table_page;
+
+typedef struct ffi_trampoline_table ffi_trampoline_table;
+typedef struct ffi_trampoline_table_entry ffi_trampoline_table_entry;
+
+struct ffi_trampoline_table {
+  /* contiguous writable and executable pages; ffi_trampoline_table_alloc
+     places trampoline_page at config_page + PAGE_SIZE */
+  vm_address_t config_page;
+  vm_address_t trampoline_page;
+
+  /* free list tracking */
+  uint16_t free_count; /* number of entries currently on free_list */
+  ffi_trampoline_table_entry *free_list; /* head of the unused entries */
+  ffi_trampoline_table_entry *free_list_pool; /* calloc'ed array backing every entry; freed with the table */
+
+  ffi_trampoline_table *prev; /* neighbours in the ffi_trampoline_tables list */
+  ffi_trampoline_table *next;
+};
+
+struct ffi_trampoline_table_entry {
+  void *(*trampoline)(); /* address of this entry's trampoline inside the table's trampoline_page */
+  ffi_trampoline_table_entry *next; /* next free entry; set to NULL once the entry is claimed */
+};
+
+/* Override the standard architecture trampoline size */
+// XXX TODO - Fix
+#undef FFI_TRAMPOLINE_SIZE
+#define FFI_TRAMPOLINE_SIZE 12
+
+/* The trampoline configuration is placed at 4080 bytes prior to the trampoline's entry point.
+   Expands to a plain expression: the argument is parenthesized and there is no trailing
+   semicolon, so the macro can be used inside a larger expression. */
+#define FFI_TRAMPOLINE_CODELOC_CONFIG(codeloc) ((void **) (((uint8_t *) (codeloc)) - 4080))
+
+/* The first 16 bytes of the config page are unused, as they are unaddressable from the trampoline page. */
+#define FFI_TRAMPOLINE_CONFIG_PAGE_OFFSET 16
+
+/* Total number of trampolines that fit in one trampoline table */
+#define FFI_TRAMPOLINE_COUNT ((PAGE_SIZE - FFI_TRAMPOLINE_CONFIG_PAGE_OFFSET) / FFI_TRAMPOLINE_SIZE)
+
+static pthread_mutex_t ffi_trampoline_lock = PTHREAD_MUTEX_INITIALIZER;
+static ffi_trampoline_table *ffi_trampoline_tables = NULL;
+
+/* Allocate one trampoline table: a writable config page immediately
+   followed by a remapped copy of ffi_closure_trampoline_table_page, plus
+   the bookkeeping for its FFI_TRAMPOLINE_COUNT entries.
+
+   Returns the new table with every entry on its free list, or NULL when
+   the pages or the bookkeeping memory cannot be obtained.  Nothing stays
+   mapped or allocated on the NULL paths.  */
+static ffi_trampoline_table *
+ffi_trampoline_table_alloc (void)
+{
+  ffi_trampoline_table *table = NULL;
+
+  /* Loop until we can allocate two contiguous pages */
+  while (table == NULL) {
+    vm_address_t config_page = 0x0;
+    kern_return_t kt;
+
+    /* Try to allocate two pages */
+    kt = vm_allocate (mach_task_self (), &config_page, PAGE_SIZE*2, VM_FLAGS_ANYWHERE);
+    if (kt != KERN_SUCCESS) {
+      fprintf(stderr, "vm_allocate() failure: %d at %s:%d\n", kt, __FILE__, __LINE__);
+      break;
+    }
+
+    /* Now drop the second half of the allocation to make room for the trampoline table */
+    vm_address_t trampoline_page = config_page+PAGE_SIZE;
+    kt = vm_deallocate (mach_task_self (), trampoline_page, PAGE_SIZE);
+    if (kt != KERN_SUCCESS) {
+      fprintf(stderr, "vm_deallocate() failure: %d at %s:%d\n", kt, __FILE__, __LINE__);
+      /* Both pages are still ours at this point; release them (best
+         effort) instead of leaking the whole allocation. */
+      vm_deallocate (mach_task_self (), config_page, PAGE_SIZE*2);
+      break;
+    }
+
+    /* Remap the trampoline table to directly follow the config page */
+    vm_prot_t cur_prot;
+    vm_prot_t max_prot;
+
+    kt = vm_remap (mach_task_self (), &trampoline_page, PAGE_SIZE, 0x0, FALSE, mach_task_self (), (vm_address_t) &ffi_closure_trampoline_table_page, FALSE, &cur_prot, &max_prot, VM_INHERIT_SHARE);
+
+    /* If we lost access to the destination trampoline page, drop our config allocation mapping and retry */
+    if (kt != KERN_SUCCESS) {
+      /* Log unexpected failures */
+      if (kt != KERN_NO_SPACE) {
+        fprintf(stderr, "vm_remap() failure: %d at %s:%d\n", kt, __FILE__, __LINE__);
+      }
+
+      vm_deallocate (mach_task_self (), config_page, PAGE_SIZE);
+      continue;
+    }
+
+    /* We have valid trampoline and config pages; allocate the bookkeeping.
+       calloc leaves prev/next and the last entry's next as NULL. */
+    ffi_trampoline_table *fresh = calloc (1, sizeof *fresh);
+    ffi_trampoline_table_entry *pool = calloc (FFI_TRAMPOLINE_COUNT, sizeof *pool);
+    if (fresh == NULL || pool == NULL) {
+      /* Out of memory: undo everything rather than dereference NULL. */
+      free (pool);
+      free (fresh);
+      vm_deallocate (mach_task_self (), trampoline_page, PAGE_SIZE);
+      vm_deallocate (mach_task_self (), config_page, PAGE_SIZE);
+      break;
+    }
+
+    fresh->free_count = FFI_TRAMPOLINE_COUNT;
+    fresh->config_page = config_page;
+    fresh->trampoline_page = trampoline_page;
+    fresh->free_list_pool = pool;
+
+    /* Create and initialize the free list */
+    uint16_t i;
+    for (i = 0; i < fresh->free_count; i++) {
+      ffi_trampoline_table_entry *entry = &pool[i];
+      entry->trampoline = (void *) (fresh->trampoline_page + (i * FFI_TRAMPOLINE_SIZE));
+
+      if (i < fresh->free_count - 1)
+        entry->next = &pool[i+1];
+    }
+
+    fresh->free_list = pool;
+    table = fresh;
+  }
+
+  return table;
+}
+
+/* Allocate a closure of SIZE bytes and claim a trampoline for it.
+
+   On success returns the closure and stores the trampoline's entry point
+   in *CODE.  Returns NULL when either the closure memory or a trampoline
+   table cannot be allocated.  The caller releases the closure with
+   ffi_closure_free.  */
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+  /* Create the closure */
+  ffi_closure *closure = malloc(size);
+  if (closure == NULL)
+    return NULL;
+
+  pthread_mutex_lock(&ffi_trampoline_lock);
+
+  /* Check for an active trampoline table with available entries. */
+  ffi_trampoline_table *table = ffi_trampoline_tables;
+  if (table == NULL || table->free_list == NULL) {
+    table = ffi_trampoline_table_alloc ();
+    if (table == NULL) {
+      /* Release the lock on the failure path too; returning with it held
+         would deadlock every later closure allocation or free. */
+      pthread_mutex_unlock(&ffi_trampoline_lock);
+      free(closure);
+      return NULL;
+    }
+
+    /* Insert the new table at the top of the list */
+    table->next = ffi_trampoline_tables;
+    if (table->next != NULL)
+        table->next->prev = table;
+
+    ffi_trampoline_tables = table;
+  }
+
+  /* Claim the free entry; TABLE is the list head at this point. */
+  ffi_trampoline_table_entry *entry = table->free_list;
+  table->free_list = entry->next;
+  table->free_count--;
+  entry->next = NULL;
+
+  pthread_mutex_unlock(&ffi_trampoline_lock);
+
+  /* Initialize the return values */
+  *code = entry->trampoline;
+  closure->trampoline_table = table;
+  closure->trampoline_table_entry = entry;
+
+  return closure;
+}
+
+/* Release a closure obtained from ffi_closure_alloc and return its
+   trampoline to the owning table.  A NULL PTR is ignored.
+
+   A table whose entries are all free is unmapped and destroyed unless it
+   is the list head; any other non-head table is moved to the front so the
+   next allocation finds its free entry.  */
+void
+ffi_closure_free (void *ptr)
+{
+  ffi_closure *closure = ptr;
+
+  if (closure == NULL)
+    return;
+
+  pthread_mutex_lock(&ffi_trampoline_lock);
+
+  /* Fetch the table and entry references */
+  ffi_trampoline_table *table = closure->trampoline_table;
+  ffi_trampoline_table_entry *entry = closure->trampoline_table_entry;
+
+  /* Return the entry to the free list */
+  entry->next = table->free_list;
+  table->free_list = entry;
+  table->free_count++;
+
+  if (ffi_trampoline_tables != table) {
+    /* In both cases below the table first leaves its current position.
+       Re-inserting it at the head while its old neighbours still point at
+       it would corrupt the list. */
+    if (table->prev != NULL)
+      table->prev->next = table->next;
+
+    if (table->next != NULL)
+      table->next->prev = table->prev;
+
+    if (table->free_count == FFI_TRAMPOLINE_COUNT) {
+      /* All trampolines within this table are free, and at least one other
+       * table exists: deallocate the table */
+      kern_return_t kt;
+      kt = vm_deallocate (mach_task_self (), table->config_page, PAGE_SIZE);
+      if (kt != KERN_SUCCESS)
+        fprintf(stderr, "vm_deallocate() failure: %d at %s:%d\n", kt, __FILE__, __LINE__);
+
+      kt = vm_deallocate (mach_task_self (), table->trampoline_page, PAGE_SIZE);
+      if (kt != KERN_SUCCESS)
+        fprintf(stderr, "vm_deallocate() failure: %d at %s:%d\n", kt, __FILE__, __LINE__);
+
+      /* Deallocate free list */
+      free (table->free_list_pool);
+      free (table);
+    } else {
+      /* Otherwise, bump this table to the top of the list */
+      table->prev = NULL;
+      table->next = ffi_trampoline_tables;
+      if (ffi_trampoline_tables != NULL)
+        ffi_trampoline_tables->prev = table;
+
+      ffi_trampoline_tables = table;
+    }
+  }
+
+  pthread_mutex_unlock (&ffi_trampoline_lock);
+
+  /* Free the closure */
+  free (closure);
+}
+
+#else
+/* Build a trampoline in the buffer TRAMP (GCC statement expression).
+   Copies the ffi_arm_trampoline template into TRAMP, stores CTX at byte
+   offset 12 and FUN at byte offset 16, then calls __clear_cache on the
+   bytes just written and on the three words starting at CTX.  */
+#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX)				\
+({ unsigned char *__tramp = (unsigned char*)(TRAMP);			\
+   unsigned int  __fun = (unsigned int)(FUN);				\
+   unsigned int  __ctx = (unsigned int)(CTX);				\
+   unsigned char *insns = (unsigned char *)(CTX);                       \
+   memcpy (__tramp, ffi_arm_trampoline, sizeof ffi_arm_trampoline);     \
+   *(unsigned int*) &__tramp[12] = __ctx;				\
+   *(unsigned int*) &__tramp[16] = __fun;				\
+   __clear_cache((&__tramp[0]), (&__tramp[19])); /* Clear data mapping.  */ \
+   __clear_cache(insns, insns + 3 * sizeof (unsigned int));             \
+                                                 /* Clear instruction   \
+                                                    mapping.  */        \
+ })
+
+#endif
+
+/* Bind CLOSURE to CIF, the handler FUN and its USER_DATA, and arm the
+   trampoline associated with CODELOC.  The cif must already be prep'ed.
+
+   Returns FFI_BAD_ABI when cif->abi has no closure entry point in this
+   build (nothing is modified in that case), FFI_OK otherwise.  */
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+		      ffi_cif* cif,
+		      void (*fun)(ffi_cif*,void*,void**,void*),
+		      void *user_data,
+		      void *codeloc)
+{
+  void (*entry_point)(ffi_closure*) = NULL;
+
+  /* Select the assembly entry point that matches the calling convention.  */
+  switch (cif->abi)
+    {
+    case FFI_SYSV:
+      entry_point = &ffi_closure_SYSV;
+      break;
+
+#ifdef __ARM_EABI__
+    case FFI_VFP:
+      entry_point = &ffi_closure_VFP;
+      break;
+#endif
+
+    default:
+      return FFI_BAD_ABI;
+    }
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+  /* Table trampolines read their closure and target from the config page.  */
+  void **slots = FFI_TRAMPOLINE_CODELOC_CONFIG(codeloc);
+  slots[0] = closure;
+  slots[1] = entry_point;
+#else
+  FFI_INIT_TRAMPOLINE (&closure->tramp[0], \
+		       entry_point,  \
+		       codeloc);
+#endif
+
+  closure->fun  = fun;
+  closure->user_data = user_data;
+  closure->cif  = cif;
+
+  return FFI_OK;
+}
+
+/* Below are routines for VFP hard-float support. */
+
+/* Recursive helper for vfp_type_p.
+
+   Returns 1 when T is a float, a double, an already classified VFP
+   struct, or a struct whose members (recursively) all share one of those
+   base types and add up to at most four elements.  On success *ELT gets
+   the base type code and *ELNUM the element count; on failure (return 0)
+   neither is written.  */
+static int rec_vfp_type_p (ffi_type *t, int *elt, int *elnum)
+{
+  ffi_type **member;
+  int common = 0;
+  int count = 0;
+
+  switch (t->type)
+    {
+    case FFI_TYPE_DOUBLE:
+    case FFI_TYPE_FLOAT:
+      *elt = (int) t->type;
+      *elnum = 1;
+      return 1;
+
+    case FFI_TYPE_STRUCT_VFP_DOUBLE:
+      *elt = FFI_TYPE_DOUBLE;
+      *elnum = t->size / sizeof (double);
+      return 1;
+
+    case FFI_TYPE_STRUCT_VFP_FLOAT:
+      *elt = FFI_TYPE_FLOAT;
+      *elnum = t->size / sizeof (float);
+      return 1;
+
+    case FFI_TYPE_STRUCT:
+      /* Handled by the member walk below.  */
+      break;
+
+    default:
+      return 0;
+    }
+
+  for (member = t->elements; *member; member++)
+    {
+      int kind = 0;
+      int n = 0;
+
+      if (! rec_vfp_type_p (*member, &kind, &n))
+	return 0;
+      /* All members must agree on the base type ...  */
+      if (common && common != kind)
+	return 0;
+      /* ... and together occupy no more than four elements.  */
+      if (count + n > 4)
+	return 0;
+
+      common = kind;
+      count += n;
+    }
+
+  *elnum = count;
+  *elt = common;
+  return 1;
+}
+
+/* Decide whether T qualifies for VFP registers.
+
+   Returns 0 when rec_vfp_type_p rejects T.  Otherwise returns T's type
+   code, after rewriting a plain FFI_TYPE_STRUCT in place: a one-element
+   struct takes its element's type, anything larger becomes
+   FFI_TYPE_STRUCT_VFP_FLOAT or FFI_TYPE_STRUCT_VFP_DOUBLE.  */
+static int vfp_type_p (ffi_type *t)
+{
+  int base, count;
+
+  if (! rec_vfp_type_p (t, &base, &count))
+    return 0;
+
+  if (t->type == FFI_TYPE_STRUCT)
+    {
+      if (count == 1)
+	t->type = base;
+      else if (base == FFI_TYPE_FLOAT)
+	t->type = FFI_TYPE_STRUCT_VFP_FLOAT;
+      else
+	t->type = FFI_TYPE_STRUCT_VFP_DOUBLE;
+    }
+
+  return (int) t->type;
+}
+/* Find a run of free VFP registers for an argument of type T and record it
+   in CIF.  The search starts at cif->vfp_reg_free and is limited to
+   register numbers 0..15.  On success the run is marked in cif->vfp_used
+   and its first register is appended to cif->vfp_args; when no run fits,
+   CIF is left unchanged.  */
+static void place_vfp_arg (ffi_cif *cif, ffi_type *t)
+{
+  int reg = cif->vfp_reg_free;
+  int nregs = t->size / sizeof (float); /* registers needed, counted in float-sized units */
+  int align = ((t->type == FFI_TYPE_STRUCT_VFP_FLOAT
+		|| t->type == FFI_TYPE_FLOAT) ? 1 : 2);
+  /* Align register number. */
+  if ((reg & 1) && align == 2)
+    reg++;
+  while (reg + nregs <= 16)
+    {
+      int s, new_used = 0;
+      for (s = reg; s < reg + nregs; s++)
+	{
+	  new_used |= (1 << s);
+	  if (cif->vfp_used & (1 << s))
+	    {
+	      reg += align; /* candidate run overlaps a used register: move on and retry */
+	      goto next_reg;
+	    }
+	}
+      /* Found regs to allocate. */
+      cif->vfp_used |= new_used;
+      cif->vfp_args[cif->vfp_nargs++] = reg;
+
+      /* Update vfp_reg_free: only needed when the previously lowest
+         free register has just been taken. */
+      if (cif->vfp_used & (1 << cif->vfp_reg_free))
+	{
+	  reg += nregs;
+	  while (cif->vfp_used & (1 << reg))
+	    reg += 1;
+	  cif->vfp_reg_free = reg;
+	}
+      return;
+    next_reg: ;
+    }
+}
+
+/* Compute the VFP register assignment for every argument of CIF.
+
+   Clears the VFP bookkeeping fields, then offers each argument that
+   vfp_type_p accepts to place_vfp_arg, in declaration order.  */
+static void layout_vfp_args (ffi_cif *cif)
+{
+  unsigned int n;
+
+  /* Start from an empty register file.  */
+  cif->vfp_reg_free = 0;
+  cif->vfp_nargs = 0;
+  cif->vfp_used = 0;
+  memset (cif->vfp_args, -1, 16); /* Every slot starts as -1.  */
+
+  for (n = 0; n < cif->nargs; n++)
+    {
+      ffi_type *candidate = cif->arg_types[n];
+
+      if (vfp_type_p (candidate))
+	{
+	  place_vfp_arg (cif, candidate);
+	}
+    }
+}
+
+
+#endif
--- a/BeefySysLib/third_party/libffi/ios/src/arm/gentramp.sh
+++ b/BeefySysLib/third_party/libffi/ios/src/arm/gentramp.sh
@ -0,0 +1,123 @@
+#ifdef __arm__
+
+#!/bin/sh
+
+# -----------------------------------------------------------------------
+#  gentramp.sh - Copyright (c) 2010, Plausible Labs Cooperative, Inc.
+#  
+#  ARM Trampoline Page Generator
+#
+#  Permission is hereby granted, free of charge, to any person obtaining
+#  a copy of this software and associated documentation files (the
+#  ``Software''), to deal in the Software without restriction, including
+#  without limitation the rights to use, copy, modify, merge, publish,
+#  distribute, sublicense, and/or sell copies of the Software, and to
+#  permit persons to whom the Software is furnished to do so, subject to
+#  the following conditions:
+#
+#  The above copyright notice and this permission notice shall be included
+#  in all copies or substantial portions of the Software.
+#
+#  THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+#  NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+#  DEALINGS IN THE SOFTWARE.
+#  -----------------------------------------------------------------------
+
+PROGNAME=$0
+
+# Each trampoline is exactly 3 instructions, or 12 bytes. If any of these values change,
+# the entire arm trampoline implementation must be updated to match, too.
+
+# Size of an individual trampoline, in bytes
+TRAMPOLINE_SIZE=12
+
+# Page size, in bytes
+PAGE_SIZE=4096
+
+# Compute the size of the reachable config page; The first 16 bytes of the config page
+# are unreachable due to our maximum pc-relative ldr offset.
+PAGE_AVAIL=`expr $PAGE_SIZE - 16`
+
+# Compute the number of of available trampolines. 
+TRAMPOLINE_COUNT=`expr $PAGE_AVAIL / $TRAMPOLINE_SIZE`
+
+# Print the preamble of the generated assembly file: the "generated code"
+# banner, the license text, and the directives that open the page-aligned
+# trampoline table.
+header () {
+    echo "# GENERATED CODE - DO NOT EDIT"
+    echo "# This file was generated by $PROGNAME"
+    echo ""
+
+    # Write out the license header. The delimiter is quoted so the text is
+    # copied verbatim: with an unquoted delimiter the shell treats the ``
+    # in the license as an (empty) command substitution and drops it.
+cat << 'EOF'
+#  Copyright (c) 2010, Plausible Labs Cooperative, Inc.
+#  
+#  Permission is hereby granted, free of charge, to any person obtaining
+#  a copy of this software and associated documentation files (the
+#  ``Software''), to deal in the Software without restriction, including
+#  without limitation the rights to use, copy, modify, merge, publish,
+#  distribute, sublicense, and/or sell copies of the Software, and to
+#  permit persons to whom the Software is furnished to do so, subject to
+#  the following conditions:
+#
+#  The above copyright notice and this permission notice shall be included
+#  in all copies or substantial portions of the Software.
+#
+#  THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+#  NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+#  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+#  DEALINGS IN THE SOFTWARE.
+#  -----------------------------------------------------------------------
+
+EOF
+
+    # Write out the trampoline table, aligned to the page boundary
+    # (.align 12 requests 2^12 = 4096-byte alignment, matching PAGE_SIZE)
+    echo ".text"
+    echo ".align 12"
+    echo ".globl _ffi_closure_trampoline_table_page"
+    echo "_ffi_closure_trampoline_table_page:"
+}
+
+
+# WARNING - Don't modify the trampoline code size without also updating the relevant libffi code
+# (TRAMPOLINE_SIZE above and the matching constants in the C sources).
+trampoline () {
+    cat << END
+
+    // trampoline
+    // Save to stack
+    stmfd sp!, {r0-r3}
+
+    // Load the context argument from the config page.
+    // This places the first usable config value at _ffi_closure_trampoline_table-4080
+    // This accounts for the above 4-byte stmfd instruction, plus 8 bytes constant when loading from pc.
+    ldr r0, [pc, #-4092]
+
+    // Load the jump address from the config page.
+    ldr pc, [pc, #-4092]
+
+END
+}
+
+# Emit the complete trampoline page: the header first, then
+# TRAMPOLINE_COUNT copies of the trampoline body.
+main () {
+    header
+
+    local emitted=0
+    while [ "$emitted" -lt "$TRAMPOLINE_COUNT" ]; do
+        trampoline
+        emitted=$((emitted + 1))
+    done
+}
+
+main
+
+
+#endif
--- a/BeefySysLib/third_party/libffi/ios/src/arm/sysv.S
+++ b/BeefySysLib/third_party/libffi/ios/src/arm/sysv.S
@ -0,0 +1,510 @@
+#ifdef __arm__
+
+/* -----------------------------------------------------------------------
+   sysv.S - Copyright (c) 1998, 2008, 2011 Red Hat, Inc.
+	    Copyright (c) 2011 Plausible Labs Cooperative, Inc.
+   
+   ARM Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM	
+#include <fficonfig.h>
+#include <ffi.h>
+#ifdef HAVE_MACHINE_ASM_H
+#include <machine/asm.h>
+#else
+#ifdef __USER_LABEL_PREFIX__
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+/* Use the right prefix for global labels.  */
+#define CNAME(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+#else
+#define CNAME(x) x
+#endif
+#ifdef __APPLE__
+#define ENTRY(x) .globl _##x; _##x:
+#else
+#define ENTRY(x) .globl CNAME(x); .type CNAME(x),%function; CNAME(x):
+#endif /* __APPLE__ */
+#endif
+
+#ifdef __ELF__
+#define LSYM(x) .x
+#else
+#define LSYM(x) x
+#endif
+
+/* Use the SOFTFP return value ABI on Mac OS X, as per the iOS ABI
+  Function Call Guide */
+#ifdef __APPLE__
+#define __SOFTFP__
+#endif
+
+/* We need a better way of testing for this, but for now, this is all 
+   we can do.  */
+@ This selects the minimum architecture level required.
+#define __ARM_ARCH__ 3
+
+#if defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__)
+# undef __ARM_ARCH__
+# define __ARM_ARCH__ 4
+#endif
+        
+#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
+	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
+	|| defined(__ARM_ARCH_5TEJ__)
+# undef __ARM_ARCH__
+# define __ARM_ARCH__ 5
+#endif
+
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+        || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
+        || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
+	|| defined(__ARM_ARCH_6M__)
+# undef __ARM_ARCH__
+# define __ARM_ARCH__ 6
+#endif
+
+#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+        || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+	|| defined(__ARM_ARCH_7EM__)
+# undef __ARM_ARCH__
+# define __ARM_ARCH__ 7
+#endif
+
+#if __ARM_ARCH__ >= 5
+# define call_reg(x)	blx	x
+#elif defined (__ARM_ARCH_4T__)
+# define call_reg(x)	mov	lr, pc ; bx	x
+# if defined(__thumb__) || defined(__THUMB_INTERWORK__)
+#  define __INTERWORKING__
+# endif
+#else
+# define call_reg(x)	mov	lr, pc ; mov	pc, x
+#endif
+
+/* Conditionally compile unwinder directives.  */
+#ifdef __ARM_EABI__
+#define UNWIND
+#else
+#define UNWIND @
+#endif	
+
+	
+/* ARM_FUNC_START name: open the global function NAME in the text
+   section and leave the assembler in ARM state.
+
+   When building for Thumb without interworking, the entry point is
+   emitted as a Thumb function whose first instructions (bx pc; nop)
+   switch to ARM state, and a local label _L__NAME marks the ARM code.
+   Otherwise the entry point is emitted directly as ARM code.
+
+   On Apple targets the macro argument is referenced as $0 rather than
+   \name.  */
+#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
+.macro	ARM_FUNC_START name
+	.text
+	.align 0
+	.thumb
+	.thumb_func
+#ifdef __APPLE__
+	ENTRY($0)
+#else
+	ENTRY(\name)
+#endif
+	bx	pc
+	nop
+	.arm
+	UNWIND .fnstart
+/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
+   directly from other local arm routines.  */
+#ifdef __APPLE__
+_L__$0:
+#else
+_L__\name:
+#endif
+.endm
+#else
+.macro	ARM_FUNC_START name
+	.text
+	.align 0
+	.arm
+#ifdef __APPLE__
+	ENTRY($0)
+#else
+	ENTRY(\name)
+#endif
+	UNWIND .fnstart
+.endm
+#endif
+
+/* RETLDM regs, cond, dirn: return from a function, first reloading the
+   optional register list REGS from the stack.
+
+   With __INTERWORKING__ defined the return address is loaded into lr
+   and the return is done with bx lr; otherwise the return address is
+   loaded straight into pc.  When REGS is empty a single post-indexed
+   ldr pops just the return address.  COND is an optional condition
+   suffix and DIRN the ldm addressing mode (default ia).  */
+.macro	RETLDM	regs=, cond=, dirn=ia
+#if defined (__INTERWORKING__)
+	.ifc "\regs",""
+	ldr\cond	lr, [sp], #4
+	.else
+	ldm\cond\dirn	sp!, {\regs, lr}
+	.endif
+	bx\cond	lr
+#else
+	.ifc "\regs",""
+	ldr\cond	pc, [sp], #4
+	.else
+	ldm\cond\dirn	sp!, {\regs, pc}
+	.endif
+#endif
+.endm
+
+	@ r0:   fn
+	@ r1:   &ecif
+	@ r2:   cif->bytes
+	@ r3:   cif->flags
+	@ sp+0: ecif.rvalue
+
+	@ This assumes we are using gas.
+ARM_FUNC_START ffi_call_SYSV
+	@ Save registers
+        stmfd	sp!, {r0-r3, fp, lr}
+	UNWIND .save	{r0-r3, fp, lr}
+	mov	fp, sp
+
+	UNWIND .setfp	fp, sp
+
+	@ Make room for all of the new args.
+	sub	sp, fp, r2
+
+	@ Place all of the ffi_prep_args in position
+	mov	r0, sp
+	@     r1 already set
+
+	@ Call ffi_prep_args(stack, &ecif)
+	bl	CNAME(ffi_prep_args)
+
+	@ move first 4 parameters in registers
+	ldmia	sp, {r0-r3}
+
+	@ and adjust stack
+	@ The first four words now live in r0-r3, so pop them:
+	@ sp advances by cif->bytes when that is below 16, else by 16.
+	sub	lr, fp, sp	@ cif->bytes == fp - sp
+	ldr	ip, [fp]	@ load fn() in advance
+	cmp	lr, #16
+	movhs	lr, #16
+	add	sp, sp, lr
+
+	@ call (fn) (...)
+	call_reg(ip)
+	
+	@ Remove the space we pushed for the args
+	mov	sp, fp
+
+	@ Load r2 with the pointer to storage for the return value
+	@ (24 bytes of saved r0-r3, fp, lr sit below the first stack argument)
+	ldr	r2, [sp, #24]
+
+	@ Load r3 with the return type code 
+	@ (offset 12 is the saved copy of r3)
+	ldr	r3, [sp, #12]
+
+	@ If the return value pointer is NULL, assume no return value.
+	cmp	r2, #0
+	beq	LSYM(Lepilogue)
+
+@ return INT
+	cmp	r3, #FFI_TYPE_INT
+#if defined(__SOFTFP__) || defined(__ARM_EABI__)
+	cmpne	r3, #FFI_TYPE_FLOAT
+#endif
+	streq	r0, [r2]
+	beq	LSYM(Lepilogue)
+
+	@ return INT64
+	cmp	r3, #FFI_TYPE_SINT64
+#if defined(__SOFTFP__) || defined(__ARM_EABI__)
+	cmpne	r3, #FFI_TYPE_DOUBLE
+#endif
+	stmeqia	r2, {r0, r1}
+
+#if !defined(__SOFTFP__) && !defined(__ARM_EABI__)
+	beq	LSYM(Lepilogue)
+
+@ return FLOAT
+	cmp	r3, #FFI_TYPE_FLOAT
+	stfeqs	f0, [r2]
+	beq	LSYM(Lepilogue)
+
+@ return DOUBLE or LONGDOUBLE
+	cmp	r3, #FFI_TYPE_DOUBLE
+	stfeqd	f0, [r2]
+#endif
+
+LSYM(Lepilogue):
+#if defined (__INTERWORKING__)
+	ldmia   sp!, {r0-r3,fp, lr}
+	bx	lr
+#else
+	ldmia   sp!, {r0-r3,fp, pc}
+#endif
+
+.ffi_call_SYSV_end:
+	UNWIND .fnend
+#ifdef __ELF__
+        .size    CNAME(ffi_call_SYSV),.ffi_call_SYSV_end-CNAME(ffi_call_SYSV)
+#endif
+
+
+/*
+	unsigned int FFI_HIDDEN
+	ffi_closure_SYSV_inner (closure, respp, args)
+	     ffi_closure *closure;
+	     void **respp;
+  	     void *args;
+*/
+
+/* Closure entry point.  The stack arithmetic below assumes that the
+   trampoline has already pushed r0-r3 (16 bytes) before jumping here,
+   as ffi_arm_trampoline in this file does with "stmfd sp!, {r0-r3}".
+   NOTE(review): r0 is presumably the closure pointer loaded by the
+   trampoline -- confirm against the trampoline setup code.
+
+   After the call, r0 holds the return type code reported by
+   ffi_closure_SYSV_inner and the result is reloaded from the 16-byte
+   scratch area at the bottom of the frame.  */
+ARM_FUNC_START ffi_closure_SYSV
+	UNWIND .pad #16
+	@ ip = stack pointer value above the 16 bytes of pushed r0-r3
+	add	ip, sp, #16
+	stmfd	sp!, {ip, lr}
+	UNWIND .save	{r0, lr}
+	@ r2 (args) = address of the pushed r0-r3
+	add	r2, sp, #8
+	UNWIND .pad #16
+	@ 16 bytes of scratch: result area at sp+0, pointer slot at sp+8
+	sub	sp, sp, #16
+	@ the pointer slot initially points at the result area
+	str	sp, [sp, #8]
+	@ r1 (respp) = address of the pointer slot
+	add	r1, sp, #8
+	bl	CNAME(ffi_closure_SYSV_inner)
+	cmp	r0, #FFI_TYPE_INT
+	beq	.Lretint
+
+	cmp	r0, #FFI_TYPE_FLOAT
+#if defined(__SOFTFP__) || defined(__ARM_EABI__)
+	beq	.Lretint
+#else
+	beq	.Lretfloat
+#endif
+
+	cmp	r0, #FFI_TYPE_DOUBLE
+#if defined(__SOFTFP__) || defined(__ARM_EABI__)
+	beq	.Lretlonglong
+#else
+	beq	.Lretdouble
+#endif
+
+	cmp	r0, #FFI_TYPE_LONGDOUBLE
+#if defined(__SOFTFP__) || defined(__ARM_EABI__)
+	beq	.Lretlonglong
+#else
+	beq	.Lretlongdouble
+#endif
+
+	cmp	r0, #FFI_TYPE_SINT64
+	beq	.Lretlonglong
+.Lclosure_epilogue:
+	@ drop the scratch area, then reload the saved sp and return
+	add	sp, sp, #16
+	ldmfd	sp, {sp, pc}
+.Lretint:
+	ldr	r0, [sp]
+	b	.Lclosure_epilogue
+.Lretlonglong:
+	ldr	r0, [sp]
+	ldr	r1, [sp, #4]
+	b	.Lclosure_epilogue
+
+#if !defined(__SOFTFP__) && !defined(__ARM_EABI__)
+.Lretfloat:
+	ldfs	f0, [sp]
+	b	.Lclosure_epilogue
+.Lretdouble:
+	ldfd	f0, [sp]
+	b	.Lclosure_epilogue
+.Lretlongdouble:
+	ldfd	f0, [sp]
+	b	.Lclosure_epilogue
+#endif
+
+.ffi_closure_SYSV_end:
+	UNWIND .fnend
+#ifdef __ELF__
+        .size    CNAME(ffi_closure_SYSV),.ffi_closure_SYSV_end-CNAME(ffi_closure_SYSV)
+#endif
+
+
+/* Below are VFP hard-float ABI call and closure implementations.
+   Add VFP FPU directive here. This is only compiled into the library
+   under EABI.  */
+#ifdef __ARM_EABI__
+	.fpu	vfp
+
+	@ r0:   fn
+	@ r1:   &ecif
+	@ r2:   cif->bytes
+	@ r3:   cif->flags
+	@ sp+0: ecif.rvalue
+
+/* Call FN with arguments marshalled by ffi_prep_args, using the VFP
+   register-passing convention.  Frame layout after the prologue, from
+   low to high addresses: cif->bytes of outgoing arguments at sp, then
+   64 bytes of VFP scratch ending at fp, then the saved r0-r3, fp, lr.
+   The return value of ffi_prep_args is treated as "VFP registers
+   needed": 0 loads none, 1-3 loads only d0, anything larger loads
+   d0-d7.  */
+ARM_FUNC_START ffi_call_VFP
+	@ Save registers
+        stmfd	sp!, {r0-r3, fp, lr}
+	UNWIND .save	{r0-r3, fp, lr}
+	mov	fp, sp
+	UNWIND .setfp	fp, sp
+
+	@ Make room for all of the new args.
+	sub	sp, sp, r2
+
+	@ Make room for loading VFP args
+	sub	sp, sp, #64
+
+	@ Place all of the ffi_prep_args in position
+	mov	r0, sp
+	@     r1 already set
+	sub	r2, fp, #64   @ VFP scratch space
+
+	@ Call ffi_prep_args(stack, &ecif, vfp_space)
+	bl	CNAME(ffi_prep_args)
+
+	@ Load VFP register args if needed
+	cmp	r0, #0
+	@ ip is a scratch register that the call above may have changed,
+	@ and the stack adjustment below reads it on both paths.  Set it
+	@ before branching; sub leaves the flags from cmp untouched.
+	sub	ip, fp, #64
+	beq	LSYM(Lbase_args)
+
+	@ Load only d0 if possible
+	cmp	r0, #3
+	flddle	d0, [ip]
+	fldmiadgt	ip, {d0-d7}
+
+LSYM(Lbase_args):
+	@ move first 4 parameters in registers
+	ldmia	sp, {r0-r3}
+
+	@ and adjust stack
+	@ The first four words now live in r0-r3, so pop them:
+	@ sp advances by cif->bytes when that is below 16, else by 16.
+	sub	lr, ip, sp	@ cif->bytes == (fp - 64) - sp
+	ldr	ip, [fp]	@ load fn() in advance
+        cmp	lr, #16
+	movhs	lr, #16
+        add	sp, sp, lr
+
+	@ call (fn) (...)
+	call_reg(ip)
+
+	@ Remove the space we pushed for the args
+	mov	sp, fp
+
+	@ Load r2 with the pointer to storage for
+	@ the return value
+	ldr	r2, [sp, #24]
+
+	@ Load r3 with the return type code 
+	ldr	r3, [sp, #12]
+
+	@ If the return value pointer is NULL,
+	@ assume no return value.
+	cmp	r2, #0
+	beq	LSYM(Lepilogue_vfp)
+
+	cmp	r3, #FFI_TYPE_INT
+	streq	r0, [r2]
+	beq	LSYM(Lepilogue_vfp)
+
+	cmp	r3, #FFI_TYPE_SINT64
+	stmeqia	r2, {r0, r1}
+	beq	LSYM(Lepilogue_vfp)
+
+	cmp	r3, #FFI_TYPE_FLOAT
+	fstseq	s0, [r2]
+	beq	LSYM(Lepilogue_vfp)
+	
+	cmp	r3, #FFI_TYPE_DOUBLE
+	fstdeq	d0, [r2]
+	beq	LSYM(Lepilogue_vfp)
+
+	@ Homogeneous float/double structs come back in d0-d3
+	cmp	r3, #FFI_TYPE_STRUCT_VFP_FLOAT
+	cmpne	r3, #FFI_TYPE_STRUCT_VFP_DOUBLE
+	fstmiadeq	r2, {d0-d3}
+
+LSYM(Lepilogue_vfp):
+	RETLDM	"r0-r3,fp"
+
+.ffi_call_VFP_end:
+	UNWIND .fnend
+        .size    CNAME(ffi_call_VFP),.ffi_call_VFP_end-CNAME(ffi_call_VFP)
+
+
+/* VFP closure entry point.  Like ffi_closure_SYSV, the stack
+   arithmetic assumes the trampoline has already pushed r0-r3 (16
+   bytes).  d0-d7 are pushed below them so that both the core and the
+   VFP argument registers are in memory, and ffi_closure_SYSV_inner is
+   called with a fourth argument in r3 that points at the saved d0-d7.
+   After the call, r0 holds the return type code and the result is
+   reloaded from the scratch area at the bottom of the frame.  */
+ARM_FUNC_START ffi_closure_VFP
+	fstmfdd	sp!, {d0-d7}
+	@ r0-r3, then d0-d7
+	UNWIND .pad #80
+	@ ip = stack pointer value above d0-d7 (64 bytes) and r0-r3 (16 bytes)
+	add	ip, sp, #80
+	stmfd	sp!, {ip, lr}
+	UNWIND .save	{r0, lr}
+	@ r2 = address of the pushed r0-r3, r3 = address of the pushed d0-d7
+	add	r2, sp, #72
+	add	r3, sp, #8
+	UNWIND .pad #72
+	@ 72 bytes of scratch: result area at sp+0, pointer slot at sp+64
+	sub	sp, sp, #72
+	@ the pointer slot initially points at the result area
+	str	sp, [sp, #64]
+	@ r1 = address of the pointer slot
+	add	r1, sp, #64
+	bl	CNAME(ffi_closure_SYSV_inner)
+
+	cmp	r0, #FFI_TYPE_INT
+	beq	.Lretint_vfp
+
+	cmp	r0, #FFI_TYPE_FLOAT
+	beq	.Lretfloat_vfp
+
+	cmp	r0, #FFI_TYPE_DOUBLE
+	cmpne	r0, #FFI_TYPE_LONGDOUBLE
+	beq	.Lretdouble_vfp
+
+	cmp	r0, #FFI_TYPE_SINT64
+	beq	.Lretlonglong_vfp
+
+	cmp	r0, #FFI_TYPE_STRUCT_VFP_FLOAT
+	beq	.Lretfloat_struct_vfp
+
+	cmp	r0, #FFI_TYPE_STRUCT_VFP_DOUBLE
+	beq	.Lretdouble_struct_vfp
+	
+.Lclosure_epilogue_vfp:
+	@ drop the scratch area, then reload the saved sp and return
+	add	sp, sp, #72
+	ldmfd	sp, {sp, pc}
+
+.Lretfloat_vfp:
+	flds	s0, [sp]
+	b	.Lclosure_epilogue_vfp
+.Lretdouble_vfp:
+	fldd	d0, [sp]
+	b	.Lclosure_epilogue_vfp
+.Lretint_vfp:
+	ldr	r0, [sp]
+	b	.Lclosure_epilogue_vfp
+.Lretlonglong_vfp:
+	ldmia	sp, {r0, r1}
+	b	.Lclosure_epilogue_vfp
+.Lretfloat_struct_vfp:
+	fldmiad	sp, {d0-d1}
+	b	.Lclosure_epilogue_vfp
+.Lretdouble_struct_vfp:
+	fldmiad	sp, {d0-d3}
+	b	.Lclosure_epilogue_vfp
+
+.ffi_closure_VFP_end:
+	UNWIND .fnend
+        .size    CNAME(ffi_closure_VFP),.ffi_closure_VFP_end-CNAME(ffi_closure_VFP)
+#endif
+
+/* Three-instruction trampoline: push r0-r3, load r0 from the word
+   addressed by [pc], then load pc from the word addressed by [pc].
+   NOTE(review): presumably a template whose instructions are copied
+   next to a context word and a target address at run time -- confirm
+   against the closure setup code.  */
+ENTRY(ffi_arm_trampoline)
+	stmfd sp!, {r0-r3}
+	ldr r0, [pc]
+	ldr pc, [pc]
+
+#if defined __ELF__ && defined __linux__
+	.section	.note.GNU-stack,"",%progbits
+#endif
+
+
+#endif
--- a/BeefySysLib/third_party/libffi/ios/src/arm/trampoline.S
+++ b/BeefySysLib/third_party/libffi/ios/src/arm/trampoline.S
--- a/BeefySysLib/third_party/libffi/ios/src/closures.c
+++ b/BeefySysLib/third_party/libffi/ios/src/closures.c
@ -0,0 +1,644 @@
+/* -----------------------------------------------------------------------
+   closures.c - Copyright (c) 2007, 2009, 2010  Red Hat, Inc.
+                Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc
+                Copyright (c) 2011 Plausible Labs Cooperative, Inc.
+
+   Code to allocate and deallocate memory for closures.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#if defined __linux__ && !defined _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#if !FFI_MMAP_EXEC_WRIT && !FFI_EXEC_TRAMPOLINE_TABLE
+# if __gnu_linux__
+/* This macro indicates it may be forbidden to map anonymous memory
+   with both write and execute permission.  Code compiled when this
+   option is defined will attempt to map such pages once, but if it
+   fails, it falls back to creating a temporary file in a writable and
+   executable filesystem and mapping pages from it into separate
+   locations in the virtual memory space, one location writable and
+   another executable.  */
+#  define FFI_MMAP_EXEC_WRIT 1
+#  define HAVE_MNTENT 1
+# endif
+# if defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)
+/* Windows systems may have Data Execution Prevention (DEP) enabled,
+   which requires the use of VirtualAlloc/VirtualFree to alloc/free
+   executable memory. */
+#  define FFI_MMAP_EXEC_WRIT 1
+# endif
+#endif
+
+#if FFI_MMAP_EXEC_WRIT && !defined FFI_MMAP_EXEC_SELINUX
+# ifdef __linux__
+/* When defined to 1 check for SELinux and if SELinux is active,
+   don't attempt PROT_EXEC|PROT_WRITE mapping at all, as that
+   might cause audit messages.  */
+#  define FFI_MMAP_EXEC_SELINUX 1
+# endif
+#endif
+
+#if FFI_CLOSURES
+
+# if FFI_EXEC_TRAMPOLINE_TABLE
+
+// Per-target implementation; it's unclear what can reasonably be shared between two OS/architecture implementations.
+
+# elif FFI_MMAP_EXEC_WRIT /* !FFI_EXEC_TRAMPOLINE_TABLE */
+
+#define USE_LOCKS 1
+#define USE_DL_PREFIX 1
+#ifdef __GNUC__
+#ifndef USE_BUILTIN_FFS
+#define USE_BUILTIN_FFS 1
+#endif
+#endif
+
+/* We need to use mmap, not sbrk.  */
+#define HAVE_MORECORE 0
+
+/* We could, in theory, support mremap, but it wouldn't buy us anything.  */
+#define HAVE_MREMAP 0
+
+/* We have no use for this, so save some code and data.  */
+#define NO_MALLINFO 1
+
+/* We need all allocations to be in regular segments, otherwise we
+   lose track of the corresponding code address.  */
+#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
+
+/* Don't allocate more than a page unless needed.  */
+#define DEFAULT_GRANULARITY ((size_t)malloc_getpagesize)
+
+#if FFI_CLOSURE_TEST
+/* Don't release single pages, to avoid a worst-case scenario of
+   continuously allocating and releasing single pages, but release
+   pairs of pages, which should do just as well given that allocations
+   are likely to be small.  */
+#define DEFAULT_TRIM_THRESHOLD ((size_t)malloc_getpagesize)
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#ifndef _MSC_VER
+#include <unistd.h>
+#endif
+#include <string.h>
+#include <stdio.h>
+#if !defined(X86_WIN32) && !defined(X86_WIN64)
+#ifdef HAVE_MNTENT
+#include <mntent.h>
+#endif /* HAVE_MNTENT */
+#include <sys/param.h>
+#include <pthread.h>
+
+/* We don't want sys/mman.h to be included after we redefine mmap and
+   dlmunmap.  */
+#include <sys/mman.h>
+#define LACKS_SYS_MMAN_H 1
+
+#if FFI_MMAP_EXEC_SELINUX
+#include <sys/statfs.h>
+#include <stdlib.h>
+
+static int selinux_enabled = -1;
+
+/* Probe whether SELinux is active.  Returns 1 when statfs on /selinux
+   reports filesystem magic 0xf97cff8c, or when a line of /proc/mounts
+   has "selinuxfs " right after its second space; returns 0 otherwise,
+   including when /proc/mounts cannot be opened.  */
+static int
+selinux_enabled_check (void)
+{
+  struct statfs fs_info;
+  FILE *mounts;
+  char *line = NULL;
+  size_t capacity = 0;
+  int found = 0;
+
+  if (statfs ("/selinux", &fs_info) >= 0
+      && (unsigned int) fs_info.f_type == 0xf97cff8cU)
+    return 1;
+
+  mounts = fopen ("/proc/mounts", "r");
+  if (mounts == NULL)
+    return 0;
+
+  /* Stop at the first match, at the first line with fewer than two
+     spaces, or at end of file.  */
+  while (!found && getline (&line, &capacity, mounts) >= 0)
+    {
+      char *field = strchr (line, ' ');
+      if (field == NULL)
+        break;
+      field = strchr (field + 1, ' ');
+      if (field == NULL)
+        break;
+      if (strncmp (field + 1, "selinuxfs ", 10) == 0)
+        found = 1;
+    }
+
+  /* getline owns the buffer; release it on every path.  */
+  free (line);
+  fclose (mounts);
+  return found;
+}
+
+#define is_selinux_enabled() (selinux_enabled >= 0 ? selinux_enabled \
+			      : (selinux_enabled = selinux_enabled_check ()))
+
+#else
+
+#define is_selinux_enabled() 0
+
+#endif /* !FFI_MMAP_EXEC_SELINUX */
+
+/* On PaX-enabled kernels that have MPROTECT enabled we can't use PROT_EXEC. */
+#ifdef FFI_MMAP_EXEC_EMUTRAMP_PAX
+#include <stdlib.h>
+
+static int emutramp_enabled = -1;
+
+/* Report whether emulated trampolines should be used: 1 unless the
+   environment variable FFI_DISABLE_EMUTRAMP is set, in which case 0.  */
+static int
+emutramp_enabled_check (void)
+{
+  const char *disable = getenv ("FFI_DISABLE_EMUTRAMP");
+
+  return disable == NULL ? 1 : 0;
+}
+
+#define is_emutramp_enabled() (emutramp_enabled >= 0 ? emutramp_enabled \
+                               : (emutramp_enabled = emutramp_enabled_check ()))
+#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */
+
+#elif defined (__CYGWIN__) || defined(__INTERIX)
+
+#include <sys/mman.h>
+
+/* Cygwin is Linux-like, but not quite that Linux-like.  */
+#define is_selinux_enabled() 0
+
+#endif /* !defined(X86_WIN32) && !defined(X86_WIN64) */
+
+#ifndef FFI_MMAP_EXEC_EMUTRAMP_PAX
+#define is_emutramp_enabled() 0
+#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */
+
+/* Declare all functions defined in dlmalloc.c as static.  */
+static void *dlmalloc(size_t);
+static void dlfree(void*);
+static void *dlcalloc(size_t, size_t) MAYBE_UNUSED;
+static void *dlrealloc(void *, size_t) MAYBE_UNUSED;
+static void *dlmemalign(size_t, size_t) MAYBE_UNUSED;
+static void *dlvalloc(size_t) MAYBE_UNUSED;
+static int dlmallopt(int, int) MAYBE_UNUSED;
+static size_t dlmalloc_footprint(void) MAYBE_UNUSED;
+static size_t dlmalloc_max_footprint(void) MAYBE_UNUSED;
+static void** dlindependent_calloc(size_t, size_t, void**) MAYBE_UNUSED;
+static void** dlindependent_comalloc(size_t, size_t*, void**) MAYBE_UNUSED;
+static void *dlpvalloc(size_t) MAYBE_UNUSED;
+static int dlmalloc_trim(size_t) MAYBE_UNUSED;
+static size_t dlmalloc_usable_size(void*) MAYBE_UNUSED;
+static void dlmalloc_stats(void) MAYBE_UNUSED;
+
+#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX)
+/* Use these for mmap and munmap within dlmalloc.c.  */
+static void *dlmmap(void *, size_t, int, int, int, off_t);
+static int dlmunmap(void *, size_t);
+#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */
+
+#define mmap dlmmap
+#define munmap dlmunmap
+
+#include "dlmalloc.c"
+
+#undef mmap
+#undef munmap
+
+#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX)
+
+/* A mutex used to synchronize access to *exec* variables in this file.  */
+static pthread_mutex_t open_temp_exec_file_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* A file descriptor of a temporary file from which we'll map
+   executable pages.  */
+static int execfd = -1;
+
+/* The amount of space already allocated from the temporary file.  */
+static size_t execsize = 0;
+
+/* Create a temporary file from the mkstemp template NAME and, when
+   that succeeds, unlink it immediately so only the descriptor keeps it
+   alive.  Returns the descriptor, or -1 when mkstemp failed.  */
+static int
+open_temp_exec_file_name (char *name)
+{
+  int fd = mkstemp (name);
+
+  if (fd == -1)
+    return -1;
+
+  unlink (name);
+  return fd;
+}
+
+/* Open a temporary file in the named directory.  */
+static int
+open_temp_exec_file_dir (const char *dir)
+{
+  static const char suffix[] = "/ffiXXXXXX";
+  int lendir = strlen (dir);
+  char *tempname = __builtin_alloca (lendir + sizeof (suffix));
+
+  if (!tempname)
+    return -1;
+
+  memcpy (tempname, dir, lendir);
+  memcpy (tempname + lendir, suffix, sizeof (suffix));
+
+  return open_temp_exec_file_name (tempname);
+}
+
+/* Open a temporary file in the directory named by the environment
+   variable ENVVAR.  Returns -1 when the variable is not set, otherwise
+   whatever open_temp_exec_file_dir returns for its value.  */
+static int
+open_temp_exec_file_env (const char *envvar)
+{
+  const char *dir = getenv (envvar);
+
+  return dir ? open_temp_exec_file_dir (dir) : -1;
+}
+
+#ifdef HAVE_MNTENT
+/* Open a temporary file in an executable and writable mount point
+   listed in the mounts file.  Subsequent calls with the same mounts
+   keep searching for mount points in the same file.  Providing NULL
+   as the mounts file closes the file.
+
+   MOUNTS is compared by pointer, not by content, against the value
+   remembered from the previous call.  Returns a descriptor from
+   open_temp_exec_file_dir, or -1 when no mounts file is open or its
+   entries are exhausted.  */
+static int
+open_temp_exec_file_mnt (const char *mounts)
+{
+  /* Iteration state kept across calls.  */
+  static const char *last_mounts;
+  static FILE *last_mntent;
+
+  /* A different MOUNTS argument (including NULL) ends the current
+     scan and, when non-NULL, starts a new one.  */
+  if (mounts != last_mounts)
+    {
+      if (last_mntent)
+	endmntent (last_mntent);
+
+      last_mounts = mounts;
+
+      if (mounts)
+	last_mntent = setmntent (mounts, "r");
+      else
+	last_mntent = NULL;
+    }
+
+  if (!last_mntent)
+    return -1;
+
+  for (;;)
+    {
+      int fd;
+      struct mntent mnt;
+      char buf[MAXPATHLEN * 3];
+
+      /* No more entries: give up on this mounts file.  */
+      if (getmntent_r (last_mntent, &mnt, buf, sizeof (buf)) == NULL)
+	return -1;
+
+      /* Skip mount points that are read-only, mounted noexec, or not
+	 writable by this process.  */
+      if (hasmntopt (&mnt, "ro")
+	  || hasmntopt (&mnt, "noexec")
+	  || access (mnt.mnt_dir, W_OK))
+	continue;
+
+      fd = open_temp_exec_file_dir (mnt.mnt_dir);
+
+      if (fd != -1)
+	return fd;
+    }
+}
+#endif /* HAVE_MNTENT */
+
+/* Instructions to look for a location to hold a temporary file that
+   can be mapped in for execution.  Entries are tried in array order.  */
+static struct
+{
+  /* Opener to call; returns a file descriptor or -1.  */
+  int (*func)(const char *);
+  /* Argument handed to FUNC: an environment variable name, a
+     directory, or a mounts file, depending on the opener.  */
+  const char *arg;
+  /* Nonzero when FUNC keeps state and can be called again with the
+     same ARG to obtain further candidates; such a FUNC is reset by
+     calling it with NULL.  */
+  int repeat;
+} open_temp_exec_file_opts[] = {
+  { open_temp_exec_file_env, "TMPDIR", 0 },
+  { open_temp_exec_file_dir, "/tmp", 0 },
+  { open_temp_exec_file_dir, "/var/tmp", 0 },
+  { open_temp_exec_file_dir, "/dev/shm", 0 },
+  { open_temp_exec_file_env, "HOME", 0 },
+#ifdef HAVE_MNTENT
+  { open_temp_exec_file_mnt, "/etc/mtab", 1 },
+  { open_temp_exec_file_mnt, "/proc/mounts", 1 },
+#endif /* HAVE_MNTENT */
+};
+
+/* Current index into open_temp_exec_file_opts.  */
+static int open_temp_exec_file_opts_idx = 0;
+
+/* Move on to the next entry of open_temp_exec_file_opts.  A repeatable
+   entry is first reset by calling its func with NULL.  Returns zero
+   after a normal advance; when the entry just left was the last one,
+   the index wraps to the first entry and nonzero is returned.  */
+static int
+open_temp_exec_file_opts_next (void)
+{
+  const size_t n_opts = (sizeof (open_temp_exec_file_opts)
+			 / sizeof (open_temp_exec_file_opts[0]));
+
+  if (open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat)
+    open_temp_exec_file_opts[open_temp_exec_file_opts_idx].func (NULL);
+
+  if (++open_temp_exec_file_opts_idx != n_opts)
+    return 0;
+
+  /* Walked off the end of the table: start over.  */
+  open_temp_exec_file_opts_idx = 0;
+  return 1;
+}
+
+/* Return a file descriptor of a temporary zero-sized file in a
+   writable and executable filesystem, or -1 when every entry of
+   open_temp_exec_file_opts has been tried without success.  Candidates
+   are taken starting at open_temp_exec_file_opts_idx.  */
+static int
+open_temp_exec_file (void)
+{
+  int fd;
+
+  for (;;)
+    {
+      int advance;
+
+      fd = open_temp_exec_file_opts[open_temp_exec_file_opts_idx].func
+	(open_temp_exec_file_opts[open_temp_exec_file_opts_idx].arg);
+
+      /* A one-shot entry is always left behind; a repeatable entry
+	 only once it has run out of candidates.  */
+      advance = (!open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat
+		 || fd == -1);
+
+      /* Stop when the table wrapped around, whatever FD holds.  */
+      if (advance && open_temp_exec_file_opts_next ())
+	break;
+
+      if (fd != -1)
+	break;
+    }
+
+  return fd;
+}
+
+/* Map in a chunk of memory from the temporary exec file into separate
+   locations in the virtual memory address space, one writable and one
+   executable.  Returns the address of the writable portion, after
+   storing an offset to the corresponding executable portion at the
+   last word of the requested chunk.
+
+   The incoming OFFSET is ignored: it is overwritten with execsize, so
+   each chunk is appended to the end of the temporary file.  Returns
+   MFAIL when no temporary file can be opened, when the file cannot be
+   grown, or when either mapping fails.  The function reads and updates
+   execfd and execsize.  */
+static void *
+dlmmap_locked (void *start, size_t length, int prot, int flags, off_t offset)
+{
+  void *ptr;
+
+  if (execfd == -1)
+    {
+      open_temp_exec_file_opts_idx = 0;
+      /* Re-entered by the goto below when the first executable mapping
+	 of a freshly opened file fails; the search then continues from
+	 the current table index rather than from the start.  */
+    retry_open:
+      execfd = open_temp_exec_file ();
+      if (execfd == -1)
+	return MFAIL;
+    }
+
+  offset = execsize;
+
+  /* Grow the file to cover the new chunk.  */
+  if (ftruncate (execfd, offset + length))
+    return MFAIL;
+
+  /* Both views must be shared, file-backed mappings of the same
+     pages.  */
+  flags &= ~(MAP_PRIVATE | MAP_ANONYMOUS);
+  flags |= MAP_SHARED;
+
+  /* First view: executable, not writable.  */
+  ptr = mmap (NULL, length, (prot & ~PROT_WRITE) | PROT_EXEC,
+	      flags, execfd, offset);
+  if (ptr == MFAIL)
+    {
+      if (!offset)
+	{
+	  /* Nothing has been mapped from this file yet, so drop it and
+	     try the next candidate location.  */
+	  close (execfd);
+	  goto retry_open;
+	}
+      /* Best effort: shrink the file back to its previous size.  */
+      ftruncate (execfd, offset);
+      return MFAIL;
+    }
+  else if (!offset
+	   && open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat)
+    open_temp_exec_file_opts_next ();
+
+  /* Second view: the same file range with the protection the caller
+     asked for.  */
+  start = mmap (start, length, prot, flags, execfd, offset);
+
+  if (start == MFAIL)
+    {
+      munmap (ptr, length);
+      ftruncate (execfd, offset);
+      return start;
+    }
+
+  /* Record the distance from the writable view to the executable
+     view.  */
+  mmap_exec_offset ((char *)start, length) = (char*)ptr - (char*)start;
+
+  execsize += length;
+
+  return start;
+}
+
+/* Map in a writable and executable chunk of memory if possible.
+   Failing that, fall back to dlmmap_locked.
+
+   The arguments mirror mmap; the assert below documents the only
+   combination the caller is expected to pass.  Returns the mapped
+   address or MFAIL.  */
+static void *
+dlmmap (void *start, size_t length, int prot,
+	int flags, int fd, off_t offset)
+{
+  void *ptr;
+
+  assert (start == NULL && length % malloc_getpagesize == 0
+	  && prot == (PROT_READ | PROT_WRITE)
+	  && flags == (MAP_PRIVATE | MAP_ANONYMOUS)
+	  && fd == -1 && offset == 0);
+
+#if FFI_CLOSURE_TEST
+  printf ("mapping in %zi\n", length);
+#endif
+
+  /* With emulated trampolines enabled and no temporary exec file in
+     use, a plain mapping without PROT_EXEC is returned.  */
+  if (execfd == -1 && is_emutramp_enabled ())
+    {
+      ptr = mmap (start, length, prot & ~PROT_EXEC, flags, fd, offset);
+      return ptr;
+    }
+
+  /* Without SELinux, first try a single mapping that is both writable
+     and executable.  Only EPERM and EACCES failures fall through to
+     the two-mapping scheme; any other result is returned as is.  */
+  if (execfd == -1 && !is_selinux_enabled ())
+    {
+      ptr = mmap (start, length, prot | PROT_EXEC, flags, fd, offset);
+
+      if (ptr != MFAIL || (errno != EPERM && errno != EACCES))
+	/* Cool, no need to mess with separate segments.  */
+	return ptr;
+
+      /* If MREMAP_DUP is ever introduced and implemented, try mmap
+	 with ((prot & ~PROT_WRITE) | PROT_EXEC) and mremap with
+	 MREMAP_DUP and prot at this point.  */
+    }
+
+  /* The mutex is taken only while no chunk has been mapped from the
+     temporary file yet.
+     NOTE(review): the final call below runs dlmmap_locked without the
+     mutex; presumably dlmalloc's own locking serializes it -- confirm.  */
+  if (execsize == 0 || execfd == -1)
+    {
+      pthread_mutex_lock (&open_temp_exec_file_mutex);
+      ptr = dlmmap_locked (start, length, prot, flags, offset);
+      pthread_mutex_unlock (&open_temp_exec_file_mutex);
+
+      return ptr;
+    }
+
+  return dlmmap_locked (start, length, prot, flags, offset);
+}
+
+/* Release memory at the given address, as well as the corresponding
+   executable page if it's separate.
+
+   Returns the result of munmap: nonzero as soon as one of the
+   unmappings fails, in which case the writable view at START is left
+   mapped if the executable view could not be released.  */
+static int
+dlmunmap (void *start, size_t length)
+{
+  /* We don't bother decreasing execsize or truncating the file, since
+     we can't quite tell whether we're unmapping the end of the file.
+     We don't expect frequent deallocation anyway.  If we did, we
+     could locate pages in the file by writing to the pages being
+     deallocated and checking that the file contents change.
+     Yuck.  */
+  msegmentptr seg = segment_holding (gm, start);
+  void *code;
+
+#if FFI_CLOSURE_TEST
+  printf ("unmapping %zi\n", length);
+#endif
+
+  /* A code address different from START means the segment has a
+     separate executable view that must be unmapped as well.  */
+  if (seg && (code = add_segment_exec_offset (start, seg)) != start)
+    {
+      int ret = munmap (code, length);
+      if (ret)
+	return ret;
+    }
+
+  return munmap (start, length);
+}
+
+#if FFI_CLOSURE_FREE_CODE
+/* Return the segment of M whose executable view contains the code
+   address ADDR, or 0 when no segment does.  */
+static msegmentptr
+segment_holding_code (mstate m, char* addr)
+{
+  msegmentptr seg;
+
+  /* Walk the segment list, starting with the one embedded in M.  */
+  for (seg = &m->seg; seg != 0; seg = seg->next)
+    {
+      char *code_base = add_segment_exec_offset (seg->base, seg);
+
+      if (addr >= code_base && addr < code_base + seg->size)
+	return seg;
+    }
+
+  return 0;
+}
+#endif
+
+#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */
+
+/* Allocate SIZE bytes for a closure.  Returns the writable address and
+   stores the matching executable address in *CODE.  Returns NULL when
+   CODE is NULL or when the allocation fails; *CODE is left untouched
+   in both cases.  */
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+  void *writable;
+  msegmentptr seg;
+
+  if (code == NULL)
+    return NULL;
+
+  writable = dlmalloc (size);
+  if (writable == NULL)
+    return NULL;
+
+  /* Translate the writable address into its executable counterpart
+     through the segment that holds it.  */
+  seg = segment_holding (gm, writable);
+  *code = add_segment_exec_offset (writable, seg);
+
+  return writable;
+}
+
+/* Free memory obtained from ffi_closure_alloc.  PTR must be the
+   writable address, except when FFI_CLOSURE_FREE_CODE is nonzero: then
+   the executable address is accepted too and is translated back to the
+   writable one before it is freed.  */
+void
+ffi_closure_free (void *ptr)
+{
+  void *writable = ptr;
+
+#if FFI_CLOSURE_FREE_CODE
+  msegmentptr code_seg = segment_holding_code (gm, ptr);
+
+  if (code_seg != 0)
+    writable = sub_segment_exec_offset (ptr, code_seg);
+#endif
+
+  dlfree (writable);
+}
+
+
+#if FFI_CLOSURE_TEST
+/* Do some internal sanity testing to make sure allocation and
+   deallocation of pages are working as intended.  Compiled only when
+   FFI_CLOSURE_TEST is nonzero; the allocation pattern mixes half-page
+   and roughly two-page requests and frees them out of order, and the
+   printf calls here and in dlmmap/dlmunmap trace what gets mapped.  */
+int main ()
+{
+  void *p[3];
+/* GET allocates LEN bytes into p[IDX] and logs the request; PUT logs
+   and frees p[IDX].  */
+#define GET(idx, len) do { p[idx] = dlmalloc (len); printf ("allocated %zi for p[%i]\n", (len), (idx)); } while (0)
+#define PUT(idx) do { printf ("freeing p[%i]\n", (idx)); dlfree (p[idx]); } while (0)
+  GET (0, malloc_getpagesize / 2);
+  GET (1, 2 * malloc_getpagesize - 64 * sizeof (void*));
+  PUT (1);
+  GET (1, 2 * malloc_getpagesize);
+  GET (2, malloc_getpagesize / 2);
+  PUT (1);
+  PUT (0);
+  PUT (2);
+  return 0;
+}
+#endif /* FFI_CLOSURE_TEST */
+# else /* ! FFI_MMAP_EXEC_WRIT */
+
+/* On many systems, memory returned by malloc is writable and
+   executable, so just use it.  */
+
+#include <stdlib.h>
+
+/* malloc-backed variant: the writable and the executable address are
+   the same pointer.  Returns NULL without allocating when CODE is
+   NULL; otherwise *CODE receives the malloc result, which is also
+   returned (NULL on allocation failure).  */
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+  void *mem;
+
+  if (code == NULL)
+    return NULL;
+
+  mem = malloc (size);
+  *code = mem;
+
+  return mem;
+}
+
+/* malloc-backed variant: release PTR, as returned by
+   ffi_closure_alloc, with free.  */
+void
+ffi_closure_free (void *ptr)
+{
+  free (ptr);
+}
+
+# endif /* ! FFI_MMAP_EXEC_WRIT */
+#endif /* FFI_CLOSURES */
--- a/BeefySysLib/third_party/libffi/ios/src/debug.c
+++ b/BeefySysLib/third_party/libffi/ios/src/debug.c
@ -0,0 +1,59 @@
+/* -----------------------------------------------------------------------
+   debug.c - Copyright (c) 1996 Red Hat, Inc.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+/* General debugging routines */
+
+/* Intentionally empty breakpoint hook; ffi_assert calls it just
+   before aborting.  */
+void ffi_stop_here(void)
+{
+  /* This function is only useful for debugging purposes.
+     Place a breakpoint on ffi_stop_here to be notified of
+     significant events. */
+}
+
+/* This function should only be called via the FFI_ASSERT() macro */
+
+/* Report a failed assertion on stderr, call the ffi_stop_here
+   breakpoint hook, and abort the process.  EXPR is the text of the
+   failed expression; FILE and LINE give its source location.  Does
+   not return.  */
+void ffi_assert(char *expr, char *file, int line)
+{
+  fprintf(stderr, "ASSERTION FAILURE: %s at %s:%d\n", expr, file, line);
+  ffi_stop_here();
+  abort();
+}
+
+/* Perform a sanity check on an ffi_type structure */
+
+/* Checks, in order: A is non-NULL; its type code does not exceed
+   FFI_TYPE_LAST; every type other than FFI_TYPE_VOID has a positive
+   size and alignment; and an FFI_TYPE_STRUCT has a non-NULL elements
+   array.  FILE and LINE are passed to FFI_ASSERT_AT with each check.  */
+void ffi_type_test(ffi_type *a, char *file, int line)
+{
+  /* Must come first: every later check dereferences A.  */
+  FFI_ASSERT_AT(a != NULL, file, line);
+
+  FFI_ASSERT_AT(a->type <= FFI_TYPE_LAST, file, line);
+  FFI_ASSERT_AT(a->type == FFI_TYPE_VOID || a->size > 0, file, line);
+  FFI_ASSERT_AT(a->type == FFI_TYPE_VOID || a->alignment > 0, file, line);
+  FFI_ASSERT_AT(a->type != FFI_TYPE_STRUCT || a->elements != NULL, file, line);
+
+}
--- a/BeefySysLib/third_party/libffi/ios/src/dlmalloc.c
+++ b/BeefySysLib/third_party/libffi/ios/src/dlmalloc.c
--- a/BeefySysLib/third_party/libffi/ios/src/java_raw_api.c
+++ b/BeefySysLib/third_party/libffi/ios/src/java_raw_api.c
@ -0,0 +1,356 @@
+/* -----------------------------------------------------------------------
+   java_raw_api.c - Copyright (c) 1999, 2007, 2008  Red Hat, Inc.
+
+   Cloned from raw_api.c
+
+   Raw_api.c author: Kresten Krab Thorup <krab@gnu.org>
+   Java_raw_api.c author: Hans-J. Boehm <hboehm@hpl.hp.com>
+
+   $Id $
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+/* This defines a Java- and 64-bit specific variant of the raw API.	*/
+/* It assumes that "raw" argument blocks look like Java stacks on a	*/
+/* 64-bit machine.  Arguments that can be stored in a single stack	*/
+/* slot (longs, doubles) occupy 128 bits, but only the first		*/
+/* 64 bits are actually used.						*/
+
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+
+#if !defined(NO_JAVA_RAW_API) && !defined(FFI_NO_RAW_API)
+
+/* Compute the number of bytes of Java raw storage needed to hold all
+   arguments described by CIF.
+
+   64-bit integers and doubles are charged two raw slots
+   (2 * FFI_SIZEOF_JAVA_RAW); every other argument type is charged one
+   slot.  A structure argument aborts the process, because Java has no
+   structure parameters.
+
+   Returns the total size in bytes (0 when there are no arguments).  */
+size_t
+ffi_java_raw_size (ffi_cif *cif)
+{
+  size_t result = 0;
+  unsigned i;
+
+  ffi_type **at = cif->arg_types;
+
+  /* Count upwards with an unsigned index, as the other converters in
+     this file do.  Counting down from "cif->nargs-1" in an int relied
+     on an implementation-defined unsigned-to-signed conversion, most
+     visibly when nargs is 0.  */
+  for (i = 0; i < cif->nargs; i++, at++)
+    {
+      switch ((*at)->type)
+	{
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_DOUBLE:
+	  result += 2 * FFI_SIZEOF_JAVA_RAW;
+	  break;
+	case FFI_TYPE_STRUCT:
+	  /* No structure parameters in Java.	*/
+	  abort();
+	  /* abort() does not return, so the default case is never
+	     entered from here.  */
+	default:
+	  result += FFI_SIZEOF_JAVA_RAW;
+	}
+    }
+
+  return result;
+}
+
+
+/* Build the pointer array ARGS that ffi_call expects from a Java raw
+   argument block.
+
+   cif  - call interface; supplies nargs and the argument types
+   raw  - start of the Java raw argument block (read, not modified)
+   args - output array with room for cif->nargs pointers; each entry
+          is set to point into RAW at the bytes of that argument
+
+   No data is copied: ARGS aliases the storage of RAW.  */
+void
+ffi_java_raw_to_ptrarray (ffi_cif *cif, ffi_java_raw *raw, void **args)
+{
+  unsigned i;
+  ffi_type **tp = cif->arg_types;
+
+#if WORDS_BIGENDIAN
+
+  for (i = 0; i < cif->nargs; i++, tp++, args++)
+    {
+      switch ((*tp)->type)
+	{
+	/* 8- and 16-bit values: point 3 resp. 2 bytes into the slot,
+	   i.e. at the trailing bytes of its first 32 bits.  */
+	case FFI_TYPE_UINT8:
+	case FFI_TYPE_SINT8:
+	  *args = (void*) ((char*)(raw++) + 3);
+	  break;
+
+	case FFI_TYPE_UINT16:
+	case FFI_TYPE_SINT16:
+	  *args = (void*) ((char*)(raw++) + 2);
+	  break;
+
+#if FFI_SIZEOF_JAVA_RAW == 8
+	/* 64-bit values start at the current slot and consume two.  */
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_DOUBLE:
+	  *args = (void *)raw;
+	  raw += 2;
+	  break;
+#endif
+
+	case FFI_TYPE_POINTER:
+	  *args = (void*) &(raw++)->ptr;
+	  break;
+
+	default:
+	  /* Anything else starts at the slot and takes as many whole
+	     slots as its size rounds up to.  */
+	  *args = raw;
+	  raw +=
+	    ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw);
+	}
+    }
+
+#else /* WORDS_BIGENDIAN */
+
+#if !PDP
+
+  /* then assume little endian */
+  for (i = 0; i < cif->nargs; i++, tp++, args++)
+    {
+#if FFI_SIZEOF_JAVA_RAW == 8
+      switch((*tp)->type) {
+	/* 64-bit values consume two slots, everything else one.  */
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_DOUBLE:
+	  *args = (void*) raw;
+	  raw += 2;
+	  break;
+	default:
+	  *args = (void*) raw++;
+      }
+#else /* FFI_SIZEOF_JAVA_RAW != 8 */
+	/* Every argument starts at the slot and takes as many whole
+	   slots as its size rounds up to.  */
+	*args = (void*) raw;
+	raw +=
+	  ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw);
+#endif /* FFI_SIZEOF_JAVA_RAW == 8 */
+    }
+
+#else
+#error "pdp endian not supported"
+#endif /* ! PDP */
+
+#endif /* WORDS_BIGENDIAN */
+}
+
+/* Copy the arguments referenced by the pointer array ARGS into the
+   Java raw argument block RAW.
+
+   cif  - call interface; supplies nargs and the argument types
+   args - array of cif->nargs pointers to the argument values
+   raw  - output block; the caller must provide enough room (see
+          ffi_java_raw_size)
+
+   Integer values of 32 bits or less are widened into the slot's
+   32-bit sint/uint view (on big-endian targets they are stored
+   through a 32-bit pointer at the start of the slot).  */
+void
+ffi_java_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_java_raw *raw)
+{
+  unsigned i;
+  ffi_type **tp = cif->arg_types;
+
+  for (i = 0; i < cif->nargs; i++, tp++, args++)
+    {
+      switch ((*tp)->type)
+	{
+	case FFI_TYPE_UINT8:
+#if WORDS_BIGENDIAN
+	  *(UINT32*)(raw++) = *(UINT8*) (*args);
+#else
+	  (raw++)->uint = *(UINT8*) (*args);
+#endif
+	  break;
+
+	case FFI_TYPE_SINT8:
+#if WORDS_BIGENDIAN
+	  *(SINT32*)(raw++) = *(SINT8*) (*args);
+#else
+	  (raw++)->sint = *(SINT8*) (*args);
+#endif
+	  break;
+
+	case FFI_TYPE_UINT16:
+#if WORDS_BIGENDIAN
+	  *(UINT32*)(raw++) = *(UINT16*) (*args);
+#else
+	  (raw++)->uint = *(UINT16*) (*args);
+#endif
+	  break;
+
+	case FFI_TYPE_SINT16:
+#if WORDS_BIGENDIAN
+	  *(SINT32*)(raw++) = *(SINT16*) (*args);
+#else
+	  (raw++)->sint = *(SINT16*) (*args);
+#endif
+	  break;
+
+	case FFI_TYPE_UINT32:
+#if WORDS_BIGENDIAN
+	  *(UINT32*)(raw++) = *(UINT32*) (*args);
+#else
+	  (raw++)->uint = *(UINT32*) (*args);
+#endif
+	  break;
+
+	case FFI_TYPE_SINT32:
+#if WORDS_BIGENDIAN
+	  *(SINT32*)(raw++) = *(SINT32*) (*args);
+#else
+	  (raw++)->sint = *(SINT32*) (*args);
+#endif
+	  break;
+
+	case FFI_TYPE_FLOAT:
+	  (raw++)->flt = *(FLOAT32*) (*args);
+	  break;
+
+#if FFI_SIZEOF_JAVA_RAW == 8
+	/* 64-bit values are stored in the first slot and a second
+	   slot is skipped, matching ffi_java_raw_to_ptrarray.  */
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_DOUBLE:
+	  raw->uint = *(UINT64*) (*args);
+	  raw += 2;
+	  break;
+#endif
+
+	case FFI_TYPE_POINTER:
+	  /* *args points at a pointer variable; store its value.  */
+	  (raw++)->ptr = **(void***) args;
+	  break;
+
+	default:
+#if FFI_SIZEOF_JAVA_RAW == 8
+	  FFI_ASSERT(0);	/* Should have covered all cases */
+#else
+	  /* Remaining types are copied bytewise and take as many
+	     whole slots as their size rounds up to.  */
+	  memcpy ((void*) raw->data, (void*)*args, (*tp)->size);
+	  raw +=
+	    ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw);
+#endif
+	}
+    }
+}
+
+#if !FFI_NATIVE_RAW_API
+
+/* Adjust the return value left in RVALUE by ffi_call for a Java raw
+   caller.
+
+   Only does anything when WORDS_BIGENDIAN is set and FFI_SIZEOF_ARG
+   is 8: integer results of 32 bits or less (and pointers when
+   FFI_SIZEOF_JAVA_RAW is 4) are shifted left by 32 bits within the
+   64-bit return slot.  In every other configuration the body is
+   empty.
+
+   NOTE(review): presumably this moves the value to the first four
+   bytes of the slot, where a Java raw reader looks - confirm.
+   NOTE(review): "<<=" on a negative SINT64 is undefined behaviour in
+   ISO C; this relies on the compiler's usual two's-complement
+   behaviour.  */
+static void
+ffi_java_rvalue_to_raw (ffi_cif *cif, void *rvalue)
+{
+#if WORDS_BIGENDIAN && FFI_SIZEOF_ARG == 8
+  switch (cif->rtype->type)
+    {
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_UINT32:
+      *(UINT64 *)rvalue <<= 32;
+      break;
+
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_SINT16:
+    case FFI_TYPE_SINT32:
+    case FFI_TYPE_INT:
+#if FFI_SIZEOF_JAVA_RAW == 4
+    case FFI_TYPE_POINTER:
+#endif
+      *(SINT64 *)rvalue <<= 32;
+      break;
+
+    default:
+      break;
+    }
+#endif
+}
+
+/* Counterpart of ffi_java_rvalue_to_raw, used on the closure path:
+   adjust the value a Java raw handler stored in RVALUE.
+
+   Only does anything when WORDS_BIGENDIAN is set and FFI_SIZEOF_ARG
+   is 8: integer results of 32 bits or less are shifted right by 32
+   bits within the 64-bit return slot (the signed cases shift a SINT64,
+   the unsigned cases a UINT64).  Unlike ffi_java_rvalue_to_raw,
+   FFI_TYPE_POINTER is not handled here.  */
+static void
+ffi_java_raw_to_rvalue (ffi_cif *cif, void *rvalue)
+{
+#if WORDS_BIGENDIAN && FFI_SIZEOF_ARG == 8
+  switch (cif->rtype->type)
+    {
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_UINT32:
+      *(UINT64 *)rvalue >>= 32;
+      break;
+
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_SINT16:
+    case FFI_TYPE_SINT32:
+    case FFI_TYPE_INT:
+      *(SINT64 *)rvalue >>= 32;
+      break;
+
+    default:
+      break;
+    }
+#endif
+}
+
+/* This is a generic definition of ffi_java_raw_call, to be used if
+ * the native system does not provide a machine-specific
+ * implementation.  Having this allows code to be written for the
+ * Java raw API without the need for system-specific code to handle
+ * input in that format; the following functions handle the
+ * translation back and forth automatically.
+ *
+ * cif    - prepared call interface
+ * fn     - function to call
+ * rvalue - storage for the return value, passed on to ffi_call
+ * raw    - arguments in Java raw form
+ *
+ * The temporary pointer array is allocated with alloca, so it lives
+ * on this function's stack frame and needs no explicit release. */
+
+void ffi_java_raw_call (ffi_cif *cif, void (*fn)(void), void *rvalue,
+			ffi_java_raw *raw)
+{
+  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
+  ffi_java_raw_to_ptrarray (cif, raw, avalue);
+  ffi_call (cif, fn, rvalue, avalue);
+  ffi_java_rvalue_to_raw (cif, rvalue);
+}
+
+#if FFI_CLOSURES		/* base system provides closures */
+
+/* Closure "hub" installed by ffi_prep_java_raw_closure_loc.
+
+   Receives arguments in pointer-array form (AVALUE), converts them
+   into a Java raw block allocated with alloca, calls the user handler
+   stored in the closure, and finally adjusts RVALUE with
+   ffi_java_raw_to_rvalue.
+
+   user_data is interpreted as the closure object whose fun and
+   user_data members are read.
+   NOTE(review): the cast goes through ffi_raw_closure and ffi_raw
+   rather than the ffi_java_* types; this presumably relies on the two
+   closure layouts matching - confirm against ffi.h.  */
+static void
+ffi_java_translate_args (ffi_cif *cif, void *rvalue,
+		    void **avalue, void *user_data)
+{
+  ffi_java_raw *raw = (ffi_java_raw*)alloca (ffi_java_raw_size (cif));
+  ffi_raw_closure *cl = (ffi_raw_closure*)user_data;
+
+  ffi_java_ptrarray_to_raw (cif, avalue, raw);
+  (*cl->fun) (cif, rvalue, (ffi_raw*)raw, cl->user_data);
+  ffi_java_raw_to_rvalue (cif, rvalue);
+}
+
+/* Prepare a Java raw closure.
+
+   cl        - closure storage to initialise
+   cif       - call interface describing the closure's signature
+   fun       - user handler that receives the arguments in Java raw
+               form
+   user_data - opaque value handed back to FUN on every invocation
+   codeloc   - code address forwarded unchanged to
+               ffi_prep_closure_loc
+
+   Installs ffi_java_translate_args as the handler of the underlying
+   closure and, on success, records FUN and USER_DATA in CL.
+   Returns the status reported by ffi_prep_closure_loc.  */
+ffi_status
+ffi_prep_java_raw_closure_loc (ffi_java_raw_closure* cl,
+			       ffi_cif *cif,
+			       void (*fun)(ffi_cif*,void*,ffi_java_raw*,void*),
+			       void *user_data,
+			       void *codeloc)
+{
+  ffi_status status;
+
+  /* The hub dereferences its user_data as the closure object (it
+     reads ->fun and ->user_data), so it must be given CL itself.
+     CODELOC is only the code address; it is not guaranteed to alias
+     the closure's data, so it must not be used for that purpose.  */
+  status = ffi_prep_closure_loc ((ffi_closure*) cl,
+				 cif,
+				 &ffi_java_translate_args,
+				 cl,
+				 codeloc);
+  if (status == FFI_OK)
+    {
+      cl->fun       = fun;
+      cl->user_data = user_data;
+    }
+
+  return status;
+}
+
+/* Again, here is the generic version of ffi_prep_java_raw_closure,
+ * which will install an intermediate "hub" for translation of
+ * arguments from the pointer-array format to the Java raw format.
+ *
+ * Convenience wrapper around ffi_prep_java_raw_closure_loc that uses
+ * the closure object CL itself as the code location.  Returns that
+ * function's status. */
+
+ffi_status
+ffi_prep_java_raw_closure (ffi_java_raw_closure* cl,
+			   ffi_cif *cif,
+			   void (*fun)(ffi_cif*,void*,ffi_java_raw*,void*),
+			   void *user_data)
+{
+  return ffi_prep_java_raw_closure_loc (cl, cif, fun, user_data, cl);
+}
+
+#endif /* FFI_CLOSURES */
+#endif /* !FFI_NATIVE_RAW_API */
+#endif /* !FFI_NO_RAW_API */
--- a/BeefySysLib/third_party/libffi/ios/src/prep_cif.c
+++ b/BeefySysLib/third_party/libffi/ios/src/prep_cif.c
@ -0,0 +1,237 @@
+/* -----------------------------------------------------------------------
+   prep_cif.c - Copyright (c) 2011, 2012  Anthony Green
+                Copyright (c) 1996, 1998, 2007  Red Hat, Inc.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+
+/* Round X up to FFI_SIZEOF_ARG using ALIGN.  ffi_prep_cif_core uses
+   this for the space it reserves for each stack argument.  */
+
+#define STACK_ARG_SIZE(x) ALIGN(x, FFI_SIZEOF_ARG)
+
+/* Perform machine independent initialization of aggregate type
+   specifications.
+
+   Walks the NULL-terminated arg->elements array.  Any element whose
+   size is still 0 is initialized recursively first.  Each element is
+   placed at the next offset that satisfies its own alignment; the
+   aggregate's alignment becomes the largest element alignment.
+
+   Returns FFI_OK on success.  Returns FFI_BAD_TYPEDEF when ARG or its
+   element list is NULL, when a nested element cannot be initialized,
+   or when the resulting size is 0.  On failure arg->size and
+   arg->alignment may already have been modified.  */
+
+static ffi_status initialize_aggregate(ffi_type *arg)
+{
+  ffi_type **ptr;
+
+  if (UNLIKELY(arg == NULL || arg->elements == NULL))
+    return FFI_BAD_TYPEDEF;
+
+  arg->size = 0;
+  arg->alignment = 0;
+
+  /* arg->elements was verified above, so the address of its first
+     slot cannot be null and needs no separate check.  */
+  for (ptr = arg->elements; *ptr != NULL; ptr++)
+    {
+      if (UNLIKELY(((*ptr)->size == 0)
+		    && (initialize_aggregate((*ptr)) != FFI_OK)))
+	return FFI_BAD_TYPEDEF;
+
+      /* Perform a sanity check on the argument type */
+      FFI_ASSERT_VALID_TYPE(*ptr);
+
+      /* Pad up to this element's alignment, then append it.  */
+      arg->size = ALIGN(arg->size, (*ptr)->alignment);
+      arg->size += (*ptr)->size;
+
+      if ((*ptr)->alignment > arg->alignment)
+	arg->alignment = (*ptr)->alignment;
+    }
+
+  /* Structure size includes tail padding.  This is important for
+     structures that fit in one register on ABIs like the PowerPC64
+     Linux ABI that right justify small structs in a register.
+     It's also needed for nested structure layout, for example
+     struct A { long a; char b; }; struct B { struct A x; char y; };
+     should find y at an offset of 2*sizeof(long) and result in a
+     total size of 3*sizeof(long).  */
+  arg->size = ALIGN (arg->size, arg->alignment);
+
+  /* An aggregate without any sized element is not a usable type.  */
+  return (arg->size == 0) ? FFI_BAD_TYPEDEF : FFI_OK;
+}
+
+#ifndef __CRIS__
+/* The CRIS ABI specifies structure elements to have byte
+   alignment only, so it completely overrides this function,
+   which assumes "natural" alignment and padding.  */
+
+/* Perform machine independent ffi_cif preparation, then call
+   machine dependent routine. */
+
+/* For non variadic functions isvariadic should be 0 and
+   nfixedargs==ntotalargs.
+
+   For variadic calls, isvariadic should be 1 and nfixedargs
+   and ntotalargs set as appropriate. nfixedargs must always be >=1 */
+
+
+/* Machine-independent part of call-interface preparation.
+
+   cif        - call interface to fill in
+   abi        - calling convention; must lie strictly between
+                FFI_FIRST_ABI and FFI_LAST_ABI (X86_WIN32 builds also
+                accept FFI_THISCALL)
+   isvariadic - non-zero for a variadic call
+   nfixedargs - number of fixed arguments
+   ntotalargs - total number of arguments; stored in cif->nargs
+   rtype      - return type; initialized here if its size is still 0
+   atypes     - argument types; each is initialized here if its size
+                is still 0
+
+   Returns FFI_BAD_ABI for an out-of-range ABI, FFI_BAD_TYPEDEF when
+   an aggregate type cannot be initialized, and otherwise whatever the
+   machine-dependent preparation routine returns.  */
+ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
+			     unsigned int isvariadic,
+                             unsigned int nfixedargs,
+                             unsigned int ntotalargs,
+			     ffi_type *rtype, ffi_type **atypes)
+{
+  unsigned bytes = 0;
+  unsigned int i;
+  ffi_type **ptr;
+
+  FFI_ASSERT(cif != NULL);
+  FFI_ASSERT((!isvariadic) || (nfixedargs >= 1));
+  FFI_ASSERT(nfixedargs <= ntotalargs);
+
+#ifndef X86_WIN32
+  if (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI))
+    return FFI_BAD_ABI;
+#else
+  if (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI || abi == FFI_THISCALL))
+    return FFI_BAD_ABI;
+#endif
+
+  cif->abi = abi;
+  cif->arg_types = atypes;
+  cif->nargs = ntotalargs;
+  cif->rtype = rtype;
+
+  cif->flags = 0;
+
+  /* Initialize the return type if necessary */
+  if ((cif->rtype->size == 0) && (initialize_aggregate(cif->rtype) != FFI_OK))
+    return FFI_BAD_TYPEDEF;
+
+  /* Perform a sanity check on the return type */
+  FFI_ASSERT_VALID_TYPE(cif->rtype);
+
+  /* x86, x86-64 and s390 stack space allocation is handled in prep_machdep. */
+#if !defined M68K && !defined X86_ANY && !defined S390 && !defined PA
+  /* Make space for the return structure pointer */
+  if (cif->rtype->type == FFI_TYPE_STRUCT
+#ifdef SPARC
+      && (cif->abi != FFI_V9 || cif->rtype->size > 32)
+#endif
+#ifdef TILE
+      && (cif->rtype->size > 10 * FFI_SIZEOF_ARG)
+#endif
+#ifdef XTENSA
+      && (cif->rtype->size > 16)
+#endif
+
+     )
+    bytes = STACK_ARG_SIZE(sizeof(void*));
+#endif
+
+  /* Walk all argument types, initializing aggregates and (on targets
+     that do not compute it in the machine-dependent code) summing up
+     the argument area size in BYTES.  */
+  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+    {
+
+      /* Initialize any uninitialized aggregate type definitions */
+      if (((*ptr)->size == 0) && (initialize_aggregate((*ptr)) != FFI_OK))
+	return FFI_BAD_TYPEDEF;
+
+      /* Perform a sanity check on the argument type, do this
+	 check after the initialization.  */
+      FFI_ASSERT_VALID_TYPE(*ptr);
+
+#if !defined X86_ANY && !defined S390 && !defined PA
+#ifdef SPARC
+      /* These SPARC cases are charged the size of a pointer instead
+	 of the size of the value.  */
+      if (((*ptr)->type == FFI_TYPE_STRUCT
+	   && ((*ptr)->size > 16 || cif->abi != FFI_V9))
+	  || ((*ptr)->type == FFI_TYPE_LONGDOUBLE
+	      && cif->abi != FFI_V9))
+	bytes += sizeof(void*);
+      else
+#endif
+	{
+	  /* Add any padding if necessary */
+	  if (((*ptr)->alignment - 1) & bytes)
+	    bytes = ALIGN(bytes, (*ptr)->alignment);
+
+#ifdef TILE
+	  if (bytes < 10 * FFI_SIZEOF_ARG &&
+	      bytes + STACK_ARG_SIZE((*ptr)->size) > 10 * FFI_SIZEOF_ARG)
+	    {
+	      /* An argument is never split between the 10 parameter
+		 registers and the stack.  */
+	      bytes = 10 * FFI_SIZEOF_ARG;
+	    }
+#endif
+#ifdef XTENSA
+	  /* Same idea as TILE, with a 6*4 byte boundary.  */
+	  if (bytes <= 6*4 && bytes + STACK_ARG_SIZE((*ptr)->size) > 6*4)
+	    bytes = 6*4;
+#endif
+
+	  bytes += STACK_ARG_SIZE((*ptr)->size);
+	}
+#endif
+    }
+
+  cif->bytes = bytes;
+
+  /* Perform machine dependent cif processing */
+#ifdef FFI_TARGET_SPECIFIC_VARIADIC
+  if (isvariadic)
+	return ffi_prep_cif_machdep_var(cif, nfixedargs, ntotalargs);
+#endif
+
+  return ffi_prep_cif_machdep(cif);
+}
+#endif /* not __CRIS__ */
+
+/* Prepare CIF for a non-variadic call: forwards to ffi_prep_cif_core
+   with isvariadic == 0 and nfixedargs == ntotalargs == NARGS, and
+   returns its status.  */
+ffi_status ffi_prep_cif(ffi_cif *cif, ffi_abi abi, unsigned int nargs,
+			     ffi_type *rtype, ffi_type **atypes)
+{
+  return ffi_prep_cif_core(cif, abi, 0, nargs, nargs, rtype, atypes);
+}
+
+/* Prepare CIF for a variadic call: forwards to ffi_prep_cif_core with
+   isvariadic == 1 and the given fixed/total argument counts, and
+   returns its status.  ffi_prep_cif_core asserts nfixedargs >= 1 and
+   nfixedargs <= ntotalargs for variadic calls.  */
+ffi_status ffi_prep_cif_var(ffi_cif *cif,
+                            ffi_abi abi,
+                            unsigned int nfixedargs,
+                            unsigned int ntotalargs,
+                            ffi_type *rtype,
+                            ffi_type **atypes)
+{
+  return ffi_prep_cif_core(cif, abi, 1, nfixedargs, ntotalargs, rtype, atypes);
+}
+
+#if FFI_CLOSURES
+
+/* Convenience wrapper around ffi_prep_closure_loc that passes the
+   closure object itself as the code location.  Returns that
+   function's status.  */
+ffi_status
+ffi_prep_closure (ffi_closure* closure,
+		  ffi_cif* cif,
+		  void (*fun)(ffi_cif*,void*,void**,void*),
+		  void *user_data)
+{
+  return ffi_prep_closure_loc (closure, cif, fun, user_data, closure);
+}
+
+#endif
--- a/BeefySysLib/third_party/libffi/ios/src/raw_api.c
+++ b/BeefySysLib/third_party/libffi/ios/src/raw_api.c
@ -0,0 +1,254 @@
+/* -----------------------------------------------------------------------
+   raw_api.c - Copyright (c) 1999, 2008  Red Hat, Inc.
+
+   Author: Kresten Krab Thorup <krab@gnu.org>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+/* This file defines generic functions for use with the raw api. */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#if !FFI_NO_RAW_API
+
+/* Compute the number of bytes of raw argument storage needed to hold
+   all arguments described by CIF.
+
+   Unless FFI_NO_STRUCTS is set, a structure argument is charged the
+   size of a pointer; every other argument is charged its own size.
+   Each contribution is rounded up to FFI_SIZEOF_ARG.
+
+   Returns the total size in bytes (0 when there are no arguments).  */
+size_t
+ffi_raw_size (ffi_cif *cif)
+{
+  size_t result = 0;
+  unsigned i;
+
+  ffi_type **at = cif->arg_types;
+
+  /* Count upwards with an unsigned index, as the other converters in
+     this file do.  Counting down from "cif->nargs-1" in an int relied
+     on an implementation-defined unsigned-to-signed conversion, most
+     visibly when nargs is 0.  */
+  for (i = 0; i < cif->nargs; i++, at++)
+    {
+#if !FFI_NO_STRUCTS
+      if ((*at)->type == FFI_TYPE_STRUCT)
+	result += ALIGN (sizeof (void*), FFI_SIZEOF_ARG);
+      else
+#endif
+	result += ALIGN ((*at)->size, FFI_SIZEOF_ARG);
+    }
+
+  return result;
+}
+
+
+/* Build the pointer array ARGS that ffi_call expects from a raw
+   argument block.
+
+   cif  - call interface; supplies nargs and the argument types
+   raw  - start of the raw argument block (read, not modified)
+   args - output array with room for cif->nargs pointers
+
+   Structure arguments (unless FFI_NO_STRUCTS) are stored in RAW as a
+   pointer, and that stored pointer becomes the ARGS entry.  All other
+   entries point into RAW itself; no data is copied.  */
+void
+ffi_raw_to_ptrarray (ffi_cif *cif, ffi_raw *raw, void **args)
+{
+  unsigned i;
+  ffi_type **tp = cif->arg_types;
+
+#if WORDS_BIGENDIAN
+
+  for (i = 0; i < cif->nargs; i++, tp++, args++)
+    {	  
+      switch ((*tp)->type)
+	{
+	/* Values narrower than a slot: point at the trailing bytes of
+	   the slot (slot size minus the value's size).  */
+	case FFI_TYPE_UINT8:
+	case FFI_TYPE_SINT8:
+	  *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 1);
+	  break;
+	  
+	case FFI_TYPE_UINT16:
+	case FFI_TYPE_SINT16:
+	  *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 2);
+	  break;
+
+#if FFI_SIZEOF_ARG >= 4	  
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT32:
+	  *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 4);
+	  break;
+#endif
+	
+#if !FFI_NO_STRUCTS  
+	case FFI_TYPE_STRUCT:
+	  *args = (raw++)->ptr;
+	  break;
+#endif
+
+	case FFI_TYPE_POINTER:
+	  *args = (void*) &(raw++)->ptr;
+	  break;
+	  
+	default:
+	  /* Anything else starts at the slot and takes as many whole
+	     slots as its size rounds up to.  */
+	  *args = raw;
+	  raw += ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
+	}
+    }
+
+#else /* WORDS_BIGENDIAN */
+
+#if !PDP
+
+  /* then assume little endian */
+  for (i = 0; i < cif->nargs; i++, tp++, args++)
+    {	  
+#if !FFI_NO_STRUCTS
+      if ((*tp)->type == FFI_TYPE_STRUCT)
+	{
+	  *args = (raw++)->ptr;
+	}
+      else
+#endif
+	{
+	  /* The value starts at the slot; advance by its size rounded
+	     up to whole pointer-sized units.  */
+	  *args = (void*) raw;
+	  raw += ALIGN ((*tp)->size, sizeof (void*)) / sizeof (void*);
+	}
+    }
+
+#else
+#error "pdp endian not supported"
+#endif /* ! PDP */
+
+#endif /* WORDS_BIGENDIAN */
+}
+
+/* Copy the arguments referenced by the pointer array ARGS into the
+   raw argument block RAW.
+
+   cif  - call interface; supplies nargs and the argument types
+   args - array of cif->nargs pointers to the argument values
+   raw  - output block; the caller must provide enough room (see
+          ffi_raw_size)
+
+   Small integers are widened into the slot's sint/uint member.
+   Structure arguments (unless FFI_NO_STRUCTS) are stored as the
+   pointer found in ARGS, not copied.  Types without a dedicated case
+   are copied bytewise.  */
+void
+ffi_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_raw *raw)
+{
+  unsigned i;
+  ffi_type **tp = cif->arg_types;
+
+  for (i = 0; i < cif->nargs; i++, tp++, args++)
+    {	  
+      switch ((*tp)->type)
+	{
+	case FFI_TYPE_UINT8:
+	  (raw++)->uint = *(UINT8*) (*args);
+	  break;
+
+	case FFI_TYPE_SINT8:
+	  (raw++)->sint = *(SINT8*) (*args);
+	  break;
+
+	case FFI_TYPE_UINT16:
+	  (raw++)->uint = *(UINT16*) (*args);
+	  break;
+
+	case FFI_TYPE_SINT16:
+	  (raw++)->sint = *(SINT16*) (*args);
+	  break;
+
+#if FFI_SIZEOF_ARG >= 4
+	case FFI_TYPE_UINT32:
+	  (raw++)->uint = *(UINT32*) (*args);
+	  break;
+
+	case FFI_TYPE_SINT32:
+	  (raw++)->sint = *(SINT32*) (*args);
+	  break;
+#endif
+
+#if !FFI_NO_STRUCTS
+	case FFI_TYPE_STRUCT:
+	  /* Store the address of the structure, not its contents.  */
+	  (raw++)->ptr = *args;
+	  break;
+#endif
+
+	case FFI_TYPE_POINTER:
+	  /* *args points at a pointer variable; store its value.  */
+	  (raw++)->ptr = **(void***) args;
+	  break;
+
+	default:
+	  /* Bytewise copy; takes as many whole slots as the size
+	     rounds up to.  */
+	  memcpy ((void*) raw->data, (void*)*args, (*tp)->size);
+	  raw += ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
+	}
+    }
+}
+
+#if !FFI_NATIVE_RAW_API
+
+
+/* This is a generic definition of ffi_raw_call, to be used if the
+ * native system does not provide a machine-specific implementation.
+ * Having this allows code to be written for the raw API without
+ * the need for system-specific code to handle input in that format;
+ * the following functions handle the translation back and forth
+ * automatically.
+ *
+ * cif    - prepared call interface
+ * fn     - function to call
+ * rvalue - storage for the return value, passed on to ffi_call
+ * raw    - arguments in raw form
+ *
+ * The temporary pointer array is allocated with alloca, so it lives
+ * on this function's stack frame and needs no explicit release. */
+
+void ffi_raw_call (ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *raw)
+{
+  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
+  ffi_raw_to_ptrarray (cif, raw, avalue);
+  ffi_call (cif, fn, rvalue, avalue);
+}
+
+#if FFI_CLOSURES		/* base system provides closures */
+
+/* Closure "hub" installed by ffi_prep_raw_closure_loc.
+
+   Receives arguments in pointer-array form (AVALUE), converts them
+   into a raw block allocated with alloca, and calls the user handler
+   stored in the closure with that block.
+
+   user_data is interpreted as the ffi_raw_closure whose fun and
+   user_data members are read.  */
+static void
+ffi_translate_args (ffi_cif *cif, void *rvalue,
+		    void **avalue, void *user_data)
+{
+  ffi_raw *raw = (ffi_raw*)alloca (ffi_raw_size (cif));
+  ffi_raw_closure *cl = (ffi_raw_closure*)user_data;
+
+  ffi_ptrarray_to_raw (cif, avalue, raw);
+  (*cl->fun) (cif, rvalue, raw, cl->user_data);
+}
+
+/* Prepare a raw closure.
+
+   cl        - closure storage to initialise
+   cif       - call interface describing the closure's signature
+   fun       - user handler that receives the arguments in raw form
+   user_data - opaque value handed back to FUN on every invocation
+   codeloc   - code address forwarded unchanged to
+               ffi_prep_closure_loc
+
+   Installs ffi_translate_args as the handler of the underlying
+   closure and, on success, records FUN and USER_DATA in CL.
+   Returns the status reported by ffi_prep_closure_loc.  */
+ffi_status
+ffi_prep_raw_closure_loc (ffi_raw_closure* cl,
+			  ffi_cif *cif,
+			  void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+			  void *user_data,
+			  void *codeloc)
+{
+  ffi_status status;
+
+  /* The hub dereferences its user_data as the closure object (it
+     reads ->fun and ->user_data), so it must be given CL itself.
+     CODELOC is only the code address; it is not guaranteed to alias
+     the closure's data, so it must not be used for that purpose.  */
+  status = ffi_prep_closure_loc ((ffi_closure*) cl,
+				 cif,
+				 &ffi_translate_args,
+				 cl,
+				 codeloc);
+  if (status == FFI_OK)
+    {
+      cl->fun       = fun;
+      cl->user_data = user_data;
+    }
+
+  return status;
+}
+
+#endif /* FFI_CLOSURES */
+#endif /* !FFI_NATIVE_RAW_API */
+
+#if FFI_CLOSURES
+
+/* Again, here is the generic version of ffi_prep_raw_closure, which
+ * will install an intermediate "hub" for translation of arguments from
+ * the pointer-array format to the raw format.
+ *
+ * Convenience wrapper around ffi_prep_raw_closure_loc that uses the
+ * closure object CL itself as the code location.  Returns that
+ * function's status. */
+
+ffi_status
+ffi_prep_raw_closure (ffi_raw_closure* cl,
+		      ffi_cif *cif,
+		      void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+		      void *user_data)
+{
+  return ffi_prep_raw_closure_loc (cl, cif, fun, user_data, cl);
+}
+
+#endif /* FFI_CLOSURES */
+
+#endif /* !FFI_NO_RAW_API */
--- a/BeefySysLib/third_party/libffi/ios/src/types.c
+++ b/BeefySysLib/third_party/libffi/ios/src/types.c
@ -0,0 +1,77 @@
+/* -----------------------------------------------------------------------
+   types.c - Copyright (c) 1996, 1998  Red Hat, Inc.
+   
+   Predefined ffi_types needed by libffi.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+/* Hide the basic type definitions from the header file, so that we
+   can redefine them here as "const".  */
+#define LIBFFI_HIDE_BASIC_TYPES
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+/* Type definitions */
+
+/* FFI_TYPEDEF(name, type, id) defines the constant descriptor
+   ffi_type_<name> for the C type TYPE:
+     - size      is sizeof(type);
+     - alignment is the offset of a TYPE member placed right after a
+       single char in the helper struct struct_align_<name>, i.e. the
+       padding the compiler inserts to align TYPE;
+     - the type code is ID and the element list is NULL.
+   The expansion has no trailing semicolon; the use site supplies it.  */
+#define FFI_TYPEDEF(name, type, id)		\
+struct struct_align_##name {			\
+  char c;					\
+  type x;					\
+};						\
+const ffi_type ffi_type_##name = {		\
+  sizeof(type),					\
+  offsetof(struct struct_align_##name, x),	\
+  id, NULL					\
+}
+
+/* Descriptor for void.  Size and alignment are fake here.  They must
+   not be 0.  */
+const ffi_type ffi_type_void = {
+  1, 1, FFI_TYPE_VOID, NULL
+};
+
+/* Fixed-width integer descriptors.  */
+FFI_TYPEDEF(uint8, UINT8, FFI_TYPE_UINT8);
+FFI_TYPEDEF(sint8, SINT8, FFI_TYPE_SINT8);
+FFI_TYPEDEF(uint16, UINT16, FFI_TYPE_UINT16);
+FFI_TYPEDEF(sint16, SINT16, FFI_TYPE_SINT16);
+FFI_TYPEDEF(uint32, UINT32, FFI_TYPE_UINT32);
+FFI_TYPEDEF(sint32, SINT32, FFI_TYPE_SINT32);
+FFI_TYPEDEF(uint64, UINT64, FFI_TYPE_UINT64);
+FFI_TYPEDEF(sint64, SINT64, FFI_TYPE_SINT64);
+
+/* Data pointer descriptor, measured on void*.  */
+FFI_TYPEDEF(pointer, void*, FFI_TYPE_POINTER);
+
+/* Floating-point descriptors.  */
+FFI_TYPEDEF(float, float, FFI_TYPE_FLOAT);
+FFI_TYPEDEF(double, double, FFI_TYPE_DOUBLE);
+
+/* long double descriptor.  On Alpha it is hard-coded (size 16,
+   alignment 16, type code 4); elsewhere it is generated with
+   FFI_TYPEDEF, but only when FFI_TYPE_LONGDOUBLE is a type code
+   distinct from FFI_TYPE_DOUBLE.  When the two codes are equal no
+   ffi_type_longdouble object is defined in this file.  */
+#ifdef __alpha__
+/* Even if we're not configured to default to 128-bit long double, 
+   maintain binary compatibility, as -mlong-double-128 can be used
+   at any time.  */
+/* Validate the hard-coded number below.  */
+# if defined(__LONG_DOUBLE_128__) && FFI_TYPE_LONGDOUBLE != 4
+#  error FFI_TYPE_LONGDOUBLE out of date
+# endif
+const ffi_type ffi_type_longdouble = { 16, 16, 4, NULL };
+#elif FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+FFI_TYPEDEF(longdouble, long double, FFI_TYPE_LONGDOUBLE);
+#endif
--- a/BeefySysLib/third_party/libffi/ios/src/x86/darwin.S
+++ b/BeefySysLib/third_party/libffi/ios/src/x86/darwin.S
@ -0,0 +1,449 @@
+#if !defined(__arm__) && defined(__i386__)
+
+/* -----------------------------------------------------------------------
+   darwin.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005  Red Hat, Inc.
+	Copyright (C) 2008  Free Software Foundation, Inc.
+
+   X86 Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   -----------------------------------------------------------------------
+   */
+
+#ifndef __x86_64__
+
+#define LIBFFI_ASM	
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+.globl _ffi_prep_args
+
+	.align 4
+.globl _ffi_call_SYSV
+
+/* _ffi_call_SYSV: build the outgoing argument area, call the target
+   function and store its result according to the return type code.
+
+   Stack arguments as read by this routine:
+      8(%ebp)  argument-preparation callback, called as cb(area, ecif)
+     12(%ebp)  ecif pointer, passed through to the callback
+     16(%ebp)  number of bytes to reserve for the outgoing arguments
+     20(%ebp)  return type code (index into .Lstore_table)
+     24(%ebp)  address that receives the return value, or NULL
+     28(%ebp)  function to call
+
+   %esi is pushed after the call and popped again in the epilogue; it
+   is used as scratch for the jump-table dispatch.  */
+_ffi_call_SYSV:
+.LFB1:
+        pushl %ebp
+.LCFI0:
+        movl  %esp,%ebp
+.LCFI1:
+        subl $8,%esp
+	/* Make room for all of the new args.  */
+	movl  16(%ebp),%ecx
+	subl  %ecx,%esp
+
+	/* %eax = start of the argument area that was just reserved.  */
+	movl  %esp,%eax
+
+	/* Place all of the ffi_prep_args in position  */
+	/* 8 bytes of padding plus the two pushes below make 16 bytes.  */
+	subl  $8,%esp
+	pushl 12(%ebp)
+	pushl %eax
+	call  *8(%ebp)
+
+	/* Return stack to previous state and call the function  */
+	addl  $16,%esp	
+
+	call  *28(%ebp)
+
+	/* Load %ecx with the return type code  */
+	movl  20(%ebp),%ecx	
+
+	/* Protect %esi.  We're going to pop it in the epilogue.  */
+	pushl %esi
+
+	/* If the return value pointer is NULL, assume no return value.  */
+	cmpl  $0,24(%ebp)
+	jne  0f
+
+	/* Even if there is no space for the return value, we are
+	   obliged to handle floating-point values.  */
+	/* NOTE(review): only FFI_TYPE_FLOAT is tested here; a double or
+	   long double result with a NULL return pointer would apparently
+	   be left on the x87 stack -- confirm whether that can occur.  */
+	cmpl  $FFI_TYPE_FLOAT,%ecx
+	jne   noretval
+	fstp  %st(0)
+
+	jmp   epilogue
+0:
+	.align 4
+	/* The call pushes the address of .Lstore_table, which label 1
+	   pops into %esi to get a position-independent table base.  */
+	call 1f
+.Lstore_table:
+	/* Each entry is the handler's offset from .Lstore_table, indexed
+	   by the return type code in %ecx.  */
+	.long   noretval-.Lstore_table		/* FFI_TYPE_VOID */
+	.long   retint-.Lstore_table		/* FFI_TYPE_INT */
+	.long   retfloat-.Lstore_table		/* FFI_TYPE_FLOAT */
+	.long   retdouble-.Lstore_table		/* FFI_TYPE_DOUBLE */
+	.long   retlongdouble-.Lstore_table     /* FFI_TYPE_LONGDOUBLE */
+	.long   retuint8-.Lstore_table		/* FFI_TYPE_UINT8 */
+	.long   retsint8-.Lstore_table		/* FFI_TYPE_SINT8 */
+	.long   retuint16-.Lstore_table		/* FFI_TYPE_UINT16 */
+	.long   retsint16-.Lstore_table		/* FFI_TYPE_SINT16 */
+	.long   retint-.Lstore_table		/* FFI_TYPE_UINT32 */
+	.long   retint-.Lstore_table		/* FFI_TYPE_SINT32 */
+	.long   retint64-.Lstore_table		/* FFI_TYPE_UINT64 */
+	.long   retint64-.Lstore_table		/* FFI_TYPE_SINT64 */
+	.long   retstruct-.Lstore_table		/* FFI_TYPE_STRUCT */
+	.long   retint-.Lstore_table		/* FFI_TYPE_POINTER */
+	.long   retstruct1b-.Lstore_table	/* FFI_TYPE_SMALL_STRUCT_1B */
+	.long   retstruct2b-.Lstore_table	/* FFI_TYPE_SMALL_STRUCT_2B */
+1:
+	/* %esi = table base; add the selected offset and jump there.  */
+	pop  %esi
+	add  (%esi, %ecx, 4), %esi
+	jmp  *%esi
+
+	/* Sign/zero extend as appropriate.  */
+	/* Each of these widens the value in %eax and then stores all
+	   four bytes through retint.  */
+retsint8:
+	movsbl  %al, %eax
+	jmp  retint
+
+retsint16:
+	movswl  %ax, %eax
+	jmp  retint
+
+retuint8:
+	movzbl  %al, %eax
+	jmp  retint
+
+retuint16:
+	movzwl  %ax, %eax
+	jmp  retint
+
+retfloat:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	fstps (%ecx)
+	jmp   epilogue
+
+retdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	fstpl (%ecx)
+	jmp   epilogue
+
+retlongdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	fstpt (%ecx)
+	jmp   epilogue
+
+retint64:
+	/* Load %ecx with the pointer to storage for the return value  */
+	/* Low word comes from %eax, high word from %edx.  */
+	movl  24(%ebp),%ecx
+	movl  %eax,0(%ecx)
+	movl  %edx,4(%ecx)
+	jmp   epilogue
+
+retstruct1b:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movb  %al,0(%ecx)
+	jmp   epilogue
+
+retstruct2b:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movw  %ax,0(%ecx)
+	jmp   epilogue
+
+retint:
+	/* Load %ecx with the pointer to storage for the return value  */
+	/* Falls through into the epilogue after the store.  */
+	movl  24(%ebp),%ecx
+	movl  %eax,0(%ecx)
+
+retstruct:
+	/* Nothing to do!  */
+
+noretval:
+epilogue:
+	/* Undo the pushl %esi above, then tear down the frame.  */
+	popl %esi
+	movl %ebp,%esp
+	popl %ebp
+	ret
+
+.LFE1:
+.ffi_call_SYSV_end:
+
+	.align	4
+FFI_HIDDEN (ffi_closure_SYSV)
+.globl _ffi_closure_SYSV
+
+/* _ffi_closure_SYSV: closure entry point.  It sets up a result buffer,
+   calls ffi_closure_SYSV_inner through its import stub, then loads the
+   result into the return registers according to the type code that the
+   inner function returns in %eax.
+
+   Frame layout (40 bytes below %ebp):
+     -24(%ebp)  result buffer
+     -12(%ebp)  resp, a pointer to that buffer
+       (%esp)   &resp             -- first stack argument to the inner call
+      4(%esp)   8(%ebp), the incoming argument area -- second argument
+      8(%esp)   saved %ebx, restored after the call
+
+   NOTE(review): nothing here passes the closure pointer explicitly;
+   presumably it arrives in a register set by the trampoline -- confirm
+   against ffi_closure_SYSV_inner's declaration.  */
+_ffi_closure_SYSV:
+.LFB2:
+	pushl	%ebp
+.LCFI2:
+	movl	%esp, %ebp
+.LCFI3:
+	subl	$40, %esp
+	leal	-24(%ebp), %edx
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	8(%ebp), %edx
+	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
+	leal	-12(%ebp), %edx
+	movl	%edx, (%esp)	/* &resp */
+	movl	%ebx, 8(%esp)
+.LCFI7:
+	call	L_ffi_closure_SYSV_inner$stub
+	movl	8(%esp), %ebx
+	/* %ecx = resp as left in the frame slot after the inner call.  */
+	movl	-12(%ebp), %ecx
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	/* i.e. codes in the range [FFI_TYPE_UINT8, FFI_TYPE_UINT64).  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lcls_retint
+
+0:	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lcls_retllong
+	cmpl	$FFI_TYPE_SMALL_STRUCT_1B, %eax
+	je	.Lcls_retstruct1b
+	cmpl	$FFI_TYPE_SMALL_STRUCT_2B, %eax
+	je	.Lcls_retstruct2b
+	cmpl	$FFI_TYPE_STRUCT, %eax
+	je	.Lcls_retstruct
+	/* Any other code falls through and returns without loading a
+	   value.  */
+.Lcls_epilogue:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+.Lcls_retint:
+	movl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retfloat:
+	flds	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retdouble:
+	fldl	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retldouble:
+	fldt	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retllong:
+	movl	(%ecx), %eax
+	movl	4(%ecx), %edx
+	jmp	.Lcls_epilogue
+.Lcls_retstruct1b:
+	movsbl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retstruct2b:
+	movswl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retstruct:
+	/* NOTE(review): the lea below has no lasting effect, because %esp
+	   is overwritten by the very next instruction.  */
+	lea -8(%ebp),%esp
+	movl	%ebp, %esp
+	popl	%ebp
+	/* Pops 4 extra bytes of the caller's arguments on return
+	   (presumably the hidden struct-return pointer -- confirm).  */
+	ret $4
+.LFE2:
+
+#if !FFI_NO_RAW_API
+
+/* Byte offsets into the raw closure object: the cif pointer follows the
+   trampoline (rounded up to a multiple of 4), then fun, then user_data,
+   each 4 bytes apart.  CIF_FLAGS_OFFSET is the offset of the flags word
+   read from the cif below (hard-coded; must match the ffi_cif layout).  */
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20
+
+	.align	4
+FFI_HIDDEN (ffi_closure_raw_SYSV)
+.globl _ffi_closure_raw_SYSV
+
+/* _ffi_closure_raw_SYSV: raw closure entry point.  On entry %eax is
+   used as the closure pointer (presumably loaded by the trampoline --
+   confirm).  Calls closure->fun(cif, &res, raw_args, user_data) with
+   res at -24(%ebp), then loads the result according to the flags word
+   read from the cif.  %esi holds the cif across the call.  */
+_ffi_closure_raw_SYSV:
+.LFB3:
+	pushl	%ebp
+.LCFI4:
+	movl	%esp, %ebp
+.LCFI5:
+	pushl	%esi
+.LCFI6:
+	subl	$36, %esp
+	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+	movl	%edx, 12(%esp)	/* user_data */
+	leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
+	movl	%edx, 8(%esp)	/* raw_args */
+	leal	-24(%ebp), %edx
+	movl	%edx, 4(%esp)	/* &res */
+	movl	%esi, (%esp)	/* cif */
+	call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
+	movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lrcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	/* i.e. codes in the range [FFI_TYPE_UINT8, FFI_TYPE_UINT64).  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lrcls_retint
+0:
+	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lrcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lrcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lrcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lrcls_retllong
+	/* Any other code falls through and returns without loading a
+	   value.  */
+.Lrcls_epilogue:
+	/* Release the 36-byte local area, then restore %esi and %ebp.  */
+	addl	$36, %esp
+	popl	%esi
+	popl	%ebp
+	ret
+.Lrcls_retint:
+	movl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+.Lrcls_retfloat:
+	flds	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retdouble:
+	fldl	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retldouble:
+	fldt	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retllong:
+	movl	-24(%ebp), %eax
+	movl	-20(%ebp), %edx
+	jmp	.Lrcls_epilogue
+.LFE3:
+#endif
+
+/* Import stub through which _ffi_closure_SYSV calls
+   _ffi_closure_SYSV_inner.  The section declares 5-byte stubs and the
+   self_modifying_code attribute; the five hlt bytes are placeholders
+   (presumably rewritten by the dynamic linker -- confirm).  */
+.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
+L_ffi_closure_SYSV_inner$stub:
+	.indirect_symbol _ffi_closure_SYSV_inner
+	hlt ; hlt ; hlt ; hlt ; hlt
+
+
+/* Hand-written unwind information: one CIE followed by one FDE per
+   routine above.  Byte annotations follow the DWARF call-frame
+   encoding; register columns are given by number only.  */
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EH_frame1:
+	.set	L$set$0,LECIE1-LSCIE1	/* CIE Length */
+	.long	L$set$0
+LSCIE1:
+	.long	0x0	/* CIE Identifier Tag */
+	.byte	0x1	/* CIE Version */
+	.ascii "zR\0"	/* CIE Augmentation */
+	.byte	0x1	/* uleb128 0x1; CIE Code Alignment Factor */
+	.byte	0x7c	/* sleb128 -4; CIE Data Alignment Factor */
+	.byte	0x8	/* CIE RA Column */
+	.byte	0x1	/* uleb128 0x1; Augmentation size */
+	.byte	0x10	/* FDE Encoding (pcrel) */
+	.byte	0xc	/* DW_CFA_def_cfa */
+	.byte	0x5	/* uleb128 0x5; column */
+	.byte	0x4	/* uleb128 0x4; offset */
+	.byte	0x88	/* DW_CFA_offset, column 0x8 */
+	.byte	0x1	/* uleb128 0x1; factored offset */
+	.align 2
+LECIE1:
+/* FDE for _ffi_call_SYSV (.LFB1 .. .LFE1).  */
+.globl _ffi_call_SYSV.eh
+_ffi_call_SYSV.eh:
+LSFDE1:
+	.set	L$set$1,LEFDE1-LASFDE1	/* FDE Length */
+	.long	L$set$1
+LASFDE1:
+	.long	LASFDE1-EH_frame1	/* FDE CIE offset */
+	.long	.LFB1-.	/* FDE initial location */
+	.set L$set$2,.LFE1-.LFB1	/* FDE address range */
+	.long L$set$2
+	.byte	0x0	/* Augmentation size */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.set L$set$3,.LCFI0-.LFB1
+	.long L$set$3
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* uleb128 0x8 */
+	.byte	0x84	/* DW_CFA_offset, column 0x4 */
+	.byte	0x2	/* uleb128 0x2; factored offset */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.set L$set$4,.LCFI1-.LCFI0
+	.long L$set$4
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x4	/* uleb128 0x4; column */
+	.align 2
+LEFDE1:
+/* FDE for _ffi_closure_SYSV (.LFB2 .. .LFE2).  */
+.globl _ffi_closure_SYSV.eh
+_ffi_closure_SYSV.eh:
+LSFDE2:
+	.set	L$set$5,LEFDE2-LASFDE2	/* FDE Length */
+	.long	L$set$5
+LASFDE2:
+	.long	LASFDE2-EH_frame1	/* FDE CIE offset */
+	.long	.LFB2-.	/* FDE initial location */
+	.set L$set$6,.LFE2-.LFB2	/* FDE address range */
+	.long L$set$6
+	.byte	0x0	/* Augmentation size */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.set L$set$7,.LCFI2-.LFB2
+	.long L$set$7
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* uleb128 0x8 */
+	.byte	0x84	/* DW_CFA_offset, column 0x4 */
+	.byte	0x2	/* uleb128 0x2; factored offset */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.set L$set$8,.LCFI3-.LCFI2
+	.long L$set$8
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x4	/* uleb128 0x4; column */
+	.align 2
+LEFDE2:
+
+#if !FFI_NO_RAW_API
+
+/* FDE for _ffi_closure_raw_SYSV (.LFB3 .. .LFE3).  */
+.globl _ffi_closure_raw_SYSV.eh
+_ffi_closure_raw_SYSV.eh:
+LSFDE3:
+	.set	L$set$10,LEFDE3-LASFDE3	/* FDE Length */
+	.long	L$set$10
+LASFDE3:
+	.long	LASFDE3-EH_frame1	/* FDE CIE offset */
+	.long	.LFB3-.	/* FDE initial location */
+	.set L$set$11,.LFE3-.LFB3	/* FDE address range */
+	.long L$set$11
+	.byte	0x0	/* Augmentation size */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.set L$set$12,.LCFI4-.LFB3
+	.long L$set$12
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* uleb128 0x8 */
+	.byte	0x84	/* DW_CFA_offset, column 0x4 */
+	.byte	0x2	/* uleb128 0x2; factored offset */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.set L$set$13,.LCFI5-.LCFI4
+	.long L$set$13
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x4	/* uleb128 0x4; column */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.set L$set$14,.LCFI6-.LCFI5
+	.long L$set$14
+	/* NOTE(review): this records column 0x5 right after the
+	   pushl %esi at .LCFI6 -- confirm the column number matches the
+	   register actually saved.  */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x3	/* uleb128 0x3; factored offset */
+	.align 2
+LEFDE3:
+
+#endif
+
+#endif /* ifndef __x86_64__ */
+
+
+#endif
--- a/BeefySysLib/third_party/libffi/ios/src/x86/darwin64.S
+++ b/BeefySysLib/third_party/libffi/ios/src/x86/darwin64.S
@ -0,0 +1,421 @@
+#if !defined(__arm__) && defined(__i386__)
+
+/* -----------------------------------------------------------------------
+   darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc.
+	        Copyright (c) 2008 Red Hat, Inc.
+   derived from unix64.S
+
+   x86-64 Foreign Function Interface for Darwin.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifdef __x86_64__
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+	.file "darwin64.S"
+.text
+
+/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+		    void *raddr, void (*fnaddr)(void));
+
+   Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
+   for this function.  This has been allocated by ffi_call.  We also
+   deallocate some of the stack that has been alloca'd.
+
+   Registers on entry, as used below: %rdi = args, %rsi = bytes,
+   %rdx = flags, %rcx = raddr, %r8 = fnaddr, and %r9d is copied to %eax
+   as the SSE register count.
+
+   Layout of the register area at ARGS: six 8-byte integer registers at
+   offsets 0..40, eight 16-byte SSE registers at 48..160, and the stack
+   arguments from offset 176 onwards.  */
+
+	.align	3
+	.globl	_ffi_call_unix64
+
+_ffi_call_unix64:
+LUW0:
+	movq	(%rsp), %r10		/* Load return address.  */
+	leaq	(%rdi, %rsi), %rax	/* Find local stack base.  */
+	movq	%rdx, (%rax)		/* Save flags.  */
+	movq	%rcx, 8(%rax)		/* Save raddr.  */
+	movq	%rbp, 16(%rax)		/* Save old frame pointer.  */
+	movq	%r10, 24(%rax)		/* Relocate return address.  */
+	movq	%rax, %rbp		/* Finalize local stack frame.  */
+LUW1:
+	movq	%rdi, %r10		/* Save a copy of the register area. */
+	movq	%r8, %r11		/* Save a copy of the target fn.  */
+	movl	%r9d, %eax		/* Set number of SSE registers.  */
+
+	/* Load up all argument registers.  */
+	movq	(%r10), %rdi
+	movq	8(%r10), %rsi
+	movq	16(%r10), %rdx
+	movq	24(%r10), %rcx
+	movq	32(%r10), %r8
+	movq	40(%r10), %r9
+	/* Only load the SSE registers when the count is non-zero.  */
+	testl	%eax, %eax
+	jnz	Lload_sse
+Lret_from_load_sse:
+
+	/* Deallocate the reg arg area.  */
+	leaq	176(%r10), %rsp
+
+	/* Call the user function.  */
+	call	*%r11
+
+	/* Deallocate stack arg area; local stack frame in redzone.  */
+	/* After this %rsp points at the relocated return address, so
+	   each store handler below can finish with a plain ret.  */
+	leaq	24(%rbp), %rsp
+
+	movq	0(%rbp), %rcx		/* Reload flags.  */
+	movq	8(%rbp), %rdi		/* Reload raddr.  */
+	movq	16(%rbp), %rbp		/* Reload old frame pointer.  */
+LUW2:
+
+	/* The first byte of the flags contains the FFI_TYPE.  */
+	/* Table entries are offsets relative to Lstore_table.  */
+	movzbl	%cl, %r10d
+	leaq	Lstore_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10
+	addq	%r11, %r10
+	jmp	*%r10
+
+Lstore_table:
+	.long	Lst_void-Lstore_table		/* FFI_TYPE_VOID */
+	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_INT */
+	.long	Lst_float-Lstore_table		/* FFI_TYPE_FLOAT */
+	.long	Lst_double-Lstore_table		/* FFI_TYPE_DOUBLE */
+	.long	Lst_ldouble-Lstore_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	Lst_uint8-Lstore_table		/* FFI_TYPE_UINT8 */
+	.long	Lst_sint8-Lstore_table		/* FFI_TYPE_SINT8 */
+	.long	Lst_uint16-Lstore_table		/* FFI_TYPE_UINT16 */
+	.long	Lst_sint16-Lstore_table		/* FFI_TYPE_SINT16 */
+	.long	Lst_uint32-Lstore_table		/* FFI_TYPE_UINT32 */
+	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_SINT32 */
+	.long	Lst_int64-Lstore_table		/* FFI_TYPE_UINT64 */
+	.long	Lst_int64-Lstore_table		/* FFI_TYPE_SINT64 */
+	.long	Lst_struct-Lstore_table		/* FFI_TYPE_STRUCT */
+	.long	Lst_int64-Lstore_table		/* FFI_TYPE_POINTER */
+
+	/* Scalar store handlers reached through Lstore_table.  On entry
+	   %rdi is the return-value address (raddr) and the result is in
+	   %rax, %xmm0 or on the x87 stack.  Integer results narrower than
+	   64 bits are widened and stored as a full 8-byte word.  Every
+	   handler must end in its own ret: falling through into the next
+	   handler would widen and store the value a second time with the
+	   wrong signedness.  */
+	.text
+	.align	3
+Lst_void:
+	ret
+	.align	3
+Lst_uint8:
+	movzbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_sint8:
+	movsbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_uint16:
+	movzwq	%ax, %rax
+	movq	%rax, (%rdi)
+	/* Return here; otherwise Lst_sint16 would sign-extend the value
+	   and overwrite the zero-extended result.  */
+	ret
+	.align	3
+Lst_sint16:
+	movswq	%ax, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_uint32:
+	movl	%eax, %eax		/* Zero-extend to 64 bits.  */
+	movq	%rax, (%rdi)
+	/* Return here; otherwise Lst_sint32 would sign-extend the value
+	   and overwrite the zero-extended result.  */
+	ret
+	.align	3
+Lst_sint32:
+	cltq				/* Sign-extend %eax into %rax.  */
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_int64:
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_float:
+	movss	%xmm0, (%rdi)
+	ret
+	.align	3
+Lst_double:
+	movsd	%xmm0, (%rdi)
+	ret
+Lst_ldouble:
+	fstpt	(%rdi)
+	ret
+	.align	3
+/* Struct store handler.  On entry %rdi = return-value address and
+   %ecx = flags.  The two 8-byte halves are assembled in %rax/%rdx,
+   spilled to a scratch area and then copied out byte by byte.  */
+Lst_struct:
+	leaq	-20(%rsp), %rsi		/* Scratch area in redzone.  */
+
+	/* We have to locate the values now, and since we don't want to
+	   write too much data into the user's return value, we spill the
+	   value to a 16 byte scratch area first.  Bits 8, 9, and 10
+	   control where the values are located.  Only one of the three
+	   bits will be set; see ffi_prep_cif_machdep for the pattern.  */
+	movd	%xmm0, %r10
+	movd	%xmm1, %r11
+	/* 0x100: first half from %xmm0, second half from %rax.  */
+	testl	$0x100, %ecx
+	cmovnz	%rax, %rdx
+	cmovnz	%r10, %rax
+	/* 0x200: first half stays in %rax, second half from %xmm0.  */
+	testl	$0x200, %ecx
+	cmovnz	%r10, %rdx
+	/* 0x400: first half from %xmm0, second half from %xmm1.  */
+	testl	$0x400, %ecx
+	cmovnz	%r10, %rax
+	cmovnz	%r11, %rdx
+	movq	%rax, (%rsi)
+	movq	%rdx, 8(%rsi)
+
+	/* Bits 12-31 contain the true size of the structure.  Copy from
+	   the scratch area to the true destination.  */
+	/* rep movsb copies %ecx bytes from (%rsi) to (%rdi).  */
+	shrl	$12, %ecx
+	rep movsb
+	ret
+
+	/* Many times we can avoid loading any SSE registers at all.
+	   It's not worth an indirect jump to load the exact set of
+	   SSE registers needed; zero or all is a good compromise.  */
+	/* Loads %xmm0-%xmm7 from offsets 48..160 of the register area in
+	   %r10, then resumes at Lret_from_load_sse.  movdqa needs each
+	   slot to be 16-byte aligned.  */
+	.align	3
+LUW3:
+Lload_sse:
+	movdqa	48(%r10), %xmm0
+	movdqa	64(%r10), %xmm1
+	movdqa	80(%r10), %xmm2
+	movdqa	96(%r10), %xmm3
+	movdqa	112(%r10), %xmm4
+	movdqa	128(%r10), %xmm5
+	movdqa	144(%r10), %xmm6
+	movdqa	160(%r10), %xmm7
+	jmp	Lret_from_load_sse
+
+	/* LUW4 marks the end of the range covered by the
+	   _ffi_call_unix64 FDE (LUW4-LUW0).  */
+LUW4:
+	.align	3
+	.globl	_ffi_closure_unix64
+
+/* _ffi_closure_unix64: closure entry point.  It saves the incoming
+   argument registers in a 200-byte frame, calls
+   ffi_closure_unix64_inner, and loads the result according to the low
+   byte of the value the inner function returns.
+
+   Frame layout relative to %rsp after the allocation:
+       0..40    the six integer argument registers
+      48..160   %xmm0-%xmm7 (saved only when the carry flag is set)
+     176        return-value buffer; it sits at -24(%rsp) once the
+                frame has been released
+     208        the caller's stack arguments (200 + return address)
+
+   Arguments to the inner call: %rdi = %r10 (presumably the closure
+   pointer set by the trampoline -- confirm), %rsi = return buffer,
+   %rdx = register save area, %rcx = caller's stack arguments.  */
+_ffi_closure_unix64:
+LUW5:
+	/* The carry flag is set by the trampoline iff SSE registers
+	   are used.  Don't clobber it before the branch instruction.  */
+	/* leaq is used instead of subq because it leaves flags alone.  */
+	leaq    -200(%rsp), %rsp
+LUW6:
+	movq	%rdi, (%rsp)
+	movq    %rsi, 8(%rsp)
+	movq    %rdx, 16(%rsp)
+	movq    %rcx, 24(%rsp)
+	movq    %r8, 32(%rsp)
+	movq    %r9, 40(%rsp)
+	jc      Lsave_sse
+Lret_from_save_sse:
+
+	movq	%r10, %rdi
+	leaq	176(%rsp), %rsi
+	movq	%rsp, %rdx
+	leaq	208(%rsp), %rcx
+	call	_ffi_closure_unix64_inner
+
+	/* Deallocate stack frame early; return value is now in redzone.  */
+	addq	$200, %rsp
+LUW7:
+
+	/* The first byte of the return value contains the FFI_TYPE.  */
+	/* Table entries are offsets relative to Lload_table.  */
+	movzbl	%al, %r10d
+	leaq	Lload_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10
+	addq	%r11, %r10
+	jmp	*%r10
+
+Lload_table:
+	.long	Lld_void-Lload_table		/* FFI_TYPE_VOID */
+	.long	Lld_int32-Lload_table		/* FFI_TYPE_INT */
+	.long	Lld_float-Lload_table		/* FFI_TYPE_FLOAT */
+	.long	Lld_double-Lload_table		/* FFI_TYPE_DOUBLE */
+	.long	Lld_ldouble-Lload_table		/* FFI_TYPE_LONGDOUBLE */
+	.long	Lld_int8-Lload_table		/* FFI_TYPE_UINT8 */
+	.long	Lld_int8-Lload_table		/* FFI_TYPE_SINT8 */
+	.long	Lld_int16-Lload_table		/* FFI_TYPE_UINT16 */
+	.long	Lld_int16-Lload_table		/* FFI_TYPE_SINT16 */
+	.long	Lld_int32-Lload_table		/* FFI_TYPE_UINT32 */
+	.long	Lld_int32-Lload_table		/* FFI_TYPE_SINT32 */
+	.long	Lld_int64-Lload_table		/* FFI_TYPE_UINT64 */
+	.long	Lld_int64-Lload_table		/* FFI_TYPE_SINT64 */
+	.long	Lld_struct-Lload_table		/* FFI_TYPE_STRUCT */
+	.long	Lld_int64-Lload_table		/* FFI_TYPE_POINTER */
+
+	/* Result loaders reached through Lload_table.  Each reads the
+	   return-value buffer at -24(%rsp) into the return register(s)
+	   and returns to the closure's caller.  Signed and unsigned 8-
+	   and 16-bit codes share one handler, which zero-extends.  */
+	.text
+	.align	3
+Lld_void:
+	ret
+	.align	3
+Lld_int8:
+	movzbl	-24(%rsp), %eax
+	ret
+	.align	3
+Lld_int16:
+	movzwl	-24(%rsp), %eax
+	ret
+	.align	3
+Lld_int32:
+	movl	-24(%rsp), %eax
+	ret
+	.align	3
+Lld_int64:
+	movq	-24(%rsp), %rax
+	ret
+	.align	3
+Lld_float:
+	movss	-24(%rsp), %xmm0
+	ret
+	.align	3
+Lld_double:
+	movsd	-24(%rsp), %xmm0
+	ret
+	.align	3
+Lld_ldouble:
+	fldt	-24(%rsp)
+	ret
+	.align	3
+Lld_struct:
+	/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
+	   %rax/%xmm0, %xmm0/%xmm1.  We collapse two by always loading
+	   both rdx and xmm1 with the second word.  For the remaining,
+	   bit 8 set means xmm0 gets the second word, and bit 9 means
+	   that rax gets the second word.  */
+	movq	-24(%rsp), %rcx
+	movq	-16(%rsp), %rdx
+	movq	-16(%rsp), %xmm1
+	testl	$0x100, %eax
+	cmovnz	%rdx, %rcx
+	movd	%rcx, %xmm0
+	/* The test must come before %rax is reloaded, since the flag
+	   bits live in %eax; movq does not disturb the flags.  */
+	testl	$0x200, %eax
+	movq	-24(%rsp), %rax
+	cmovnz	%rdx, %rax
+	ret
+
+	/* See the comment above Lload_sse; the same logic applies here.  */
+	/* Saves %xmm0-%xmm7 at offsets 48..160 of the closure frame, then
+	   resumes at Lret_from_save_sse.  movdqa needs each slot to be
+	   16-byte aligned.  */
+	.align	3
+LUW8:
+Lsave_sse:
+	movdqa	%xmm0, 48(%rsp)
+	movdqa	%xmm1, 64(%rsp)
+	movdqa	%xmm2, 80(%rsp)
+	movdqa	%xmm3, 96(%rsp)
+	movdqa	%xmm4, 112(%rsp)
+	movdqa	%xmm5, 128(%rsp)
+	movdqa	%xmm6, 144(%rsp)
+	movdqa	%xmm7, 160(%rsp)
+	jmp	Lret_from_save_sse
+
+	/* LUW9 marks the end of the range covered by the
+	   _ffi_closure_unix64 FDE (LUW9-LUW5).  */
+LUW9:
+/* Hand-written unwind information: one CIE followed by one FDE each
+   for _ffi_call_unix64 and _ffi_closure_unix64.  */
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EH_frame1:
+	.set	L$set$0,LECIE1-LSCIE1		/* CIE Length */
+	.long	L$set$0
+LSCIE1:
+	.long	0x0		/* CIE Identifier Tag */
+	.byte	0x1		/* CIE Version */
+	.ascii	"zR\0"		/* CIE Augmentation */
+	.byte	0x1		/* uleb128 0x1; CIE Code Alignment Factor */
+	.byte	0x78		/* sleb128 -8; CIE Data Alignment Factor */
+	.byte	0x10		/* CIE RA Column */
+	.byte	0x1		/* uleb128 0x1; Augmentation size */
+	.byte	0x10		/* FDE Encoding (pcrel sdata4) */
+	.byte	0xc		/* DW_CFA_def_cfa, %rsp offset 8 */
+	.byte	0x7		/* uleb128 0x7 */
+	.byte	0x8		/* uleb128 0x8 */
+	.byte	0x90		/* DW_CFA_offset, column 0x10 */
+	.byte	0x1
+	.align	3
+LECIE1:
+	/* FDE for _ffi_call_unix64 (LUW0 .. LUW4).  */
+	.globl _ffi_call_unix64.eh
+_ffi_call_unix64.eh:
+LSFDE1:
+	.set	L$set$1,LEFDE1-LASFDE1	/* FDE Length */
+	.long	L$set$1
+LASFDE1:
+	.long	LASFDE1-EH_frame1	/* FDE CIE offset */
+	.quad	LUW0-.			/* FDE initial location */
+	.set	L$set$2,LUW4-LUW0	/* FDE address range */
+	.quad	L$set$2
+	.byte	0x0			/* Augmentation size */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$3,LUW1-LUW0
+	.long	L$set$3
+
+	/* New stack frame based off rbp.  This is a itty bit of unwind
+	   trickery in that the CFA *has* changed.  There is no easy way
+	   to describe it correctly on entry to the function.  Fortunately,
+	   it doesn't matter too much since at all points we can correctly
+	   unwind back to ffi_call.  Note that the location to which we
+	   moved the return address is (the new) CFA-8, so from the
+	   perspective of the unwind info, it hasn't moved.  */
+	.byte	0xc			/* DW_CFA_def_cfa, %rbp offset 32 */
+	.byte	0x6
+	.byte	0x20
+	.byte	0x80+6			/* DW_CFA_offset, %rbp offset 2*-8 */
+	.byte	0x2
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	/* From LUW2 on (frame released, old %rbp reloaded).  */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$4,LUW2-LUW1
+	.long	L$set$4
+	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
+	.byte	0x7
+	.byte	0x8
+	.byte	0xc0+6			/* DW_CFA_restore, %rbp */
+
+	/* From LUW3 on (Lload_sse, which runs with the frame set up).  */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$5,LUW3-LUW2
+	.long	L$set$5
+	.byte	0xb			/* DW_CFA_restore_state */
+
+	.align	3
+LEFDE1:
+	/* FDE for _ffi_closure_unix64 (LUW5 .. LUW9).  */
+	.globl _ffi_closure_unix64.eh
+_ffi_closure_unix64.eh:
+LSFDE3:
+	.set	L$set$6,LEFDE3-LASFDE3	/* FDE Length */
+	.long	L$set$6
+LASFDE3:
+	.long	LASFDE3-EH_frame1	/* FDE CIE offset */
+	.quad	LUW5-.			/* FDE initial location */
+	.set	L$set$7,LUW9-LUW5	/* FDE address range */
+	.quad	L$set$7
+	.byte	0x0			/* Augmentation size */
+
+	/* From LUW6 on: 200-byte frame plus the return address.  */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$8,LUW6-LUW5
+	.long	L$set$8
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.byte	208,1			/* uleb128 208 */
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	/* From LUW7 on: frame released again.  */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$9,LUW7-LUW6
+	.long	L$set$9
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.byte	0x8
+
+	/* From LUW8 on (Lsave_sse, which runs with the frame set up).  */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$10,LUW8-LUW7
+	.long	L$set$10
+	.byte	0xb			/* DW_CFA_restore_state */
+
+	.align	3
+LEFDE3:
+	.subsections_via_symbols
+
+#endif /* __x86_64__ */
+
+
+#endif
--- a/BeefySysLib/third_party/libffi/ios/src/x86/ffi.c
+++ b/BeefySysLib/third_party/libffi/ios/src/x86/ffi.c
@ -0,0 +1,873 @@
+#if !defined(__arm__) && defined(__i386__)
+
+/* -----------------------------------------------------------------------
+   ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008  Red Hat, Inc.
+           Copyright (c) 2002  Ranjit Mathew
+           Copyright (c) 2002  Bo Thorsen
+           Copyright (c) 2002  Roger Sayle
+           Copyright (C) 2008, 2010  Free Software Foundation, Inc.
+
+   x86 Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#if !defined(__x86_64__) || defined(_WIN64)
+
+#ifdef _WIN64
+#include <windows.h>
+#endif
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+
+/* ffi_prep_args is called by the assembly routine once stack space
+   has been allocated for the function's arguments.
+
+   STACK is the start of the outgoing argument area.  ECIF carries the
+   call description (cif), the argument value pointers (avalue) and the
+   return-value address (rvalue).  Written to STACK, in order:
+     - the return-value address, when cif->flags says the result is a
+       struct returned through memory;
+     - each argument, widened to a full ffi_arg slot when narrower, or
+       copied byte-for-byte otherwise.
+   Under X86_WIN32 the arguments destined for registers (thiscall and
+   fastcall) are afterwards moved to the front of the area.  */
+
+void ffi_prep_args(char *stack, extended_cif *ecif)
+{
+  register unsigned int i;
+  register void **p_argv;
+  register char *argp;
+  register ffi_type **p_arg;
+#ifdef X86_WIN32
+  /* Size and location of up to two register-passed arguments.  */
+  size_t p_stack_args[2];
+  void *p_stack_data[2];
+  char *argp2 = stack;
+  int stack_args_count = 0;
+  int cabi = ecif->cif->abi;
+#endif
+
+  argp = stack;
+
+  /* A struct returned through memory takes the first slot.  */
+  if ((ecif->cif->flags == FFI_TYPE_STRUCT
+       || ecif->cif->flags == FFI_TYPE_MS_STRUCT)
+#ifdef X86_WIN64
+      && (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2
+          && ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8)
+#endif
+      )
+    {
+      *(void **) argp = ecif->rvalue;
+#ifdef X86_WIN32
+      /* For fastcall/thiscall this is first register-passed
+         argument.  */
+      if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL)
+	{
+	  p_stack_args[stack_args_count] = sizeof (void*);
+	  p_stack_data[stack_args_count] = argp;
+	  ++stack_args_count;
+	}
+#endif
+      argp += sizeof(void*);
+    }
+
+  p_argv = ecif->avalue;
+
+  for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
+       i != 0;
+       i--, p_arg++)
+    {
+      size_t z;
+
+      /* Align if necessary */
+      if ((sizeof(void*) - 1) & (size_t) argp)
+        argp = (char *) ALIGN(argp, sizeof(void*));
+
+      z = (*p_arg)->size;
+#ifdef X86_WIN64
+      /* Oversized values, odd-sized structs and long doubles are
+         passed by reference: store the pointer, not the value.  */
+      if (z > sizeof(ffi_arg)
+          || ((*p_arg)->type == FFI_TYPE_STRUCT
+              && (z != 1 && z != 2 && z != 4 && z != 8))
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+          || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
+#endif
+          )
+        {
+          z = sizeof(ffi_arg);
+          *(void **)argp = *p_argv;
+        }
+      else if ((*p_arg)->type == FFI_TYPE_FLOAT)
+        {
+          memcpy(argp, *p_argv, z);
+        }
+      else
+#endif
+      if (z < sizeof(ffi_arg))
+        {
+          /* Narrow values are widened to a full ffi_arg slot, sign- or
+             zero-extended according to their type.  */
+          z = sizeof(ffi_arg);
+          switch ((*p_arg)->type)
+            {
+            case FFI_TYPE_SINT8:
+              *(ffi_sarg *) argp = (ffi_sarg)*(SINT8 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_UINT8:
+              *(ffi_arg *) argp = (ffi_arg)*(UINT8 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_SINT16:
+              *(ffi_sarg *) argp = (ffi_sarg)*(SINT16 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_UINT16:
+              *(ffi_arg *) argp = (ffi_arg)*(UINT16 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_SINT32:
+              *(ffi_sarg *) argp = (ffi_sarg)*(SINT32 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_UINT32:
+              *(ffi_arg *) argp = (ffi_arg)*(UINT32 *)(* p_argv);
+              break;
+
+            case FFI_TYPE_STRUCT:
+              *(ffi_arg *) argp = *(ffi_arg *)(* p_argv);
+              break;
+
+            default:
+              FFI_ASSERT(0);
+            }
+        }
+      else
+        {
+          memcpy(argp, *p_argv, z);
+        }
+
+#ifdef X86_WIN32
+    /* For thiscall/fastcall convention register-passed arguments
+       are the first two non-floating-point arguments with a size
+       smaller or equal to sizeof (void*).  */
+    if ((cabi == FFI_THISCALL && stack_args_count < 1)
+        || (cabi == FFI_FASTCALL && stack_args_count < 2))
+      {
+	if (z <= 4
+	    && ((*p_arg)->type != FFI_TYPE_FLOAT
+	        && (*p_arg)->type != FFI_TYPE_STRUCT))
+	  {
+	    p_stack_args[stack_args_count] = z;
+	    p_stack_data[stack_args_count] = argp;
+	    ++stack_args_count;
+	  }
+      }
+#endif
+      p_argv++;
+#ifdef X86_WIN64
+      argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+#else
+      argp += z;
+#endif
+    }
+
+#ifdef X86_WIN32
+  /* We need to move the register-passed arguments for thiscall/fastcall
+     on top of stack, so that those can be moved to registers ecx/edx by
+     call-handler.  */
+  if (stack_args_count > 0)
+    {
+      size_t zz = (p_stack_args[0] + 3) & ~3;
+      char *h;
+
+      /* Move first argument to top-stack position.  */
+      if (p_stack_data[0] != argp2)
+	{
+	  /* Save the argument, shift everything before it up by zz
+	     bytes, then drop the saved copy in at the front.  */
+	  h = alloca (zz + 1);
+	  memcpy (h, p_stack_data[0], zz);
+	  memmove (argp2 + zz, argp2,
+	           (size_t) ((char *) p_stack_data[0] - (char*)argp2));
+	  memcpy (argp2, h, zz);
+	}
+
+      argp2 += zz;
+      --stack_args_count;
+      if (zz > 4)
+	stack_args_count = 0;
+
+      /* If we have a second argument, then move it on top
+         after the first one.  */
+      if (stack_args_count > 0 && p_stack_data[1] != argp2)
+	{
+	  zz = p_stack_args[1];
+	  zz = (zz + 3) & ~3;
+	  /* One scratch buffer is enough; a second alloca would only
+	     waste stack.  */
+	  h = alloca (zz + 1);
+	  memcpy (h, p_stack_data[1], zz);
+	  memmove (argp2 + zz, argp2, (size_t) ((char*) p_stack_data[1] - (char*)argp2));
+	  memcpy (argp2, h, zz);
+	}
+    }
+#endif
+  return;
+}
+
+/* Perform machine dependent cif processing.
+
+   Chooses cif->flags, the return-type code that ffi_call later hands to
+   the assembly call routine, from cif->rtype, and accumulates the size
+   of the argument area in cif->bytes.  Always returns FFI_OK.  */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+  unsigned int i;
+  ffi_type **ptr;
+
+  /* Set the return type flag */
+  switch (cif->rtype->type)
+    {
+    /* These types use their own type code as the flag.  */
+    case FFI_TYPE_VOID:
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_SINT16:
+#ifdef X86_WIN64
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_SINT32:
+#endif
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_FLOAT:
+    case FFI_TYPE_DOUBLE:
+#ifndef X86_WIN64
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+    case FFI_TYPE_LONGDOUBLE:
+#endif
+#endif
+      cif->flags = (unsigned) cif->rtype->type;
+      break;
+
+    /* UINT64 (and POINTER on Win64) share the SINT64 code.  */
+    case FFI_TYPE_UINT64:
+#ifdef X86_WIN64
+    case FFI_TYPE_POINTER:
+#endif
+      cif->flags = FFI_TYPE_SINT64;
+      break;
+
+    case FFI_TYPE_STRUCT:
+      /* Unless X86 is defined, structs of 1, 2, 4 or 8 bytes get a
+         scalar-like code; every other struct is returned through
+         memory and needs room for the return value pointer.  */
+#ifndef X86
+      if (cif->rtype->size == 1)
+        {
+          cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */
+        }
+      else if (cif->rtype->size == 2)
+        {
+          cif->flags = FFI_TYPE_SMALL_STRUCT_2B; /* same as short size */
+        }
+      else if (cif->rtype->size == 4)
+        {
+#ifdef X86_WIN64
+          cif->flags = FFI_TYPE_SMALL_STRUCT_4B;
+#else
+          cif->flags = FFI_TYPE_INT; /* same as int type */
+#endif
+        }
+      else if (cif->rtype->size == 8)
+        {
+          cif->flags = FFI_TYPE_SINT64; /* same as int64 type */
+        }
+      else
+#endif
+        {
+#ifdef X86_WIN32
+          if (cif->abi == FFI_MS_CDECL)
+            cif->flags = FFI_TYPE_MS_STRUCT;
+          else
+#endif
+            cif->flags = FFI_TYPE_STRUCT;
+          /* allocate space for return value pointer */
+          cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG);
+        }
+      break;
+
+    /* Under X86_WIN64 the default arm yields SINT64 and FFI_TYPE_INT
+       gets its own arm yielding SINT32; otherwise every type not
+       listed above yields FFI_TYPE_INT.  */
+    default:
+#ifdef X86_WIN64
+      cif->flags = FFI_TYPE_SINT64;
+      break;
+    case FFI_TYPE_INT:
+      cif->flags = FFI_TYPE_SINT32;
+#else
+      cif->flags = FFI_TYPE_INT;
+#endif
+      break;
+    }
+
+  /* Add up the space each argument occupies in the argument area.  */
+  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+    {
+	  //BCF
+	  //int align = (*ptr)->alignment;
+	  int align = sizeof(int);
+
+      /* NOTE(review): the test uses the type's own alignment, but the
+         rounding uses `align' (sizeof(int)) because of the BCF change
+         above.  */
+      if (((*ptr)->alignment - 1) & cif->bytes)
+        cif->bytes = ALIGN(cif->bytes, align);
+      cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG);
+    }
+
+#ifdef X86_WIN64
+  /* ensure space for storing four registers */
+  cif->bytes += 4 * sizeof(ffi_arg);
+#endif
+
+	//BCF - was unconditional before
+  /* Round the total up to a multiple of 16, except on X86_WIN32.  */
+#ifndef X86_WIN32
+  cif->bytes = (cif->bytes + 15) & ~0xF;
+#endif
+
+  return FFI_OK;
+}
+
+/* Prototype of the assembly call routine for the configured target.
+   In every variant the first two parameters are the argument
+   preparation callback and the extended_cif passed to it, and the last
+   is the function to call.  The Win32 variant takes one more unsigned
+   parameter than the others; ffi_call passes cif->abi in that
+   position.  */
+#ifdef X86_WIN64
+extern int
+ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *,
+               unsigned, unsigned, unsigned *, void (*fn)(void));
+#elif defined(X86_WIN32)
+extern void
+ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *,
+               unsigned, unsigned, unsigned, unsigned *, void (*fn)(void));
+#else
+extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
+                          unsigned, unsigned, unsigned *, void (*fn)(void));
+#endif
+
+void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{ /* Call FN as described by CIF.  CIF and AVALUE are stored in an
+     extended_cif, a return buffer is chosen, and control is handed to
+     the call stub selected by cif->abi together with ffi_prep_args
+     (defined outside this excerpt; presumably the routine that lays out
+     the arguments -- confirm).  An ABI with no case below trips
+     FFI_ASSERT and nothing is called.  */
+  extended_cif ecif;
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+  
+  /* If the return value is a struct and we don't have a return */
+  /* value address then we need to make one                     */
+
+#ifdef X86_WIN64
+  if (rvalue == NULL
+      && cif->flags == FFI_TYPE_STRUCT
+      && cif->rtype->size != 1 && cif->rtype->size != 2
+      && cif->rtype->size != 4 && cif->rtype->size != 8)
+    {
+      ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF); /* size rounded up to a multiple of 16 */
+    }
+#else
+  if (rvalue == NULL
+      && (cif->flags == FFI_TYPE_STRUCT
+          || cif->flags == FFI_TYPE_MS_STRUCT))
+    {
+      ecif.rvalue = alloca(cif->rtype->size); /* scratch buffer on this frame; the caller never sees it */
+    }
+#endif
+  else /* pairs with whichever `if' the preprocessor kept above */
+    ecif.rvalue = rvalue;
+    
+  
+  switch (cif->abi) 
+    {
+#ifdef X86_WIN64
+    case FFI_WIN64:
+      ffi_call_win64(ffi_prep_args, &ecif, cif->bytes,
+                     cif->flags, ecif.rvalue, fn);
+      break;
+#elif defined(X86_WIN32)
+    case FFI_SYSV:
+    case FFI_STDCALL:
+    case FFI_MS_CDECL:
+      ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags,
+		     ecif.rvalue, fn);
+      break;
+    case FFI_THISCALL:
+    case FFI_FASTCALL:
+      { /* Count how many arguments qualify for a register (at most 2).  */
+	unsigned int abi = cif->abi; /* local copy: cif->abi itself is left untouched */
+	unsigned int i, passed_regs = 0;
+
+	if (cif->flags == FFI_TYPE_STRUCT) /* a struct return is counted as one register */
+	  ++passed_regs;
+
+	for (i=0; i < cif->nargs && passed_regs < 2;i++)
+	  {
+	    size_t sz;
+
+	    if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
+	        || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
+	      continue; /* floats and structs are never counted */
+	    sz = (cif->arg_types[i]->size + 3) & ~3; /* size rounded up to a multiple of 4 */
+	    if (sz == 0 || sz > 4)
+	      continue; /* only arguments of 1..4 bytes are counted */
+	    ++passed_regs;
+	  }
+	if (passed_regs < 2 && abi == FFI_FASTCALL) /* fewer than two: treat as thiscall */
+	  abi = FFI_THISCALL;
+	if (passed_regs < 1 && abi == FFI_THISCALL) /* none at all: treat as stdcall */
+	  abi = FFI_STDCALL;
+        ffi_call_win32(ffi_prep_args, &ecif, abi, cif->bytes, cif->flags,
+                       ecif.rvalue, fn);
+      }
+      break;
+#else
+    case FFI_SYSV:
+      ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue,
+                    fn);
+      break;
+#endif
+    default:
+      FFI_ASSERT(0);
+      break;
+    }
+}
+
+
+/** private members **/
+
+/* Forward declarations for the closure support below.
+   ffi_prep_incoming_args_SYSV and the *_inner routine are defined later
+   in this file; the remaining ffi_closure_* symbols are only declared
+   here and are the addresses handed to the trampoline macros.
+
+   The following __attribute__((regparm(1))) decorations will have no effect
+   on MSVC or SUNPRO_C -- standard conventions apply. */
+static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
+                                         void** args, ffi_cif* cif);
+void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *)
+     __attribute__ ((regparm(1)));
+unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
+     __attribute__ ((regparm(1)));
+void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
+     __attribute__ ((regparm(1)));
+#ifdef X86_WIN32 /* extra entry points used only by the Win32 ABIs */
+void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *)
+     __attribute__ ((regparm(1)));
+void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *)
+     __attribute__ ((regparm(1)));
+void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *)
+     __attribute__ ((regparm(1)));
+#endif
+#ifdef X86_WIN64 /* Win64 entry point; declared without regparm */
+void FFI_HIDDEN ffi_closure_win64 (ffi_closure *);
+#endif
+
+/* This function is jumped to by the trampoline */
+
+#ifdef X86_WIN64
+void * FFI_HIDDEN
+ffi_closure_win64_inner (ffi_closure *closure, void *args) {
+  /* C half of the Win64 closure path.  ARGS is the incoming argument
+     area.  Builds the per-argument pointer array, runs the user
+     callback and hands the result back to the caller of this routine.  */
+  void *scratch;              /* receives a result that fits in a pointer */
+  void *ret_slot = &scratch;  /* redirected when the result is returned by reference */
+  ffi_cif *const info = closure->cif;
+  void **const slots = (void **) alloca (info->nargs * sizeof (void *));
+
+  /* Point every slots[] entry at its argument; for by-reference
+     results this also replaces ret_slot with the caller's buffer.  */
+  ffi_prep_incoming_args_SYSV (args, &ret_slot, slots, info);
+
+  closure->fun (info, ret_slot, slots, closure->user_data);
+
+  /* The result is returned in rax.  That is right for every result type
+     except floats; the caller has to 'mov xmm0, rax' to correct those.
+     TODO: structure sizes of 3 5 6 7 are returned by reference, too!!!  */
+  if (info->rtype->size > sizeof (void *))
+    return ret_slot;
+  return *(void **) ret_slot;
+}
+
+#else
+unsigned int FFI_HIDDEN __attribute__ ((regparm(1)))
+ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args)
+{
+  /* C half of the 32-bit closure path.  ARGS is the incoming argument
+     area and *RESPP the result buffer.  Returns cif->flags so the caller
+     can tell how the result has to be handed back.  */
+  ffi_cif *const info = closure->cif;
+  void **const slots = (void **) alloca (info->nargs * sizeof (void *));
+
+  /* Point every slots[] entry at its argument on the stack; when the
+     function returns a structure this also stores the structure return
+     address in *RESPP.  */
+  ffi_prep_incoming_args_SYSV (args, respp, slots, info);
+
+  closure->fun (info, *respp, slots, closure->user_data);
+
+  return info->flags;
+}
+#endif /* !X86_WIN64 */
+
+static void
+ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
+                            ffi_cif *cif)
+{ /* Fill AVALUE[0 .. cif->nargs-1] with pointers to the incoming
+     arguments found at STACK, walking cif->arg_types in order.  If the
+     result is returned through a hidden pointer, that pointer is read
+     from the first stack slot, stored in *RVALUE and skipped.  Argument
+     bytes are not copied: each entry points into STACK, except that on
+     X86_WIN64 a by-reference argument yields the pointer stored in its
+     slot.  */
+  register unsigned int i;
+  register void **p_argv;
+  register char *argp;
+  register ffi_type **p_arg;
+
+  argp = stack;
+
+#ifdef X86_WIN64
+  if (cif->rtype->size > sizeof(ffi_arg)
+      || (cif->flags == FFI_TYPE_STRUCT
+          && (cif->rtype->size != 1 && cif->rtype->size != 2
+              && cif->rtype->size != 4 && cif->rtype->size != 8))) {
+    *rvalue = *(void **) argp; /* hidden return pointer occupies the first slot */
+    argp += sizeof(void *);
+  }
+#else
+  if ( cif->flags == FFI_TYPE_STRUCT
+       || cif->flags == FFI_TYPE_MS_STRUCT ) {
+    *rvalue = *(void **) argp; /* hidden return pointer occupies the first slot */
+    argp += sizeof(void *);
+  }
+#endif
+
+  p_argv = avalue;
+
+  for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++)
+    {
+      size_t z; /* number of stack bytes this argument advances argp by */
+
+      /* Align if necessary */
+      if ((sizeof(void*) - 1) & (size_t) argp) {
+        argp = (char *) ALIGN(argp, sizeof(void*));
+      }
+
+#ifdef X86_WIN64
+      if ((*p_arg)->size > sizeof(ffi_arg)
+          || ((*p_arg)->type == FFI_TYPE_STRUCT
+              && ((*p_arg)->size != 1 && (*p_arg)->size != 2
+                  && (*p_arg)->size != 4 && (*p_arg)->size != 8)))
+        {
+          z = sizeof(void *); /* the slot holds a pointer to the value */
+          *p_argv = *(void **)argp;
+        }
+      else
+#endif
+        {
+          z = (*p_arg)->size;
+          
+          /* because we're little endian, this is what it turns into.   */
+          
+          *p_argv = (void*) argp; /* value is used in place, no copy */
+        }
+          
+      p_argv++;
+#ifdef X86_WIN64
+      argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1); /* advance by z rounded up to pointer size */
+#else
+      argp += z; /* re-aligned at the top of the next iteration */
+#endif
+    }
+  
+  return;
+}
+
+#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); /* 29 code bytes are stored at TRAMP[0..28] */ \
+   void*  __fun = (void*)(FUN); \
+   void*  __ctx = (void*)(CTX); \
+   *(unsigned char*) &__tramp[0] = 0x41; \
+   *(unsigned char*) &__tramp[1] = 0xbb; \
+   *(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11 (4-byte immediate at offsets 2..5) */ \
+   *(unsigned char*) &__tramp[6] = 0x48; \
+   *(unsigned char*) &__tramp[7] = 0xb8; \
+   *(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax (pointer-sized immediate at offset 8) */ \
+   *(unsigned char *)  &__tramp[16] = 0x49; \
+   *(unsigned char *)  &__tramp[17] = 0xba; \
+   *(void**) &__tramp[18] = __fun; /* mov __fun, %r10 (pointer-sized immediate at offset 18) */ \
+   *(unsigned char *)  &__tramp[26] = 0x41; \
+   *(unsigned char *)  &__tramp[27] = 0xff; \
+   *(unsigned char *)  &__tramp[28] = 0xe2; /* jmp %r10 */ \
+ }
+
+/* How to make a trampoline.  Derived from gcc/config/i386/i386.c.
+   Stores 10 code bytes at TRAMP: a load of CTX into %eax followed by a
+   relative jump to FUN.  The jump displacement is computed against
+   CTX + 10, i.e. it assumes the bytes execute at address CTX.  */
+
+#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   unsigned int  __fun = (unsigned int)(FUN); \
+   unsigned int  __ctx = (unsigned int)(CTX); \
+   unsigned int  __dis = __fun - (__ctx + 10); /* relative to the end of the jmp */ \
+   *(unsigned char*) &__tramp[0] = 0xb8; \
+   *(unsigned int*)  &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
+   *(unsigned char *)  &__tramp[5] = 0xe9; \
+   *(unsigned int*)  &__tramp[6] = __dis; /* jmp __fun  */ \
+ }
+
+#ifdef zX86_WIN32 /* NOTE(review): not the X86_WIN32 macro used elsewhere in this file, so unless zX86_WIN32 is defined somewhere the #else variant is the one compiled -- presumably a deliberate way of disabling this variant; confirm */
+#define FFI_INIT_TRAMPOLINE_THISCALL(TRAMP,FUN,CTX,SIZE) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   unsigned int  __fun = (unsigned int)(FUN); \
+   unsigned int  __ctx = (unsigned int)(CTX); \
+   unsigned int  __dis = __fun - (__ctx + 10); /* NOTE(review): the call below ends at offset 13, not 10 -- confirm before enabling */ \
+   unsigned short __size = (unsigned short)(SIZE); \
+   *(unsigned char*) &__tramp[0] = 0xb8; \
+   *(unsigned int*)  &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
+   *(unsigned char*) &__tramp[5] = 0x58; /* pop eax - old return (NOTE(review): overwrites the %eax loaded just above) */ \
+   *(unsigned char*) &__tramp[6] = 0x51; /* push ecx - this */ \
+   *(unsigned char*) &__tramp[7] = 0x50; /* push eax */ \
+   *(unsigned char *)  &__tramp[8] = 0xe8; \
+   *(unsigned int*)  &__tramp[9] = __dis; /* call __fun  */ \
+   *(unsigned char *)  &__tramp[13] = 0xc2; \
+   *(unsigned short*)  &__tramp[14] = __size; /* ret __size  */ \
+}
+#else /* variant normally compiled: 52 code bytes stored at TRAMP[0..51] */
+#define FFI_INIT_TRAMPOLINE_THISCALL(TRAMP,FUN,CTX,SIZE) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   unsigned int  __fun = (unsigned int)(FUN); \
+   unsigned int  __ctx = (unsigned int)(CTX); \
+   unsigned int  __dis = __fun - (__ctx + 49); /* the call at offset 44 ends at offset 49 */ \
+   unsigned short __size = (unsigned short)(SIZE); \
+   *(unsigned int *) &__tramp[0] = 0x8324048b;	/* mov (%esp), %eax */ \
+   *(unsigned int *) &__tramp[4] = 0x4c890cec;	/* sub $12, %esp */ \
+   *(unsigned int *) &__tramp[8] = 0x04890424;	/* mov %ecx, 4(%esp) */ \
+   *(unsigned char*) &__tramp[12] = 0x24;	/* mov %eax, (%esp) */ \
+   *(unsigned char*) &__tramp[13] = 0xb8; \
+   *(unsigned int *) &__tramp[14] = __size;	/* mov __size, %eax */ \
+   *(unsigned int *) &__tramp[18] = 0x08244c8d;	/* lea 8(%esp), %ecx */ \
+   *(unsigned int *) &__tramp[22] = 0x4802e8c1; /* shr $2, %eax ; dec %eax */ \
+   *(unsigned short*) &__tramp[26] = 0x0b74;	/* jz 1f */ \
+   *(unsigned int *) &__tramp[28] = 0x8908518b;	/* 2b: mov 8(%ecx), %edx */ \
+   *(unsigned int *) &__tramp[32] = 0x04c18311; /* mov %edx, (%ecx) ; add $4, %ecx */ \
+   *(unsigned char*) &__tramp[36] = 0x48;	/* dec %eax */ \
+   *(unsigned short*) &__tramp[37] = 0xf575;	/* jnz 2b ; 1f: */ \
+   *(unsigned char*) &__tramp[39] = 0xb8; \
+   *(unsigned int*)  &__tramp[40] = __ctx; /* movl __ctx, %eax */ \
+   *(unsigned char *)  &__tramp[44] = 0xe8; \
+   *(unsigned int*)  &__tramp[45] = __dis; /* call __fun  */ \
+   *(unsigned char*)  &__tramp[49] = 0xc2; /* ret  */ \
+   *(unsigned short*)  &__tramp[50] = (__size + 8); /* ret (__size + 8)  */ \
+ }
+
+#endif
+
+#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE)  \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); /* 13 code bytes are stored at TRAMP[0..12] */ \
+   unsigned int  __fun = (unsigned int)(FUN); \
+   unsigned int  __ctx = (unsigned int)(CTX); \
+   unsigned int  __dis = __fun - (__ctx + 10); /* the call at offset 5 ends at offset 10 */ \
+   unsigned short __size = (unsigned short)(SIZE); /* SIZE is truncated to 16 bits for the ret operand */ \
+   *(unsigned char*) &__tramp[0] = 0xb8; \
+   *(unsigned int*)  &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
+   *(unsigned char *)  &__tramp[5] = 0xe8; \
+   *(unsigned int*)  &__tramp[6] = __dis; /* call __fun  */ \
+   *(unsigned char *)  &__tramp[10] = 0xc2; \
+   *(unsigned short*)  &__tramp[11] = __size; /* ret __size  */ \
+ }
+
+/* Initialise CLOSURE for the call description CIF.
+   Writes the trampoline for cif->abi into closure->tramp (built from
+   CODELOC and the matching ffi_closure_* entry point), then records CIF,
+   USER_DATA and FUN in the closure.
+   Returns FFI_OK on success.  Returns FFI_BAD_ABI, without writing to
+   the closure, when cif->abi has no trampoline on this target.
+   The cif must already be prep'ed.  */
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+                      ffi_cif* cif,
+                      void (*fun)(ffi_cif*,void*,void**,void*),
+                      void *user_data,
+                      void *codeloc)
+{
+#ifdef X86_WIN64
+#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE)
+#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0)
+  if (cif->abi == FFI_WIN64) 
+    {
+      int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3); /* bit N set when argument N (of the first four) is float or double */
+      FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0],
+                                 &ffi_closure_win64,
+                                 codeloc, mask);
+      /* make sure we can execute here */
+    }
+#else
+  if (cif->abi == FFI_SYSV)
+    {
+      FFI_INIT_TRAMPOLINE (&closure->tramp[0],
+                           &ffi_closure_SYSV,
+                           (void*)codeloc);
+    }
+#ifdef X86_WIN32
+  else if (cif->abi == FFI_THISCALL)
+    {
+      FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0],
+				    &ffi_closure_THISCALL,
+				    (void*)codeloc,
+				    cif->bytes);
+    }
+  else if (cif->abi == FFI_STDCALL)
+    {
+      FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0],
+                                   &ffi_closure_STDCALL,
+                                   (void*)codeloc, cif->bytes);
+    }
+  else if (cif->abi == FFI_MS_CDECL)
+    {
+      FFI_INIT_TRAMPOLINE (&closure->tramp[0],
+                           &ffi_closure_SYSV,
+                           (void*)codeloc); /* same trampoline and entry point as FFI_SYSV */
+    }
+#endif /* X86_WIN32 */
+#endif /* !X86_WIN64 */
+  else /* closes whichever if/else-if chain the preprocessor kept */
+    {
+      return FFI_BAD_ABI;
+    }
+    
+  closure->cif  = cif;
+  closure->user_data = user_data;
+  closure->fun  = fun;
+
+  return FFI_OK;
+}
+
+/* ------- Native raw API support -------------------------------- */
+
+#if !FFI_NO_RAW_API
+
+ffi_status
+ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
+                          ffi_cif* cif,
+                          void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+                          void *user_data,
+                          void *codeloc)
+{ /* Raw-API counterpart of ffi_prep_closure_loc.  Accepts FFI_SYSV, plus
+     FFI_THISCALL when X86_WIN32 is defined; any other ABI returns
+     FFI_BAD_ABI before the closure is touched.  On success the
+     trampoline is written into closure->tramp, CIF, USER_DATA and FUN
+     are recorded, and FFI_OK is returned.  */
+  int i;
+
+  if (cif->abi != FFI_SYSV) {
+#ifdef X86_WIN32
+    if (cif->abi != FFI_THISCALL) /* guards only the return on the next code line */
+#endif
+    return FFI_BAD_ABI;
+  }
+
+  /* we currently don't support certain kinds of arguments for raw
+     closures.  This should be implemented by a separate assembly
+     language routine, since it would require argument processing,
+     something we don't do now for performance.  */
+
+  for (i = cif->nargs-1; i >= 0; i--)
+    {
+      FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_STRUCT); /* checked only through FFI_ASSERT */
+      FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE);
+    }
+  
+#ifdef X86_WIN32
+  if (cif->abi == FFI_SYSV)
+    {
+#endif
+  FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV,
+                       codeloc); /* unconditional when X86_WIN32 is not defined */
+#ifdef X86_WIN32
+    }
+  else if (cif->abi == FFI_THISCALL)
+    {
+      FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL,
+				    codeloc, cif->bytes);
+    }
+#endif
+  closure->cif  = cif;
+  closure->user_data = user_data;
+  closure->fun  = fun;
+
+  return FFI_OK;
+}
+
+static void 
+ffi_prep_args_raw(char *stack, extended_cif *ecif)
+{
+  /* Raw arguments are copied to STACK as one block of cif->bytes bytes;
+     no per-argument processing is done.  */
+  const ffi_cif *info = ecif->cif;
+  const void *raw_args = ecif->avalue;
+
+  memcpy (stack, raw_args, info->bytes);
+}
+
+/* we borrow this routine from libffi (it must be changed, though, to
+ * actually call the function passed in the first argument.  as of
+ * libffi-1.20, this is not the case.)
+ */
+
+void
+ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
+{
+  /* Raw-API counterpart of ffi_call: call FN as described by CIF, with
+     the arguments supplied as one raw block (FAKE_AVALUE) that
+     ffi_prep_args_raw copies to the stack as-is.  RVALUE receives the
+     result; when it is NULL and the function returns a struct through
+     memory, a scratch buffer is used and the result is discarded.
+     CIF is only read: an ABI downgrade chosen for a THISCALL/FASTCALL
+     call applies to that call alone, exactly as in ffi_call.  An ABI
+     with no case below trips FFI_ASSERT and nothing is called.  */
+  extended_cif ecif;
+  void **avalue = (void **)fake_avalue;
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+  
+  /* If the return value is a struct and we don't have a return */
+  /* value address then we need to make one                     */
+
+  if (rvalue == NULL
+      && (cif->flags == FFI_TYPE_STRUCT
+          || cif->flags == FFI_TYPE_MS_STRUCT))
+    {
+      ecif.rvalue = alloca(cif->rtype->size);
+    }
+  else
+    ecif.rvalue = rvalue;
+    
+  
+  switch (cif->abi) 
+    {
+#ifdef X86_WIN32
+    case FFI_SYSV:
+    case FFI_STDCALL:
+    case FFI_MS_CDECL:
+      ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags,
+		     ecif.rvalue, fn);
+      break;
+    case FFI_THISCALL:
+    case FFI_FASTCALL:
+      {
+	/* Count how many arguments qualify for a register (at most 2).  */
+	unsigned int abi = cif->abi;
+	unsigned int i, passed_regs = 0;
+
+	/* A struct return is counted as one register.  */
+	if (cif->flags == FFI_TYPE_STRUCT)
+	  ++passed_regs;
+
+	for (i=0; i < cif->nargs && passed_regs < 2;i++)
+	  {
+	    size_t sz;
+
+	    /* Floats and structs are never counted.  */
+	    if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
+	        || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
+	      continue;
+	    /* Only arguments of 1..4 bytes are counted.  */
+	    sz = (cif->arg_types[i]->size + 3) & ~3;
+	    if (sz == 0 || sz > 4)
+	      continue;
+	    ++passed_regs;
+	  }
+	/* Downgrade the convention for this call only.  The caller's cif
+	   is deliberately left alone: writing the downgraded value back
+	   into cif->abi would change the cif for every later use (for
+	   example ffi_prep_raw_closure_loc rejects FFI_STDCALL).  */
+	if (passed_regs < 2 && abi == FFI_FASTCALL)
+	  abi = FFI_THISCALL;
+	if (passed_regs < 1 && abi == FFI_THISCALL)
+	  abi = FFI_STDCALL;
+        ffi_call_win32(ffi_prep_args_raw, &ecif, abi, cif->bytes, cif->flags,
+                       ecif.rvalue, fn);
+      }
+      break;
+#else
+    case FFI_SYSV:
+      ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
+                    ecif.rvalue, fn);
+      break;
+#endif
+    default:
+      FFI_ASSERT(0);
+      break;
+    }
+}
+
+#endif
+
+#endif /* !__x86_64__  || X86_WIN64 */
+
+
+
+#endif
--- a/BeefySysLib/third_party/libffi/ios/src/x86/ffi64.c
+++ b/BeefySysLib/third_party/libffi/ios/src/x86/ffi64.c
@ -0,0 +1,678 @@
+#if !defined(__arm__) && defined(__i386__)
+
+/* -----------------------------------------------------------------------
+   ffi64.c - Copyright (c) 2013  The Written Word, Inc.
+             Copyright (c) 2011  Anthony Green
+             Copyright (c) 2008, 2010  Red Hat, Inc.
+             Copyright (c) 2002, 2007  Bo Thorsen <bo@suse.de>
+             
+   x86-64 Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+#include <stdarg.h>
+
+#ifdef __x86_64__
+
+#define MAX_GPR_REGS 6 /* general-purpose argument registers; length of register_args.gpr[] */
+#define MAX_SSE_REGS 8 /* SSE argument registers; length of register_args.sse[] */
+
+#if defined(__INTEL_COMPILER) /* choose the 128-bit type behind UINT128 per compiler */
+#define UINT128 __m128
+#else
+#if defined(__SUNPRO_C)
+#include <sunmedia_types.h>
+#define UINT128 __m128i
+#else
+#define UINT128 __int128_t
+#endif
+#endif
+
+union big_int_union /* one SSE register slot, viewable at three widths */
+{
+  UINT32 i32; /* written for X86_64_SSESF_CLASS arguments in ffi_call */
+  UINT64 i64; /* written for X86_64_SSE_CLASS / X86_64_SSEDF_CLASS arguments in ffi_call */
+  UINT128 i128; /* not written in the visible part of this file */
+};
+
+struct register_args /* register image placed at the start of the block passed to ffi_call_unix64 */
+{
+  /* Registers for argument passing.  */
+  UINT64 gpr[MAX_GPR_REGS]; /* filled in order by ffi_call; slot 0 holds the return pointer when the result goes in memory */
+  union big_int_union sse[MAX_SSE_REGS]; 
+};
+
+extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+			     void *raddr, void (*fnaddr)(void), unsigned ssecount); /* ffi_call passes: the register_args+stack block, its size in bytes, cif->flags, rvalue, fn, SSE registers used */
+
+/* All reference to register classes here is identical to the code in
+   gcc/config/i386/i386.c. Do *not* change one without the other.  */
+
+/* Register class used for passing given 64bit part of the argument.
+   These represent classes as documented by the PS ABI, with the
+   exception of SSESF, SSEDF classes, that are basically SSE class,
+   just gcc will use SF or DFmode move instead of DImode to avoid
+   reformatting penalties.
+
+   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
+   whenever possible (upper half does contain padding).  */
+enum x86_64_reg_class /* SSE_CLASS_P relies on the four SSE* members staying contiguous and in this order */
+  {
+    X86_64_NO_CLASS, /* nothing classified yet; also used for zero-sized structs */
+    X86_64_INTEGER_CLASS,
+    X86_64_INTEGERSI_CLASS, /* integer eightbyte whose data ends within its first 4 bytes */
+    X86_64_SSE_CLASS,
+    X86_64_SSESF_CLASS, /* float at the start of an eightbyte */
+    X86_64_SSEDF_CLASS, /* double */
+    X86_64_SSEUP_CLASS,
+    X86_64_X87_CLASS, /* first eightbyte of a long double */
+    X86_64_X87UP_CLASS, /* second eightbyte of a long double */
+    X86_64_COMPLEX_X87_CLASS,
+    X86_64_MEMORY_CLASS /* merge result meaning "pass in memory" */
+  };
+
+/* Largest number of eightbytes ever classified for one object: structs
+   larger than 32 bytes are sent to memory before classification.  */
+#define MAX_CLASSES 4
+
+/* Nonzero when X is one of the SSE classes (X86_64_SSE_CLASS through
+   X86_64_SSEUP_CLASS, which are contiguous in enum x86_64_reg_class).
+   Both uses of X are parenthesized so that an argument containing a
+   low-precedence operator (e.g. `c ? a : b') expands correctly.  X is
+   still evaluated twice, so it must not have side effects.  */
+#define SSE_CLASS_P(X)	((X) >= X86_64_SSE_CLASS && (X) <= X86_64_SSEUP_CLASS)
+
+/* x86-64 register passing implementation.  See the x86-64 ABI for details.
+   The goal of this code is to classify each eightbyte of an incoming argument
+   by register class and to assign registers accordingly.  */
+
+/* Merge the classes CLASS1 and CLASS2 of two fields that share an
+   eightbyte and return the class of the combination.  The rules are
+   numbered as in the x86-64 PS ABI.  */
+
+static enum x86_64_reg_class
+merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
+{
+  enum x86_64_reg_class both[2];
+  int k;
+
+  /* Rule #1: equal classes merge to themselves.  */
+  if (class1 == class2)
+    return class1;
+
+  /* Rule #2: NO_CLASS yields to the other class.  */
+  if (class1 == X86_64_NO_CLASS)
+    return class2;
+  if (class2 == X86_64_NO_CLASS)
+    return class1;
+
+  both[0] = class1;
+  both[1] = class2;
+
+  /* Rule #3: MEMORY on either side wins.  */
+  for (k = 0; k < 2; k++)
+    if (both[k] == X86_64_MEMORY_CLASS)
+      return X86_64_MEMORY_CLASS;
+
+  /* Rule #4: an integer class on either side gives INTEGER, except that
+     INTEGERSI paired with SSESF stays INTEGERSI.  */
+  for (k = 0; k < 2; k++)
+    switch (both[k])
+      {
+      case X86_64_INTEGERSI_CLASS:
+	return both[1 - k] == X86_64_SSESF_CLASS
+	       ? X86_64_INTEGERSI_CLASS : X86_64_INTEGER_CLASS;
+      case X86_64_INTEGER_CLASS:
+	return X86_64_INTEGER_CLASS;
+      default:
+	break;
+      }
+
+  /* Rule #5: X87, X87UP or COMPLEX_X87 on either side forces MEMORY.  */
+  for (k = 0; k < 2; k++)
+    switch (both[k])
+      {
+      case X86_64_X87_CLASS:
+      case X86_64_X87UP_CLASS:
+      case X86_64_COMPLEX_X87_CLASS:
+	return X86_64_MEMORY_CLASS;
+      default:
+	break;
+      }
+
+  /* Rule #6: everything else is SSE.  */
+  return X86_64_SSE_CLASS;
+}
+
+/* Classify the argument of type TYPE, which starts BYTE_OFFSET bytes
+   into its eightbyte (0 for a top-level argument).
+   CLASSES will be filled by the register class used to pass each word
+   of the operand.  The number of words is returned.  In case the parameter
+   should be passed in memory, 0 is returned. As a special case for zero
+   sized containers, classes[0] will be NO_CLASS and 1 is returned.
+
+   See the x86-64 PS ABI for details.
+*/
+static int
+classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
+		   size_t byte_offset)
+{
+  switch (type->type)
+    {
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_SINT16:
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_SINT32:
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_POINTER:
+      {
+	int size = byte_offset + type->size; /* end offset of the value, counted from the start of its first eightbyte */
+
+	if (size <= 4)
+	  {
+	    classes[0] = X86_64_INTEGERSI_CLASS;
+	    return 1;
+	  }
+	else if (size <= 8)
+	  {
+	    classes[0] = X86_64_INTEGER_CLASS;
+	    return 1;
+	  }
+	else if (size <= 12)
+	  {
+	    classes[0] = X86_64_INTEGER_CLASS;
+	    classes[1] = X86_64_INTEGERSI_CLASS;
+	    return 2;
+	  }
+	else if (size <= 16)
+	  {
+	    classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
+	    return 2;
+	  }
+	else
+	  FFI_ASSERT (0); /* NOTE(review): there is no return after this; if FFI_ASSERT expands to nothing, control falls through into the FFI_TYPE_FLOAT case */
+      }
+    case FFI_TYPE_FLOAT:
+      if (!(byte_offset % 8)) /* float at the start of an eightbyte */
+	classes[0] = X86_64_SSESF_CLASS;
+      else
+	classes[0] = X86_64_SSE_CLASS;
+      return 1;
+    case FFI_TYPE_DOUBLE:
+      classes[0] = X86_64_SSEDF_CLASS;
+      return 1;
+    case FFI_TYPE_LONGDOUBLE:
+      classes[0] = X86_64_X87_CLASS;
+      classes[1] = X86_64_X87UP_CLASS;
+      return 2;
+    case FFI_TYPE_STRUCT:
+      {
+	const int UNITS_PER_WORD = 8;
+	int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* size in eightbytes, rounded up */
+	ffi_type **ptr; 
+	int i;
+	enum x86_64_reg_class subclasses[MAX_CLASSES];
+
+	/* If the struct is larger than 32 bytes, pass it on the stack.  */
+	if (type->size > 32)
+	  return 0;
+
+	for (i = 0; i < words; i++)
+	  classes[i] = X86_64_NO_CLASS;
+
+	/* Zero sized arrays or structures are NO_CLASS.  We return 0 to
+	   signal memory class, so handle it as a special case.  */
+	if (!words)
+	  {
+	    classes[0] = X86_64_NO_CLASS;
+	    return 1;
+	  }
+
+	/* Merge the fields of structure.  */
+	for (ptr = type->elements; *ptr != NULL; ptr++)
+	  {
+	    int num;
+
+	    byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
+
+	    num = classify_argument (*ptr, subclasses, byte_offset % 8); /* classify the field relative to its own eightbyte */
+	    if (num == 0)
+	      return 0;
+	    for (i = 0; i < num; i++)
+	      {
+		int pos = byte_offset / 8; /* index of the eightbyte the field starts in */
+		classes[i + pos] =
+		  merge_classes (subclasses[i], classes[i + pos]);
+	      }
+
+	    byte_offset += (*ptr)->size;
+	  }
+
+	if (words > 2)
+	  {
+	    /* When size > 16 bytes, if the first one isn't
+	       X86_64_SSE_CLASS or any other ones aren't
+	       X86_64_SSEUP_CLASS, everything should be passed in
+	       memory.  */
+	    if (classes[0] != X86_64_SSE_CLASS)
+	      return 0;
+
+	    for (i = 1; i < words; i++)
+	      if (classes[i] != X86_64_SSEUP_CLASS)
+		return 0;
+	  }
+
+	/* Final merger cleanup.  */
+	for (i = 0; i < words; i++)
+	  {
+	    /* If one class is MEMORY, everything should be passed in
+	       memory.  */
+	    if (classes[i] == X86_64_MEMORY_CLASS)
+	      return 0;
+
+	    /* The X86_64_SSEUP_CLASS should be always preceded by
+	       X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
+	    if (classes[i] == X86_64_SSEUP_CLASS
+		&& classes[i - 1] != X86_64_SSE_CLASS
+		&& classes[i - 1] != X86_64_SSEUP_CLASS)
+	      {
+		/* The first one should never be X86_64_SSEUP_CLASS.  */
+		FFI_ASSERT (i != 0); /* note: classes[i - 1] was already read in the condition above */
+		classes[i] = X86_64_SSE_CLASS;
+	      }
+
+	    /*  If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
+		everything should be passed in memory.  */
+	    if (classes[i] == X86_64_X87UP_CLASS
+		&& (classes[i - 1] != X86_64_X87_CLASS))
+	      {
+		/* The first one should never be X86_64_X87UP_CLASS.  */
+		FFI_ASSERT (i != 0);
+		return 0;
+	      }
+	  }
+	return words;
+      }
+
+    default:
+      FFI_ASSERT(0);
+    }
+  return 0; /* Never reached.  */
+}
+
+/* Classify TYPE into CLASSES and count the registers it needs.
+   Returns 0 iff the value must be passed in memory; otherwise returns
+   the number of classified eightbytes and stores the number of
+   general-purpose and SSE registers required in *PNGPR and *PNSSE.
+   An x87-class value is accepted only as a return value (IN_RETURN
+   nonzero): the function then returns 1 and leaves *PNGPR and *PNSSE
+   unwritten.  */
+
+static int
+examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
+		  _Bool in_return, int *pngpr, int *pnsse)
+{
+  int gpr_needed = 0;
+  int sse_needed = 0;
+  int idx;
+  const int count = classify_argument (type, classes, 0);
+
+  if (count == 0)
+    return 0;
+
+  for (idx = 0; idx < count; idx++)
+    {
+      const enum x86_64_reg_class c = classes[idx];
+
+      if (c == X86_64_INTEGER_CLASS || c == X86_64_INTEGERSI_CLASS)
+	gpr_needed++;
+      else if (c == X86_64_SSE_CLASS
+	       || c == X86_64_SSESF_CLASS
+	       || c == X86_64_SSEDF_CLASS)
+	sse_needed++;
+      else if (c == X86_64_NO_CLASS || c == X86_64_SSEUP_CLASS)
+	continue;		/* occupies no register of its own */
+      else if (c == X86_64_X87_CLASS
+	       || c == X86_64_X87UP_CLASS
+	       || c == X86_64_COMPLEX_X87_CLASS)
+	return in_return != 0;
+      else
+	abort ();		/* any other class is unexpected here */
+    }
+
+  *pngpr = gpr_needed;
+  *pnsse = sse_needed;
+
+  return count;
+}
+
+/* Machine-dependent half of cif preparation.  Computes cif->flags (how
+   the result comes back, plus bit 11 when any argument uses an SSE
+   register) and cif->bytes (stack space for arguments that do not fit
+   in registers, rounded up to a multiple of 8).  Always returns
+   FFI_OK.  */
+
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+  enum x86_64_reg_class classes[MAX_CLASSES];
+  int used_gpr = 0, used_sse = 0;	/* registers claimed so far */
+  int need_gpr, need_sse;		/* registers wanted by the current item */
+  int result_flags = cif->rtype->type;
+  size_t stack_bytes = 0;
+  int idx, arg_count;
+
+  if (result_flags != FFI_TYPE_VOID)
+    {
+      int words = examine_argument (cif->rtype, classes, 1,
+				    &need_gpr, &need_sse);
+
+      if (words == 0)
+	{
+	  /* The result lives in memory; its address travels as a hidden
+	     first argument and takes one general-purpose register.  The
+	     asm has nothing to do for the return itself.  */
+	  used_gpr++;
+	  result_flags = FFI_TYPE_VOID;
+	}
+      else if (result_flags == FFI_TYPE_STRUCT)
+	{
+	  /* Record which halves of the result come back in SSE
+	     registers: bit 8 = first only, bit 9 = second only,
+	     bit 10 = both.  */
+	  _Bool lo_sse = SSE_CLASS_P (classes[0]);
+	  _Bool hi_sse = words == 2 && SSE_CLASS_P (classes[1]);
+
+	  if (lo_sse && hi_sse)
+	    result_flags |= 1 << 10;
+	  else if (lo_sse)
+	    result_flags |= 1 << 8;
+	  else if (hi_sse)
+	    result_flags |= 1 << 9;
+
+	  /* The true size of the structure goes above bit 12.  */
+	  result_flags |= cif->rtype->size << 12;
+	}
+    }
+
+  /* Walk the arguments: each one either claims registers, when it can
+     be passed in registers and enough are left, or adds its size to the
+     stack byte count.  */
+  arg_count = cif->nargs;
+  for (idx = 0; idx < arg_count; idx++)
+    {
+      long align;
+
+      if (examine_argument (cif->arg_types[idx], classes, 0,
+			    &need_gpr, &need_sse) != 0
+	  && used_gpr + need_gpr <= MAX_GPR_REGS
+	  && used_sse + need_sse <= MAX_SSE_REGS)
+	{
+	  used_gpr += need_gpr;
+	  used_sse += need_sse;
+	  continue;
+	}
+
+      /* Stack arguments are aligned to at least 8 bytes.  */
+      align = cif->arg_types[idx]->alignment;
+      if (align < 8)
+	align = 8;
+
+      stack_bytes = ALIGN (stack_bytes, align);
+      stack_bytes += cif->arg_types[idx]->size;
+    }
+
+  if (used_sse)
+    result_flags |= 1 << 11;
+
+  cif->flags = result_flags;
+  cif->bytes = ALIGN (stack_bytes, 8);
+
+  return FFI_OK;
+}
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{ /* Call FN as described by CIF (which must use FFI_UNIX64).  Builds, in
+     one alloca'd block, a struct register_args followed by the stack
+     arguments, copying each AVALUE[i] either into the register image or
+     onto the stack, and then hands the block to ffi_call_unix64.
+     RVALUE receives the result; when it is NULL and the struct result
+     is returned in memory, a scratch buffer is used instead.  */
+  enum x86_64_reg_class classes[MAX_CLASSES];
+  char *stack, *argp;
+  ffi_type **arg_types;
+  int gprcount, ssecount, ngpr, nsse, i, avn;
+  _Bool ret_in_memory;
+  struct register_args *reg_args;
+
+  /* Can't call 32-bit mode from 64-bit mode.  */
+  FFI_ASSERT (cif->abi == FFI_UNIX64);
+
+  /* If the return value is a struct and we don't have a return value
+     address then we need to make one.  Note the setting of flags to
+     VOID above in ffi_prep_cif_machdep.  */
+  ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
+		   && (cif->flags & 0xff) == FFI_TYPE_VOID);
+  if (rvalue == NULL && ret_in_memory)
+    rvalue = alloca (cif->rtype->size);
+
+  /* Allocate the space for the arguments, plus 4 words of temp space.  */
+  stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
+  reg_args = (struct register_args *) stack; /* register image sits at the start of the block */
+  argp = stack + sizeof (struct register_args); /* stack arguments follow it */
+
+  gprcount = ssecount = 0;
+
+  /* If the return value is passed in memory, add the pointer as the
+     first integer argument.  */
+  if (ret_in_memory)
+    reg_args->gpr[gprcount++] = (unsigned long) rvalue;
+
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+
+  for (i = 0; i < avn; ++i)
+    {
+      size_t size = arg_types[i]->size; /* bytes of this argument still to be copied */
+      int n;
+
+      n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+      if (n == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS) /* same test as in ffi_prep_cif_machdep */
+	{
+	  long align = arg_types[i]->alignment;
+
+	  /* Stack arguments are *always* at least 8 byte aligned.  */
+	  if (align < 8)
+	    align = 8;
+
+	  /* Pass this argument in memory.  */
+	  argp = (void *) ALIGN (argp, align);
+	  memcpy (argp, avalue[i], size);
+	  argp += size;
+	}
+      else
+	{
+	  /* The argument is passed entirely in registers.  */
+	  char *a = (char *) avalue[i];
+	  int j;
+
+	  for (j = 0; j < n; j++, a += 8, size -= 8) /* one eightbyte per iteration */
+	    {
+	      switch (classes[j])
+		{
+		case X86_64_INTEGER_CLASS:
+		case X86_64_INTEGERSI_CLASS:
+		  /* Sign-extend integer arguments passed in general
+		     purpose registers, to cope with the fact that
+		     LLVM incorrectly assumes that this will be done
+		     (the x86-64 PS ABI does not specify this). */
+		  switch (arg_types[i]->type)
+		    {
+		    case FFI_TYPE_SINT8:
+		      *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
+		      break;
+		    case FFI_TYPE_SINT16:
+		      *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
+		      break;
+		    case FFI_TYPE_SINT32:
+		      *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
+		      break;
+		    default:
+		      reg_args->gpr[gprcount] = 0; /* clear first so the unused upper bytes are zero */
+		      memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
+		    }
+		  gprcount++;
+		  break;
+		case X86_64_SSE_CLASS:
+		case X86_64_SSEDF_CLASS:
+		  reg_args->sse[ssecount++].i64 = *(UINT64 *) a; /* reads 8 bytes from the argument */
+		  break;
+		case X86_64_SSESF_CLASS:
+		  reg_args->sse[ssecount++].i32 = *(UINT32 *) a; /* reads 4 bytes from the argument */
+		  break;
+		default:
+		  abort();
+		}
+	    }
+	}
+    }
+
+  ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
+		   cif->flags, rvalue, fn, ssecount);
+}
+
+
+extern void ffi_closure_unix64(void);
+
+/* Initialise CLOSURE: write a trampoline into closure->tramp that loads
+   the address of ffi_closure_unix64 into %r11 and CODELOC into %r10,
+   sets or clears the carry flag depending on bit 11 of cif->flags, and
+   jumps through %r11; then record CIF, FUN and USER_DATA in CLOSURE.
+   Returns FFI_BAD_ABI when cif->abi is not strictly between
+   FFI_FIRST_ABI and FFI_LAST_ABI, FFI_OK otherwise.  */
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+		      ffi_cif* cif,
+		      void (*fun)(ffi_cif*, void*, void**, void*),
+		      void *user_data,
+		      void *codeloc)
+{
+  volatile unsigned short *code;
+  unsigned long long *code_imm;
+  unsigned long long *data_imm;
+  int abi = cif->abi;
+
+  /* Refuse a cif whose ABI is outside the range of known ABIs.  */
+  if (UNLIKELY (abi <= FFI_FIRST_ABI || abi >= FFI_LAST_ABI))
+    return FFI_BAD_ABI;
+
+  /* The trampoline is addressed as 16-bit words; the two 64-bit
+     immediates occupy words 1-4 and 6-9.  */
+  code = (volatile unsigned short *) &closure->tramp[0];
+  code_imm = (unsigned long long *) &code[1];
+  data_imm = (unsigned long long *) &code[6];
+
+  code[0] = 0xbb49;		/* mov <code>, %r11	*/
+  *code_imm = (unsigned long) ffi_closure_unix64;
+  code[5] = 0xba49;		/* mov <data>, %r10	*/
+  *data_imm = (unsigned long) codeloc;
+
+  /* stc when the function uses any sse registers, clc otherwise; the
+     high byte of this word is the first byte of the jmp.  */
+  if (cif->flags & (1 << 11))
+    code[10] = 0x49f9;
+  else
+    code[10] = 0x49f8;
+
+  code[11] = 0xe3ff;			/* jmp *%r11    */
+
+  closure->cif = cif;
+  closure->fun = fun;
+  closure->user_data = user_data;
+
+  return FFI_OK;
+}
+
+/* Build the argument-pointer array for CLOSURE's cif from the saved
+   register block REG_ARGS and the stack argument area ARGP, then call
+   closure->fun with it.
+
+   RVALUE is the buffer handed to closure->fun for the result, unless
+   the return type is classified as memory: then the pointer found in
+   the first saved GPR slot is used instead.
+
+   Returns the return type code for the caller to act on:
+   FFI_TYPE_VOID when nothing more has to be done, otherwise
+   cif->rtype->type, with bit 8 or bit 9 set for a two-word struct
+   whose words are split between an integer and an SSE register.  */
+int
+ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
+			 struct register_args *reg_args, char *argp)
+{
+  ffi_cif *cif;
+  void **avalue;
+  ffi_type **arg_types;
+  long i, avn;
+  int gprcount, ssecount, ngpr, nsse;
+  int ret;
+
+  cif = closure->cif;
+  /* One pointer per argument; lives on this frame until we return.  */
+  avalue = alloca(cif->nargs * sizeof(void *));
+  gprcount = ssecount = 0;
+
+  ret = cif->rtype->type;
+  if (ret != FFI_TYPE_VOID)
+    {
+      enum x86_64_reg_class classes[MAX_CLASSES];
+      int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+      if (n == 0)
+	{
+	  /* The return value goes in memory.  Arrange for the closure
+	     return value to go directly back to the original caller.
+	     Taking the pointer consumes the first GPR slot, so the
+	     arguments below start at the next one.  */
+	  rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
+	  /* We don't have to do anything in asm for the return.  */
+	  ret = FFI_TYPE_VOID;
+	}
+      else if (ret == FFI_TYPE_STRUCT && n == 2)
+	{
+	  /* Mark which register the second word of the structure goes in.
+	     Bit 8: integer word first, SSE word second.
+	     Bit 9: SSE word first, integer word second.  */
+	  _Bool sse0 = SSE_CLASS_P (classes[0]);
+	  _Bool sse1 = SSE_CLASS_P (classes[1]);
+	  if (!sse0 && sse1)
+	    ret |= 1 << 8;
+	  else if (sse0 && !sse1)
+	    ret |= 1 << 9;
+	}
+    }
+
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+  
+  for (i = 0; i < avn; ++i)
+    {
+      enum x86_64_reg_class classes[MAX_CLASSES];
+      int n;
+
+      n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+      /* The argument lives on the stack when it is classified as memory
+	 or when it would not fit in the registers still unused.  */
+      if (n == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
+	{
+	  long align = arg_types[i]->alignment;
+
+	  /* Stack arguments are *always* at least 8 byte aligned.  */
+	  if (align < 8)
+	    align = 8;
+
+	  /* Pass this argument in memory.  */
+	  argp = (void *) ALIGN (argp, align);
+	  avalue[i] = argp;
+	  argp += arg_types[i]->size;
+	}
+      /* If the argument is in a single register, or two consecutive
+	 integer registers, then we can use that address directly.  */
+      else if (n == 1
+	       || (n == 2 && !(SSE_CLASS_P (classes[0])
+			       || SSE_CLASS_P (classes[1]))))
+	{
+	  /* Every word of the argument is in the same register file, so
+	     point straight at its slot(s) in the saved register block;
+	     the class of the first word selects which file.  */
+	  if (SSE_CLASS_P (classes[0]))
+	    {
+	      avalue[i] = &reg_args->sse[ssecount];
+	      ssecount += n;
+	    }
+	  else
+	    {
+	      avalue[i] = &reg_args->gpr[gprcount];
+	      gprcount += n;
+	    }
+	}
+      /* Otherwise, allocate space to make them consecutive.  */
+      else
+	{
+	  /* 16 bytes: room for the 8 bytes copied from each word's
+	     register slot.  */
+	  char *a = alloca (16);
+	  int j;
+
+	  avalue[i] = a;
+	  for (j = 0; j < n; j++, a += 8)
+	    {
+	      if (SSE_CLASS_P (classes[j]))
+		memcpy (a, &reg_args->sse[ssecount++], 8);
+	      else
+		memcpy (a, &reg_args->gpr[gprcount++], 8);
+	    }
+	}
+    }
+
+  /* Invoke the closure.  */
+  closure->fun (cif, rvalue, avalue, closure->user_data);
+
+  /* Tell assembly how to perform return type promotions.  */
+  return ret;
+}
+
+#endif /* __x86_64__ */
+
+
+#endif
--- a/BeefySysLib/third_party/libffi/ios/src/x86/freebsd.S
+++ b/BeefySysLib/third_party/libffi/ios/src/x86/freebsd.S
@ -0,0 +1,463 @@
+#if !defined(__arm__) && defined(__i386__)
+
+/* -----------------------------------------------------------------------
+   freebsd.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005  Red Hat, Inc.
+	       Copyright (c) 2008  Björn König
+	
+   X86 Foreign Function Interface for FreeBSD
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+----------------------------------------------------------------------- */
+
+#ifndef __x86_64__
+
+#define LIBFFI_ASM	
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+.globl ffi_prep_args
+
+	.align 4
+.globl ffi_call_SYSV
+        .type    ffi_call_SYSV,@function
+
+/* ffi_call_SYSV: reserve stack space for the outgoing arguments, let a
+   callback fill it in, call the target function and store its result.
+   Stack arguments, as used below:
+      8(%ebp)  callback that places the arguments (ffi_prep_args)
+     12(%ebp)  second argument handed to that callback
+     16(%ebp)  number of bytes to reserve for the outgoing arguments
+     20(%ebp)  return type code (an FFI_TYPE_* value)
+     24(%ebp)  where to store the return value, or NULL to discard it
+     28(%ebp)  function to call  */
+ffi_call_SYSV:
+.LFB1:
+        pushl %ebp
+.LCFI0:
+        movl  %esp,%ebp
+.LCFI1:
+	/* Make room for all of the new args.  */
+	movl  16(%ebp),%ecx
+	subl  %ecx,%esp
+
+	movl  %esp,%eax
+
+	/* Place all of the ffi_prep_args in position  */
+	pushl 12(%ebp)
+	pushl %eax
+	call  *8(%ebp)
+
+	/* Return stack to previous state and call the function  */
+	addl  $8,%esp	
+
+	call  *28(%ebp)
+
+	/* Load %ecx with the return type code  */
+	movl  20(%ebp),%ecx	
+
+	/* Protect %esi.  We're going to pop it in the epilogue.  */
+	pushl %esi
+
+	/* If the return value pointer is NULL, assume no return value.  */
+	cmpl  $0,24(%ebp)
+	jne  0f
+
+	/* Even if there is no space for the return value, we are 
+	   obliged to handle floating-point values: a result left in
+	   %st(0) must be popped for float, double and long double
+	   alike, otherwise the x87 register stack fills up after a
+	   few calls whose result is discarded.  */
+	cmpl  $FFI_TYPE_FLOAT,%ecx
+	je    2f
+	cmpl  $FFI_TYPE_DOUBLE,%ecx
+	je    2f
+	cmpl  $FFI_TYPE_LONGDOUBLE,%ecx
+	jne   noretval
+2:
+	fstp  %st(0)
+
+        jmp   epilogue
+
+0:
+	/* The call pushes the address of .Lstore_table, which label 1
+	   pops into %esi to index the table position-independently.  */
+	call  1f
+
+.Lstore_table:
+	.long	noretval-.Lstore_table	/* FFI_TYPE_VOID */
+	.long	retint-.Lstore_table	/* FFI_TYPE_INT */
+	.long	retfloat-.Lstore_table	/* FFI_TYPE_FLOAT */
+	.long	retdouble-.Lstore_table	/* FFI_TYPE_DOUBLE */
+	.long	retlongdouble-.Lstore_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	retuint8-.Lstore_table	/* FFI_TYPE_UINT8 */
+	.long	retsint8-.Lstore_table	/* FFI_TYPE_SINT8 */
+	.long	retuint16-.Lstore_table	/* FFI_TYPE_UINT16 */
+	.long	retsint16-.Lstore_table	/* FFI_TYPE_SINT16 */
+	.long	retint-.Lstore_table	/* FFI_TYPE_UINT32 */
+	.long	retint-.Lstore_table	/* FFI_TYPE_SINT32 */
+	.long	retint64-.Lstore_table	/* FFI_TYPE_UINT64 */
+	.long	retint64-.Lstore_table	/* FFI_TYPE_SINT64 */
+	.long	retstruct-.Lstore_table	/* FFI_TYPE_STRUCT */
+	.long	retint-.Lstore_table	/* FFI_TYPE_POINTER */
+	.long   retstruct1b-.Lstore_table	/* FFI_TYPE_SMALL_STRUCT_1B */
+	.long   retstruct2b-.Lstore_table	/* FFI_TYPE_SMALL_STRUCT_2B */
+
+1:
+	/* %esi = table base; add the entry selected by the type code
+	   in %ecx to get the handler address.  */
+	pop  %esi
+	add  (%esi, %ecx, 4), %esi
+	jmp  *%esi
+
+	/* Sign/zero extend as appropriate.  */
+retsint8:
+	movsbl  %al, %eax
+	jmp  retint
+
+retsint16:
+	movswl  %ax, %eax
+	jmp  retint
+
+retuint8:
+	movzbl  %al, %eax
+	jmp  retint
+
+retuint16:
+	movzwl  %ax, %eax
+	jmp  retint
+
+retfloat:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	fstps (%ecx)
+	jmp   epilogue
+
+retdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	fstpl (%ecx)
+	jmp   epilogue
+
+retlongdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	fstpt (%ecx)
+	jmp   epilogue
+	
+retint64:	
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	movl  %eax,0(%ecx)
+	movl  %edx,4(%ecx)
+	jmp   epilogue
+	
+retstruct1b:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movb  %al,0(%ecx)
+	jmp   epilogue
+
+retstruct2b:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movw  %ax,0(%ecx)
+	jmp   epilogue
+
+retint:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	movl  %eax,0(%ecx)
+
+retstruct:
+	/* Nothing to do!  */
+
+noretval:
+epilogue:
+        popl %esi
+        movl %ebp,%esp
+        popl %ebp
+        ret
+.LFE1:
+.ffi_call_SYSV_end:
+        .size    ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV
+
+	.align	4
+FFI_HIDDEN (ffi_closure_SYSV)
+.globl ffi_closure_SYSV
+	.type	ffi_closure_SYSV, @function
+
+/* ffi_closure_SYSV: closure entry point.  Sets up a 40-byte frame,
+   calls ffi_closure_SYSV_inner with the address of a result-pointer
+   slot (-12(%ebp), initialised to point at the buffer at -24(%ebp))
+   and the address of the incoming arguments (8(%ebp)), then loads the
+   result into the return register(s) chosen by the type code that
+   ffi_closure_SYSV_inner returns in %eax.  */
+ffi_closure_SYSV:
+.LFB2:
+	pushl	%ebp
+.LCFI2:
+	movl	%esp, %ebp
+.LCFI3:
+	subl	$40, %esp
+	leal	-24(%ebp), %edx
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	8(%ebp), %edx
+	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
+	leal	-12(%ebp), %edx
+	movl	%edx, (%esp)	/* &resp */
+#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__
+	call	ffi_closure_SYSV_inner
+#else
+	/* Call through the PLT: save %ebx in the frame, load it with
+	   the GOT address, and restore it after the call.  */
+	movl	%ebx, 8(%esp)
+.LCFI7:
+	call	1f
+1:	popl	%ebx
+	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
+	call	ffi_closure_SYSV_inner@PLT
+	movl	8(%esp), %ebx
+#endif
+	/* %ecx = result pointer (reloaded from the resp slot),
+	   %eax = return type code.  */
+	movl	-12(%ebp), %ecx
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lcls_retint
+	
+0:	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lcls_retllong
+	cmpl	$FFI_TYPE_SMALL_STRUCT_1B, %eax
+	je	.Lcls_retstruct1b
+	cmpl	$FFI_TYPE_SMALL_STRUCT_2B, %eax
+	je	.Lcls_retstruct2b
+	cmpl	$FFI_TYPE_STRUCT, %eax
+	je	.Lcls_retstruct
+	/* Any other type code falls through and returns without
+	   loading a result.  */
+.Lcls_epilogue:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+.Lcls_retint:
+	movl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retfloat:
+	flds	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retdouble:
+	fldl	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retldouble:
+	fldt	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retllong:
+	movl	(%ecx), %eax
+	movl	4(%ecx), %edx
+	jmp	.Lcls_epilogue
+.Lcls_retstruct1b:
+	movsbl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retstruct2b:
+	movswl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retstruct:
+	/* Return and drop 4 bytes of the caller's arguments
+	   (presumably the hidden struct-return pointer -- confirm
+	   against the ABI in use).  */
+	movl	%ebp, %esp
+	popl	%ebp
+	ret	$4
+.LFE2:
+	.size	ffi_closure_SYSV, .-ffi_closure_SYSV
+
+#if !FFI_NO_RAW_API
+
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20
+
+	.align	4
+FFI_HIDDEN (ffi_closure_raw_SYSV)
+.globl ffi_closure_raw_SYSV
+	.type	ffi_closure_raw_SYSV, @function
+
+/* ffi_closure_raw_SYSV: raw-API closure entry point.  With the closure
+   address in %eax, calls closure->fun (cif, &res, raw_args, user_data)
+   where res is the buffer at -24(%ebp) and raw_args is the address of
+   the incoming arguments, then loads res into the return register(s)
+   selected by the word at CIF_FLAGS_OFFSET in the cif.  */
+ffi_closure_raw_SYSV:
+.LFB3:
+	pushl	%ebp
+.LCFI4:
+	movl	%esp, %ebp
+.LCFI5:
+	pushl	%esi
+.LCFI6:
+	subl	$36, %esp
+	/* Keep the cif in %esi: it is read again after the call.  */
+	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+	movl	%edx, 12(%esp)	/* user_data */
+	leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
+	movl	%edx, 8(%esp)	/* raw_args */
+	leal	-24(%ebp), %edx
+	movl	%edx, 4(%esp)	/* &res */
+	movl	%esi, (%esp)	/* cif */
+	call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
+	movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lrcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lrcls_retint
+0:
+	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lrcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lrcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lrcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lrcls_retllong
+	/* Any other type code falls through and returns without
+	   loading a result.  */
+.Lrcls_epilogue:
+	addl	$36, %esp
+	popl	%esi
+	popl	%ebp
+	ret
+.Lrcls_retint:
+	movl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+.Lrcls_retfloat:
+	flds	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retdouble:
+	fldl	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retldouble:
+	fldt	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retllong:
+	movl	-24(%ebp), %eax
+	movl	-20(%ebp), %edx
+	jmp	.Lrcls_epilogue
+.LFE3:
+	.size	ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV
+#endif
+
+	.section	.eh_frame,EH_FRAME_FLAGS,@progbits
+.Lframe1:
+	.long	.LECIE1-.LSCIE1	/* Length of Common Information Entry */
+.LSCIE1:
+	.long	0x0	/* CIE Identifier Tag */
+	.byte	0x1	/* CIE Version */
+#ifdef __PIC__
+	.ascii "zR\0"	/* CIE Augmentation */
+#else
+	.ascii "\0"	/* CIE Augmentation */
+#endif
+	.byte	0x1	/* .uleb128 0x1; CIE Code Alignment Factor */
+	.byte	0x7c	/* .sleb128 -4; CIE Data Alignment Factor */
+	.byte	0x8	/* CIE RA Column */
+#ifdef __PIC__
+	.byte	0x1	/* .uleb128 0x1; Augmentation size */
+	.byte	0x1b	/* FDE Encoding (pcrel sdata4) */
+#endif
+	.byte	0xc	/* DW_CFA_def_cfa */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x88	/* DW_CFA_offset, column 0x8 */
+	.byte	0x1	/* .uleb128 0x1 */
+	.align 4
+.LECIE1:
+.LSFDE1:
+	.long	.LEFDE1-.LASFDE1	/* FDE Length */
+.LASFDE1:
+	.long	.LASFDE1-.Lframe1	/* FDE CIE offset */
+#ifdef __PIC__
+	.long	.LFB1-.	/* FDE initial location */
+#else
+	.long	.LFB1	/* FDE initial location */
+#endif
+	.long	.LFE1-.LFB1	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI0-.LFB1
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI1-.LCFI0
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+	.align 4
+.LEFDE1:
+.LSFDE2:
+	.long	.LEFDE2-.LASFDE2	/* FDE Length */
+.LASFDE2:
+	.long	.LASFDE2-.Lframe1	/* FDE CIE offset */
+#ifdef __PIC__
+	.long	.LFB2-.	/* FDE initial location */
+#else
+	.long	.LFB2
+#endif
+	.long	.LFE2-.LFB2	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI2-.LFB2
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI3-.LCFI2
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI7-.LCFI3
+	.byte	0x83	/* DW_CFA_offset, column 0x3 */
+	.byte	0xa	/* .uleb128 0xa */
+#endif
+	.align 4
+.LEFDE2:
+
+#if !FFI_NO_RAW_API
+
+.LSFDE3:
+	.long	.LEFDE3-.LASFDE3	/* FDE Length */
+.LASFDE3:
+	.long	.LASFDE3-.Lframe1	/* FDE CIE offset */
+#ifdef __PIC__
+	.long	.LFB3-.	/* FDE initial location */
+#else
+	.long	.LFB3
+#endif
+	.long	.LFE3-.LFB3	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI4-.LFB3
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI5-.LCFI4
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI6-.LCFI5
+	.byte	0x86	/* DW_CFA_offset, column 0x6 */
+	.byte	0x3	/* .uleb128 0x3 */
+	.align 4
+.LEFDE3:
+
+#endif
+
+#endif /* ifndef __x86_64__ */
+
+
+#endif
--- a/BeefySysLib/third_party/libffi/ios/src/x86/sysv.S
+++ b/BeefySysLib/third_party/libffi/ios/src/x86/sysv.S
@ -0,0 +1,488 @@
+#if !defined(__arm__) && defined(__i386__)
+
+/* -----------------------------------------------------------------------
+   sysv.S - Copyright (c) 2013  The Written Word, Inc.
+	  - Copyright (c) 1996,1998,2001-2003,2005,2008,2010  Red Hat, Inc.
+   
+   X86 Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifndef __x86_64__
+
+#define LIBFFI_ASM	
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+.globl ffi_prep_args
+
+	.align 4
+.globl ffi_call_SYSV
+        .type    ffi_call_SYSV,@function
+
+/* ffi_call_SYSV: reserve 16-byte-aligned stack space for the outgoing
+   arguments, let a callback fill it in, call the target function and
+   store its result.  Stack arguments, as used below:
+      8(%ebp)  callback that places the arguments (ffi_prep_args)
+     12(%ebp)  second argument handed to that callback
+     16(%ebp)  number of bytes to reserve for the outgoing arguments
+     20(%ebp)  return type code (an FFI_TYPE_* value)
+     24(%ebp)  where to store the return value, or NULL to discard it
+     28(%ebp)  function to call  */
+ffi_call_SYSV:
+.LFB1:
+        pushl %ebp
+.LCFI0:
+        movl  %esp,%ebp
+.LCFI1:
+	/* Make room for all of the new args.  */
+	movl  16(%ebp),%ecx
+	subl  %ecx,%esp
+
+        /* Align the stack pointer to 16-bytes */
+        andl  $0xfffffff0, %esp
+
+	movl  %esp,%eax
+
+	/* Place all of the ffi_prep_args in position  */
+	pushl 12(%ebp)
+	pushl %eax
+	call  *8(%ebp)
+
+	/* Return stack to previous state and call the function  */
+	addl  $8,%esp	
+
+	call  *28(%ebp)
+
+	/* Load %ecx with the return type code  */
+	movl  20(%ebp),%ecx	
+
+	/* Protect %esi.  We're going to pop it in the epilogue.  */
+	pushl %esi
+
+	/* If the return value pointer is NULL, assume no return value.  */
+	cmpl  $0,24(%ebp)
+	jne  0f
+
+	/* Even if there is no space for the return value, we are 
+	   obliged to handle floating-point values: a result left in
+	   %st(0) must be popped for float, double and long double
+	   alike, otherwise the x87 register stack fills up after a
+	   few calls whose result is discarded.  */
+	cmpl  $FFI_TYPE_FLOAT,%ecx
+	je    2f
+	cmpl  $FFI_TYPE_DOUBLE,%ecx
+	je    2f
+	cmpl  $FFI_TYPE_LONGDOUBLE,%ecx
+	jne   noretval
+2:
+	fstp  %st(0)
+
+        jmp   epilogue
+
+0:
+	/* The call pushes the address of .Lstore_table, which label 1
+	   pops into %esi to index the table position-independently.  */
+	call  1f
+
+.Lstore_table:
+	.long	noretval-.Lstore_table	/* FFI_TYPE_VOID */
+	.long	retint-.Lstore_table	/* FFI_TYPE_INT */
+	.long	retfloat-.Lstore_table	/* FFI_TYPE_FLOAT */
+	.long	retdouble-.Lstore_table	/* FFI_TYPE_DOUBLE */
+	.long	retlongdouble-.Lstore_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	retuint8-.Lstore_table	/* FFI_TYPE_UINT8 */
+	.long	retsint8-.Lstore_table	/* FFI_TYPE_SINT8 */
+	.long	retuint16-.Lstore_table	/* FFI_TYPE_UINT16 */
+	.long	retsint16-.Lstore_table	/* FFI_TYPE_SINT16 */
+	.long	retint-.Lstore_table	/* FFI_TYPE_UINT32 */
+	.long	retint-.Lstore_table	/* FFI_TYPE_SINT32 */
+	.long	retint64-.Lstore_table	/* FFI_TYPE_UINT64 */
+	.long	retint64-.Lstore_table	/* FFI_TYPE_SINT64 */
+	.long	retstruct-.Lstore_table	/* FFI_TYPE_STRUCT */
+	.long	retint-.Lstore_table	/* FFI_TYPE_POINTER */
+
+1:
+	/* %esi = table base; add the entry selected by the type code
+	   in %ecx to get the handler address.  */
+	pop  %esi
+	add  (%esi, %ecx, 4), %esi
+	jmp  *%esi
+
+	/* Sign/zero extend as appropriate.  */
+retsint8:
+	movsbl  %al, %eax
+	jmp  retint
+
+retsint16:
+	movswl  %ax, %eax
+	jmp  retint
+
+retuint8:
+	movzbl  %al, %eax
+	jmp  retint
+
+retuint16:
+	movzwl  %ax, %eax
+	jmp  retint
+
+retfloat:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	fstps (%ecx)
+	jmp   epilogue
+
+retdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	fstpl (%ecx)
+	jmp   epilogue
+
+retlongdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	fstpt (%ecx)
+	jmp   epilogue
+	
+retint64:	
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	movl  %eax,0(%ecx)
+	movl  %edx,4(%ecx)
+	jmp   epilogue
+	
+retint:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	movl  %eax,0(%ecx)
+
+retstruct:
+	/* Nothing to do!  */
+
+noretval:
+epilogue:
+        popl %esi
+        movl %ebp,%esp
+        popl %ebp
+        ret
+.LFE1:
+.ffi_call_SYSV_end:
+        .size    ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV
+
+	.align	4
+FFI_HIDDEN (ffi_closure_SYSV)
+.globl ffi_closure_SYSV
+	.type	ffi_closure_SYSV, @function
+
+/* ffi_closure_SYSV: closure entry point.  Sets up a 40-byte frame,
+   calls ffi_closure_SYSV_inner with the address of a result-pointer
+   slot (-12(%ebp), initialised to point at the buffer at -24(%ebp))
+   and the address of the incoming arguments (8(%ebp)), then loads the
+   result into the return register(s) chosen by the type code that
+   ffi_closure_SYSV_inner returns in %eax.  On entry %eax holds the
+   closure; it is stored to the stack only in the __SUNPRO_C build.  */
+ffi_closure_SYSV:
+.LFB2:
+	pushl	%ebp
+.LCFI2:
+	movl	%esp, %ebp
+.LCFI3:
+	subl	$40, %esp
+	leal	-24(%ebp), %edx
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	8(%ebp), %edx
+#ifdef __SUNPRO_C
+	/* The SUNPRO compiler doesn't support GCC's regparm function
+  	   attribute, so we have to pass all three arguments to
+	   ffi_closure_SYSV_inner on the stack.  */
+	movl	%edx, 8(%esp)	/* args = __builtin_dwarf_cfa () */
+	leal	-12(%ebp), %edx
+	movl	%edx, 4(%esp)	/* &resp */
+	movl    %eax, (%esp)    /* closure */
+#else
+	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
+	leal	-12(%ebp), %edx
+	movl	%edx, (%esp)	/* &resp */
+#endif
+#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__
+	call	ffi_closure_SYSV_inner
+#else
+	/* Call through the PLT: save %ebx in the frame, load it with
+	   the GOT address, and restore it after the call.
+	   NOTE(review): this uses 8(%esp), the same slot the
+	   __SUNPRO_C branch above fills with args -- confirm the two
+	   configurations are never combined.  */
+	movl	%ebx, 8(%esp)
+.LCFI7:
+	call	1f
+1:	popl	%ebx
+	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
+	call	ffi_closure_SYSV_inner@PLT
+	movl	8(%esp), %ebx
+#endif
+	/* %ecx = result pointer (reloaded from the resp slot),
+	   %eax = return type code.  */
+	movl	-12(%ebp), %ecx
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lcls_retint
+	
+0:	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lcls_retllong
+	cmpl	$FFI_TYPE_STRUCT, %eax
+	je	.Lcls_retstruct
+	/* Any other type code falls through and returns without
+	   loading a result.  */
+.Lcls_epilogue:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+.Lcls_retint:
+	movl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retfloat:
+	flds	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retdouble:
+	fldl	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retldouble:
+	fldt	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retllong:
+	movl	(%ecx), %eax
+	movl	4(%ecx), %edx
+	jmp	.Lcls_epilogue
+.Lcls_retstruct:
+	/* Return and drop 4 bytes of the caller's arguments
+	   (presumably the hidden struct-return pointer -- confirm
+	   against the ABI in use).  */
+	movl	%ebp, %esp
+	popl	%ebp
+	ret	$4
+.LFE2:
+	.size	ffi_closure_SYSV, .-ffi_closure_SYSV
+
+#if !FFI_NO_RAW_API
+
+/* Precalculate for e.g. the Solaris 10/x86 assembler.  */
+#if FFI_TRAMPOLINE_SIZE == 10
+#define RAW_CLOSURE_CIF_OFFSET 12
+#define RAW_CLOSURE_FUN_OFFSET 16
+#define RAW_CLOSURE_USER_DATA_OFFSET 20
+#elif FFI_TRAMPOLINE_SIZE == 24
+#define RAW_CLOSURE_CIF_OFFSET 24
+#define RAW_CLOSURE_FUN_OFFSET 28
+#define RAW_CLOSURE_USER_DATA_OFFSET 32
+#else
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#endif
+#define CIF_FLAGS_OFFSET 20
+
+	.align	4
+FFI_HIDDEN (ffi_closure_raw_SYSV)
+.globl ffi_closure_raw_SYSV
+	.type	ffi_closure_raw_SYSV, @function
+
+/* ffi_closure_raw_SYSV: raw-API closure entry point.  With the closure
+   address in %eax, calls closure->fun (cif, &res, raw_args, user_data)
+   where res is the buffer at -24(%ebp) and raw_args is the address of
+   the incoming arguments, then loads res into the return register(s)
+   selected by the word at CIF_FLAGS_OFFSET in the cif.  */
+ffi_closure_raw_SYSV:
+.LFB3:
+	pushl	%ebp
+.LCFI4:
+	movl	%esp, %ebp
+.LCFI5:
+	pushl	%esi
+.LCFI6:
+	subl	$36, %esp
+	/* Keep the cif in %esi: it is read again after the call.  */
+	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+	movl	%edx, 12(%esp)	/* user_data */
+	leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
+	movl	%edx, 8(%esp)	/* raw_args */
+	leal	-24(%ebp), %edx
+	movl	%edx, 4(%esp)	/* &res */
+	movl	%esi, (%esp)	/* cif */
+	call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
+	movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lrcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lrcls_retint
+0:
+	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lrcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lrcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lrcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lrcls_retllong
+	/* Any other type code falls through and returns without
+	   loading a result.  */
+.Lrcls_epilogue:
+	addl	$36, %esp
+	popl	%esi
+	popl	%ebp
+	ret
+.Lrcls_retint:
+	movl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+.Lrcls_retfloat:
+	flds	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retdouble:
+	fldl	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retldouble:
+	fldt	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retllong:
+	movl	-24(%ebp), %eax
+	movl	-20(%ebp), %edx
+	jmp	.Lrcls_epilogue
+.LFE3:
+	.size	ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV
+#endif
+
+#if defined __GNUC__
+/* Only emit dwarf unwind info when building with GNU toolchain.  */
+
+#if defined __PIC__
+# if defined __sun__ && defined __svr4__
+/* 32-bit Solaris 2/x86 uses datarel encoding for PIC.  GNU ld before 2.22
+   doesn't correctly sort .eh_frame_hdr with mixed encodings, so match this.  */
+#  define FDE_ENCODING		0x30	/* datarel */
+#  define FDE_ENCODE(X)		X@GOTOFF
+# else
+#  define FDE_ENCODING		0x1b	/* pcrel sdata4 */
+#  if defined HAVE_AS_X86_PCREL
+#   define FDE_ENCODE(X)	X-.
+#  else
+#   define FDE_ENCODE(X)	X@rel
+#  endif
+# endif
+#else
+# define FDE_ENCODING		0	/* absolute */
+# define FDE_ENCODE(X)		X
+#endif
+
+	.section	.eh_frame,EH_FRAME_FLAGS,@progbits
+.Lframe1:
+	.long	.LECIE1-.LSCIE1	/* Length of Common Information Entry */
+.LSCIE1:
+	.long	0x0	/* CIE Identifier Tag */
+	.byte	0x1	/* CIE Version */
+#ifdef HAVE_AS_ASCII_PSEUDO_OP
+#ifdef __PIC__
+	.ascii "zR\0"	/* CIE Augmentation */
+#else
+	.ascii "\0"	/* CIE Augmentation */
+#endif
+#elif defined HAVE_AS_STRING_PSEUDO_OP
+#ifdef __PIC__
+	.string "zR"	/* CIE Augmentation */
+#else
+	.string ""	/* CIE Augmentation */
+#endif
+#else
+#error missing .ascii/.string
+#endif
+	.byte	0x1	/* .uleb128 0x1; CIE Code Alignment Factor */
+	.byte	0x7c	/* .sleb128 -4; CIE Data Alignment Factor */
+	.byte	0x8	/* CIE RA Column */
+#ifdef __PIC__
+	.byte	0x1	/* .uleb128 0x1; Augmentation size */
+	.byte	FDE_ENCODING
+#endif
+	.byte	0xc	/* DW_CFA_def_cfa */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x88	/* DW_CFA_offset, column 0x8 */
+	.byte	0x1	/* .uleb128 0x1 */
+	.align 4
+.LECIE1:
+.LSFDE1:
+	.long	.LEFDE1-.LASFDE1	/* FDE Length */
+.LASFDE1:
+	.long	.LASFDE1-.Lframe1	/* FDE CIE offset */
+	.long	FDE_ENCODE(.LFB1)	/* FDE initial location */
+	.long	.LFE1-.LFB1		/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI0-.LFB1
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI1-.LCFI0
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+	.align 4
+.LEFDE1:
+.LSFDE2:
+	.long	.LEFDE2-.LASFDE2	/* FDE Length */
+.LASFDE2:
+	.long	.LASFDE2-.Lframe1	/* FDE CIE offset */
+	.long	FDE_ENCODE(.LFB2)	/* FDE initial location */
+	.long	.LFE2-.LFB2		/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI2-.LFB2
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI3-.LCFI2
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI7-.LCFI3
+	.byte	0x83	/* DW_CFA_offset, column 0x3 */
+	.byte	0xa	/* .uleb128 0xa */
+#endif
+	.align 4
+.LEFDE2:
+
+#if !FFI_NO_RAW_API
+
+.LSFDE3:
+	.long	.LEFDE3-.LASFDE3	/* FDE Length */
+.LASFDE3:
+	.long	.LASFDE3-.Lframe1	/* FDE CIE offset */
+	.long	FDE_ENCODE(.LFB3)	/* FDE initial location */
+	.long	.LFE3-.LFB3		/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI4-.LFB3
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI5-.LCFI4
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI6-.LCFI5
+	.byte	0x86	/* DW_CFA_offset, column 0x6 */
+	.byte	0x3	/* .uleb128 0x3 */
+	.align 4
+.LEFDE3:
+
+#endif
+#endif
+
+#endif /* ifndef __x86_64__ */
+
+#if defined __ELF__ && defined __linux__
+	.section	.note.GNU-stack,"",@progbits
+#endif
+
+
+#endif
--- a/BeefySysLib/third_party/libffi/ios/src/x86/unix64.S
+++ b/BeefySysLib/third_party/libffi/ios/src/x86/unix64.S
@ -0,0 +1,437 @@
+#if !defined(__arm__) && defined(__i386__)
+
+/* -----------------------------------------------------------------------
+   unix64.S - Copyright (c) 2013  The Written Word, Inc.
+	    - Copyright (c) 2008  Red Hat, Inc
+	    - Copyright (c) 2002  Bo Thorsen <bo@suse.de>
+
+   x86-64 Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifdef __x86_64__
+#define LIBFFI_ASM	
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+	            void *raddr, void (*fnaddr)(void));
+
+   Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
+   for this function.  This has been allocated by ffi_call.  We also
+   deallocate some of the stack that has been alloca'd.
+
+   Registers as consumed below: %rdi = args, %rsi = bytes, %rdx = flags,
+   %rcx = raddr, %r8 = fnaddr.  %r9d carries one more value that the
+   prototype above does not list: the SSE register count, which is
+   copied to %eax before the call.  After the call, the low byte of
+   flags indexes .Lstore_table to pick the handler that writes the
+   result through raddr.  Every handler must end in its own ret.  */
+
+	.align	2
+	.globl	ffi_call_unix64
+	.type	ffi_call_unix64,@function
+
+ffi_call_unix64:
+.LUW0:
+	movq	(%rsp), %r10		/* Load return address.  */
+	leaq	(%rdi, %rsi), %rax	/* Find local stack base.  */
+	movq	%rdx, (%rax)		/* Save flags.  */
+	movq	%rcx, 8(%rax)		/* Save raddr.  */
+	movq	%rbp, 16(%rax)		/* Save old frame pointer.  */
+	movq	%r10, 24(%rax)		/* Relocate return address.  */
+	movq	%rax, %rbp		/* Finalize local stack frame.  */
+.LUW1:
+	movq	%rdi, %r10		/* Save a copy of the register area. */
+	movq	%r8, %r11		/* Save a copy of the target fn.  */
+	movl	%r9d, %eax		/* Set number of SSE registers.  */
+
+	/* Load up all argument registers.  */
+	movq	(%r10), %rdi
+	movq	8(%r10), %rsi
+	movq	16(%r10), %rdx
+	movq	24(%r10), %rcx
+	movq	32(%r10), %r8
+	movq	40(%r10), %r9
+	testl	%eax, %eax
+	jnz	.Lload_sse
+.Lret_from_load_sse:
+
+	/* Deallocate the reg arg area.  It is 176 bytes: six 8-byte
+	   integer slots followed by eight 16-byte SSE slots.  */
+	leaq	176(%r10), %rsp
+
+	/* Call the user function.  */
+	call	*%r11
+
+	/* Deallocate stack arg area; local stack frame in redzone.  */
+	leaq	24(%rbp), %rsp
+
+	movq	0(%rbp), %rcx		/* Reload flags.  */
+	movq	8(%rbp), %rdi		/* Reload raddr.  */
+	movq	16(%rbp), %rbp		/* Reload old frame pointer.  */
+.LUW2:
+
+	/* The first byte of the flags contains the FFI_TYPE.  */
+	movzbl	%cl, %r10d
+	leaq	.Lstore_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10
+	addq	%r11, %r10
+	jmp	*%r10
+
+	/* Table of 32-bit offsets, relative to .Lstore_table itself,
+	   indexed by FFI_TYPE.  */
+.Lstore_table:
+	.long	.Lst_void-.Lstore_table		/* FFI_TYPE_VOID */
+	.long	.Lst_sint32-.Lstore_table	/* FFI_TYPE_INT */
+	.long	.Lst_float-.Lstore_table	/* FFI_TYPE_FLOAT */
+	.long	.Lst_double-.Lstore_table	/* FFI_TYPE_DOUBLE */
+	.long	.Lst_ldouble-.Lstore_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lst_uint8-.Lstore_table	/* FFI_TYPE_UINT8 */
+	.long	.Lst_sint8-.Lstore_table	/* FFI_TYPE_SINT8 */
+	.long	.Lst_uint16-.Lstore_table	/* FFI_TYPE_UINT16 */
+	.long	.Lst_sint16-.Lstore_table	/* FFI_TYPE_SINT16 */
+	.long	.Lst_uint32-.Lstore_table	/* FFI_TYPE_UINT32 */
+	.long	.Lst_sint32-.Lstore_table	/* FFI_TYPE_SINT32 */
+	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_UINT64 */
+	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_SINT64 */
+	.long	.Lst_struct-.Lstore_table	/* FFI_TYPE_STRUCT */
+	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_POINTER */
+
+	/* Integer results narrower than 64 bits are widened to a full
+	   quadword (zero- or sign-extended according to the type) and
+	   then stored through raddr, which is in %rdi here.  */
+	.align 2
+.Lst_void:
+	ret
+	.align 2
+
+.Lst_uint8:
+	movzbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_sint8:
+	movsbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_uint16:
+	movzwq	%ax, %rax
+	movq	%rax, (%rdi)
+	/* Return here.  Falling through into .Lst_sint16 would store
+	   the value a second time, sign-extended.  */
+	ret
+	.align 2
+.Lst_sint16:
+	movswq	%ax, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_uint32:
+	movl	%eax, %eax
+	movq	%rax, (%rdi)
+	/* Return here.  Falling through into .Lst_sint32 would replace
+	   the zero-extended result with a sign-extended one.  */
+	ret
+	.align 2
+.Lst_sint32:
+	cltq
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_int64:
+	movq	%rax, (%rdi)
+	ret
+
+	.align 2
+.Lst_float:
+	movss	%xmm0, (%rdi)
+	ret
+	.align 2
+.Lst_double:
+	movsd	%xmm0, (%rdi)
+	ret
+.Lst_ldouble:
+	fstpt	(%rdi)
+	ret
+
+	.align 2
+.Lst_struct:
+	leaq	-20(%rsp), %rsi		/* Scratch area in redzone.  */
+
+	/* We have to locate the values now, and since we don't want to
+	   write too much data into the user's return value, we spill the
+	   value to a 16 byte scratch area first.  Bits 8, 9, and 10
+	   control where the values are located.  Only one of the three
+	   bits will be set; see ffi_prep_cif_machdep for the pattern.  */
+	movd	%xmm0, %r10
+	movd	%xmm1, %r11
+	testl	$0x100, %ecx
+	cmovnz	%rax, %rdx
+	cmovnz	%r10, %rax
+	testl	$0x200, %ecx
+	cmovnz	%r10, %rdx
+	testl	$0x400, %ecx
+	cmovnz	%r10, %rax
+	cmovnz	%r11, %rdx
+	movq	%rax, (%rsi)
+	movq	%rdx, 8(%rsi)
+
+	/* Bits 12-31 contain the true size of the structure.  Copy from
+	   the scratch area to the true destination.  */
+	shrl	$12, %ecx
+	rep movsb
+	ret
+
+	/* Many times we can avoid loading any SSE registers at all.
+	   It's not worth an indirect jump to load the exact set of
+	   SSE registers needed; zero or all is a good compromise.  */
+	.align 2
+.LUW3:
+.Lload_sse:
+	movdqa	48(%r10), %xmm0
+	movdqa	64(%r10), %xmm1
+	movdqa	80(%r10), %xmm2
+	movdqa	96(%r10), %xmm3
+	movdqa	112(%r10), %xmm4
+	movdqa	128(%r10), %xmm5
+	movdqa	144(%r10), %xmm6
+	movdqa	160(%r10), %xmm7
+	jmp	.Lret_from_load_sse
+
+.LUW4:
+	.size    ffi_call_unix64,.-ffi_call_unix64
+
+	.align	2
+	.globl ffi_closure_unix64
+	.type	ffi_closure_unix64,@function
+
+/* ffi_closure_unix64: entry point reached from a closure trampoline.
+
+   Allocates a 200 byte frame, saves the six integer argument registers
+   at 0..40(%rsp) and, when the carry flag is set, %xmm0-%xmm7 at
+   48..160(%rsp).  It then calls ffi_closure_unix64_inner with
+	%rdi = %r10 (presumably the closure pointer set up by the
+	       trampoline -- confirm against the trampoline code)
+	%rsi = 176(%rsp), the buffer the return value is read back from
+	%rdx = %rsp, the saved register area
+	%rcx = 208(%rsp), the address just above our return address
+   The low byte of the value returned in %eax is an FFI_TYPE that
+   indexes .Lload_table.  Each handler loads the result from -24(%rsp),
+   which is the same address as 176(%rsp) before the frame was
+   released.  */
+ffi_closure_unix64:
+.LUW5:
+	/* The carry flag is set by the trampoline iff SSE registers
+	   are used.  Don't clobber it before the branch instruction.  */
+	leaq    -200(%rsp), %rsp
+.LUW6:
+	movq	%rdi, (%rsp)
+	movq    %rsi, 8(%rsp)
+	movq    %rdx, 16(%rsp)
+	movq    %rcx, 24(%rsp)
+	movq    %r8, 32(%rsp)
+	movq    %r9, 40(%rsp)
+	jc      .Lsave_sse
+.Lret_from_save_sse:
+
+	movq	%r10, %rdi
+	leaq	176(%rsp), %rsi
+	movq	%rsp, %rdx
+	leaq	208(%rsp), %rcx
+	call	ffi_closure_unix64_inner@PLT
+
+	/* Deallocate stack frame early; return value is now in redzone.  */
+	addq	$200, %rsp
+.LUW7:
+
+	/* The first byte of the return value contains the FFI_TYPE.  */
+	movzbl	%al, %r10d
+	leaq	.Lload_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10
+	addq	%r11, %r10
+	jmp	*%r10
+
+	/* Table of 32-bit offsets, relative to .Lload_table itself,
+	   indexed by FFI_TYPE.  Signed and unsigned integer types of the
+	   same width share one handler.  */
+.Lload_table:
+	.long	.Lld_void-.Lload_table		/* FFI_TYPE_VOID */
+	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_INT */
+	.long	.Lld_float-.Lload_table		/* FFI_TYPE_FLOAT */
+	.long	.Lld_double-.Lload_table	/* FFI_TYPE_DOUBLE */
+	.long	.Lld_ldouble-.Lload_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lld_int8-.Lload_table		/* FFI_TYPE_UINT8 */
+	.long	.Lld_int8-.Lload_table		/* FFI_TYPE_SINT8 */
+	.long	.Lld_int16-.Lload_table		/* FFI_TYPE_UINT16 */
+	.long	.Lld_int16-.Lload_table		/* FFI_TYPE_SINT16 */
+	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_UINT32 */
+	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_SINT32 */
+	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_UINT64 */
+	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_SINT64 */
+	.long	.Lld_struct-.Lload_table	/* FFI_TYPE_STRUCT */
+	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_POINTER */
+
+	.align 2
+.Lld_void:
+	ret
+
+	.align 2
+.Lld_int8:
+	movzbl	-24(%rsp), %eax
+	ret
+	.align 2
+.Lld_int16:
+	movzwl	-24(%rsp), %eax
+	ret
+	.align 2
+.Lld_int32:
+	movl	-24(%rsp), %eax
+	ret
+	.align 2
+.Lld_int64:
+	movq	-24(%rsp), %rax
+	ret
+
+	.align 2
+.Lld_float:
+	movss	-24(%rsp), %xmm0
+	ret
+	.align 2
+.Lld_double:
+	movsd	-24(%rsp), %xmm0
+	ret
+	.align 2
+.Lld_ldouble:
+	fldt	-24(%rsp)
+	ret
+
+	.align 2
+.Lld_struct:
+	/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
+	   %rax/%xmm0, %xmm0/%xmm1.  We collapse two by always loading
+	   both rdx and xmm1 with the second word.  For the remaining,
+	   bit 8 set means xmm0 gets the second word, and bit 9 means
+	   that rax gets the second word.  */
+	movq	-24(%rsp), %rcx
+	movq	-16(%rsp), %rdx
+	movq	-16(%rsp), %xmm1
+	testl	$0x100, %eax
+	cmovnz	%rdx, %rcx
+	movd	%rcx, %xmm0
+	testl	$0x200, %eax
+	/* movq leaves the flags alone, so the cmovnz below still sees
+	   the result of the bit 9 test even though %rax is reloaded.  */
+	movq	-24(%rsp), %rax
+	cmovnz	%rdx, %rax
+	ret
+
+	/* See the comment above .Lload_sse; the same logic applies here.  */
+	.align 2
+.LUW8:
+.Lsave_sse:
+	movdqa	%xmm0, 48(%rsp)
+	movdqa	%xmm1, 64(%rsp)
+	movdqa	%xmm2, 80(%rsp)
+	movdqa	%xmm3, 96(%rsp)
+	movdqa	%xmm4, 112(%rsp)
+	movdqa	%xmm5, 128(%rsp)
+	movdqa	%xmm6, 144(%rsp)
+	movdqa	%xmm7, 160(%rsp)
+	jmp	.Lret_from_save_sse
+
+.LUW9:
+	.size	ffi_closure_unix64,.-ffi_closure_unix64
+
+#ifdef __GNUC__
+/* Only emit DWARF unwind info when building with the GNU toolchain.  */
+
+#ifdef HAVE_AS_X86_64_UNWIND_SECTION_TYPE
+	.section	.eh_frame,"a",@unwind
+#else
+	.section	.eh_frame,"a",@progbits
+#endif
+/* Common Information Entry referenced by both FDEs below.  */
+.Lframe1:
+	.long	.LECIE1-.LSCIE1		/* CIE Length */
+.LSCIE1:
+	.long	0			/* CIE Identifier Tag */
+	.byte	1			/* CIE Version */
+	.ascii "zR\0"			/* CIE Augmentation */
+	.uleb128 1			/* CIE Code Alignment Factor */
+	.sleb128 -8			/* CIE Data Alignment Factor */
+	.byte	0x10			/* CIE RA Column */
+	.uleb128 1			/* Augmentation size */
+	.byte	0x1b			/* FDE Encoding (pcrel sdata4) */
+	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
+	.uleb128 7
+	.uleb128 8
+	.byte	0x80+16			/* DW_CFA_offset, %rip offset 1*-8 */
+	.uleb128 1
+	.align 8
+.LECIE1:
+/* FDE covering ffi_call_unix64, i.e. the range .LUW0 to .LUW4.  */
+.LSFDE1:
+	.long	.LEFDE1-.LASFDE1	/* FDE Length */
+.LASFDE1:
+	.long	.LASFDE1-.Lframe1	/* FDE CIE offset */
+#if HAVE_AS_X86_PCREL
+	.long	.LUW0-.			/* FDE initial location */
+#else
+	.long	.LUW0@rel
+#endif
+	.long	.LUW4-.LUW0		/* FDE address range */
+	.uleb128 0x0			/* Augmentation size */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW1-.LUW0
+
+	/* New stack frame based off rbp.  This is an itty bit of unwind
+	   trickery in that the CFA *has* changed.  There is no easy way
+	   to describe it correctly on entry to the function.  Fortunately,
+	   it doesn't matter too much since at all points we can correctly
+	   unwind back to ffi_call.  Note that the location to which we
+	   moved the return address is (the new) CFA-8, so from the
+	   perspective of the unwind info, it hasn't moved.  */
+	.byte	0xc			/* DW_CFA_def_cfa, %rbp offset 32 */
+	.uleb128 6
+	.uleb128 32
+	.byte	0x80+6			/* DW_CFA_offset, %rbp offset 2*-8 */
+	.uleb128 2
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW2-.LUW1
+	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
+	.uleb128 7
+	.uleb128 8
+	.byte	0xc0+6			/* DW_CFA_restore, %rbp */
+
+	/* From .LUW3 (the .Lload_sse block) onwards the rbp-based frame
+	   is live again, so the remembered state is reinstated.  */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW3-.LUW2
+	.byte	0xb			/* DW_CFA_restore_state */
+
+	.align 8
+.LEFDE1:
+/* FDE covering ffi_closure_unix64, i.e. the range .LUW5 to .LUW9.  */
+.LSFDE3:
+	.long	.LEFDE3-.LASFDE3	/* FDE Length */
+.LASFDE3:
+	.long	.LASFDE3-.Lframe1	/* FDE CIE offset */
+#if HAVE_AS_X86_PCREL
+	.long	.LUW5-.			/* FDE initial location */
+#else
+	.long	.LUW5@rel
+#endif
+	.long	.LUW9-.LUW5		/* FDE address range */
+	.uleb128 0x0			/* Augmentation size */
+
+	/* 208 = the 200 byte frame plus the 8 byte return address.  */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW6-.LUW5
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.uleb128 208
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW7-.LUW6
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.uleb128 8
+
+	/* .LUW8 starts the .Lsave_sse block, which runs with the 200
+	   byte frame still allocated.  */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW8-.LUW7
+	.byte	0xb			/* DW_CFA_restore_state */
+
+	.align 8
+.LEFDE3:
+
+#endif /* __GNUC__ */
+	
+#endif /* __x86_64__ */
+
+#if defined __ELF__ && defined __linux__
+	.section	.note.GNU-stack,"",@progbits
+#endif
+
+
+#endif
--- a/BeefySysLib/third_party/libffi/ios/src/x86/win32.S
+++ b/BeefySysLib/third_party/libffi/ios/src/x86/win32.S
--- a/BeefySysLib/third_party/libffi/ios/src/x86/win64.S
+++ b/BeefySysLib/third_party/libffi/ios/src/x86/win64.S
@ -0,0 +1,473 @@
+#if !defined(__arm__) && defined(__i386__)
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+	
+/* Constants for ffi_call_win64 */	
+#define STACK 0
+#define PREP_ARGS_FN 32
+#define ECIF 40
+#define CIF_BYTES 48
+#define CIF_FLAGS 56
+#define RVALUE 64
+#define FN 72
+
+/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *),
+                   extended_cif *ecif, unsigned bytes, unsigned flags,
+                   unsigned *rvalue, void (*fn)());
+ */
+
+#ifdef _MSC_VER
+PUBLIC	ffi_call_win64
+
+EXTRN	__chkstk:NEAR
+EXTRN	ffi_closure_win64_inner:NEAR
+
+_TEXT	SEGMENT
+
+;;; ffi_closure_win64 will be called with these registers set:
+;;;    rax points to 'closure'
+;;;    r11 contains a bit mask that specifies which of the
+;;;    first four parameters are float or double
+;;;
+;;; It must move the parameters passed in registers to their stack location,
+;;; call ffi_closure_win64_inner for the actual work, then return the result.
+;;; 
+;;; For each of the four slots [rsp+8] .. [rsp+32] the matching bit of r11
+;;; selects whether the SSE register (xmm0-xmm3) or the integer register
+;;; (rcx, rdx, r8, r9) is stored.  The value that ffi_closure_win64_inner
+;;; leaves in rax is also copied into xmm0 before returning.
+ffi_closure_win64 PROC FRAME
+	;; copy register arguments onto stack
+	test	r11, 1
+	jne	first_is_float	
+	mov	QWORD PTR [rsp+8], rcx
+	jmp	second
+first_is_float:
+	movlpd	QWORD PTR [rsp+8], xmm0
+
+second:
+	test	r11, 2
+	jne	second_is_float	
+	mov	QWORD PTR [rsp+16], rdx
+	jmp	third
+second_is_float:
+	movlpd	QWORD PTR [rsp+16], xmm1
+
+third:
+	test	r11, 4
+	jne	third_is_float	
+	mov	QWORD PTR [rsp+24], r8
+	jmp	fourth
+third_is_float:
+	movlpd	QWORD PTR [rsp+24], xmm2
+
+fourth:
+	test	r11, 8
+	jne	fourth_is_float	
+	mov	QWORD PTR [rsp+32], r9
+	jmp	done
+fourth_is_float:
+	movlpd	QWORD PTR [rsp+32], xmm3
+
+done:
+	;; 40 bytes: presumably 32 bytes of home space for the callee plus 8
+	;; to keep rsp 16-byte aligned -- TODO confirm against the Win64 ABI
+        .ALLOCSTACK 40
+	sub	rsp, 40
+        .ENDPROLOG
+	mov	rcx, rax	; context is first parameter
+	mov	rdx, rsp	; stack is second parameter
+	;; 48 = the 40 bytes just allocated + 8 for the return address, so
+	;; rdx ends up at the first slot written above
+	add	rdx, 48		; point to start of arguments
+	mov	rax, ffi_closure_win64_inner
+	call	rax		; call the real closure function
+	add	rsp, 40
+	movd	xmm0, rax	; If the closure returned a float,
+                                ; ffi_closure_win64_inner wrote it to rax
+	ret	0
+ffi_closure_win64 ENDP
+
+;;; ffi_call_win64 (MASM version).  Steps performed below:
+;;;    spill rcx, rdx, r8, r9 to the four slots above the return address
+;;;    set rbp = rsp+32 so that the offset constants defined near the top
+;;;    of this file address one local slot and the six incoming arguments
+;;;    reserve the cif byte count, rounded up to a multiple of 16
+;;;    call prep_args_fn(stack, ecif) to fill the outgoing argument area
+;;;    load the first four slots into both integer and SSE registers
+;;;    call fn, then store the result through rvalue based on the cif flags
+;;; rax is cleared before returning.
+ffi_call_win64 PROC FRAME
+        ;; copy registers onto stack
+	mov	QWORD PTR [rsp+32], r9
+	mov	QWORD PTR [rsp+24], r8
+	mov	QWORD PTR [rsp+16], rdx
+	mov	QWORD PTR [rsp+8], rcx
+        .PUSHREG rbp
+	push	rbp
+        .ALLOCSTACK 48
+	sub	rsp, 48					; 00000030H
+        .SETFRAME rbp, 32
+	lea	rbp, QWORD PTR [rsp+32]
+        .ENDPROLOG
+
+	;; round the byte count up to a multiple of 16 and allocate it
+	mov	eax, DWORD PTR CIF_BYTES[rbp]
+	add	rax, 15
+	and	rax, -16
+	call	__chkstk
+	sub	rsp, rax
+	;; the argument area starts 32 bytes above rsp; the 32 bytes below
+	;; it are presumably home space for the prep_args_fn call -- confirm
+	lea	rax, QWORD PTR [rsp+32]
+	mov	QWORD PTR STACK[rbp], rax
+
+	mov	rdx, QWORD PTR ECIF[rbp]
+	mov	rcx, QWORD PTR STACK[rbp]
+	call	QWORD PTR PREP_ARGS_FN[rbp]
+
+	mov	rsp, QWORD PTR STACK[rbp]
+
+	;; each of the first four slots is loaded into both the SSE and the
+	;; integer register for its position, whatever its type
+	movlpd	xmm3, QWORD PTR [rsp+24]
+	movd	r9, xmm3
+
+	movlpd	xmm2, QWORD PTR [rsp+16]
+	movd	r8, xmm2
+
+	movlpd	xmm1, QWORD PTR [rsp+8]
+	movd	rdx, xmm1
+
+	movlpd	xmm0, QWORD PTR [rsp]
+	movd	rcx, xmm0
+
+	call	QWORD PTR FN[rbp]
+	;; compare the cif flags against each supported return type in turn;
+	;; a value matching none of them falls through to ret_void$ and
+	;; stores nothing
+ret_struct4b$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B
+ 	jne	ret_struct2b$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov	DWORD PTR [rcx], eax
+	jmp	ret_void$
+
+ret_struct2b$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B
+ 	jne	ret_struct1b$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov	WORD PTR [rcx], ax
+	jmp	ret_void$
+
+ret_struct1b$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B
+ 	jne	ret_uint8$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov	BYTE PTR [rcx], al
+	jmp	ret_void$
+
+	;; integer results narrower than 64 bits are widened to a full
+	;; quadword before being stored
+ret_uint8$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8
+ 	jne	ret_sint8$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movzx   rax, al
+	mov	QWORD PTR [rcx], rax
+	jmp	ret_void$
+
+ret_sint8$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8
+ 	jne	ret_uint16$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movsx   rax, al
+	mov	QWORD PTR [rcx], rax
+	jmp	ret_void$
+
+ret_uint16$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16
+ 	jne	ret_sint16$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movzx   rax, ax
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_sint16$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16
+ 	jne	ret_uint32$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movsx   rax, ax
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_uint32$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32
+ 	jne	ret_sint32$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov     eax, eax
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_sint32$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32
+ 	jne	ret_float$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	cdqe
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_float$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT
+ 	jne	SHORT ret_double$
+
+ 	mov	rax, QWORD PTR RVALUE[rbp]
+ 	movss	DWORD PTR [rax], xmm0
+ 	jmp	SHORT ret_void$
+
+ret_double$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE
+ 	jne	SHORT ret_sint64$
+
+ 	mov	rax, QWORD PTR RVALUE[rbp]
+ 	movlpd	QWORD PTR [rax], xmm0
+ 	jmp	SHORT ret_void$
+
+ret_sint64$:
+  	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64
+  	jne	ret_void$
+
+ 	mov	rcx, QWORD PTR RVALUE[rbp]
+ 	mov	QWORD PTR [rcx], rax
+ 	jmp	SHORT ret_void$
+	
+ret_void$:
+	xor	rax, rax
+
+	;; rbp+16 is where the push of rbp left rsp, so this releases the
+	;; argument area and the 48 byte frame in one step
+	lea	rsp, QWORD PTR [rbp+16]
+	pop	rbp
+	ret	0
+ffi_call_win64 ENDP
+_TEXT	ENDS
+END
+
+#else
+
+#ifdef SYMBOL_UNDERSCORE
+#define SYMBOL_NAME(name) _##name
+#else
+#define SYMBOL_NAME(name) name
+#endif
+
+.text
+
+.extern SYMBOL_NAME(ffi_closure_win64_inner)
+
+# ffi_closure_win64 will be called with these registers set:
+#    rax points to 'closure'
+#    r11 contains a bit mask that specifies which of the
+#    first four parameters are float or double
+#
+# It must move the parameters passed in registers to their stack location,
+# call ffi_closure_win64_inner for the actual work, then return the result.
+# 
+# For each of the four slots 8(%rsp) .. 32(%rsp) the matching bit of r11
+# selects whether the SSE register (xmm0-xmm3) or the integer register
+# (rcx, rdx, r8, r9) is stored.  The value that ffi_closure_win64_inner
+# leaves in rax is also copied into xmm0 before returning.
+	.balign 16
+        .globl SYMBOL_NAME(ffi_closure_win64)
+SYMBOL_NAME(ffi_closure_win64):
+	# copy register arguments onto stack
+	test	$1,%r11
+	jne	.Lfirst_is_float	
+	mov	%rcx, 8(%rsp)
+	jmp	.Lsecond
+.Lfirst_is_float:
+	movlpd	%xmm0, 8(%rsp)
+
+.Lsecond:
+	test	$2, %r11
+	jne	.Lsecond_is_float	
+	mov	%rdx, 16(%rsp)
+	jmp	.Lthird
+.Lsecond_is_float:
+	movlpd	%xmm1, 16(%rsp)
+
+.Lthird:
+	test	$4, %r11
+	jne	.Lthird_is_float	
+	mov	%r8,24(%rsp)
+	jmp	.Lfourth
+.Lthird_is_float:
+	movlpd	%xmm2, 24(%rsp)
+
+.Lfourth:
+	test	$8, %r11
+	jne	.Lfourth_is_float	
+	mov	%r9, 32(%rsp)
+	jmp	.Ldone
+.Lfourth_is_float:
+	movlpd	%xmm3, 32(%rsp)
+
+.Ldone:
+	# 40 bytes: presumably 32 bytes of home space for the callee plus 8
+	# to keep rsp 16-byte aligned -- TODO confirm against the Win64 ABI
+#.ALLOCSTACK 40
+	sub	$40, %rsp
+#.ENDPROLOG
+	mov	%rax, %rcx	# context is first parameter
+	mov	%rsp, %rdx	# stack is second parameter
+	# 48 = the 40 bytes just allocated + 8 for the return address, so
+	# rdx ends up at the first slot written above
+	add	$48, %rdx	# point to start of arguments
+	mov	$SYMBOL_NAME(ffi_closure_win64_inner), %rax
+	callq	*%rax		# call the real closure function
+	add	$40, %rsp
+	movq	%rax, %xmm0	# If the closure returned a float,
+                                # ffi_closure_win64_inner wrote it to rax
+	retq
+.ffi_closure_win64_end:
+
+# ffi_call_win64 (GNU as version).  Steps performed below:
+#    spill rcx, rdx, r8, r9 to the four slots above the return address
+#    set rbp = rsp+32 so that the offset constants defined near the top
+#    of this file address one local slot and the six incoming arguments
+#    reserve the cif byte count, rounded up to a multiple of 16, touching
+#    the stack once per 0x1000 bytes on the way down
+#    call prep_args_fn(stack, ecif) to fill the outgoing argument area
+#    load the first four slots into both integer and SSE registers
+#    call fn, then store the result through rvalue based on the cif flags
+# rax is cleared before returning.
+	.balign 16
+        .globl	SYMBOL_NAME(ffi_call_win64)
+SYMBOL_NAME(ffi_call_win64):
+        # copy registers onto stack
+	mov	%r9,32(%rsp)
+	mov	%r8,24(%rsp)
+	mov	%rdx,16(%rsp)
+	mov	%rcx,8(%rsp)
+        #.PUSHREG rbp
+	push	%rbp
+        #.ALLOCSTACK 48
+	sub	$48,%rsp
+        #.SETFRAME rbp, 32
+	lea	32(%rsp),%rbp
+        #.ENDPROLOG
+
+	# round the byte count up to a multiple of 16
+	mov	CIF_BYTES(%rbp),%eax
+	add	$15, %rax
+	and	$-16, %rax
+	# open-coded stack probe: step rsp down 0x1000 bytes at a time,
+	# touching each step with a no-op OR, then allocate the remainder
+	cmpq	$0x1000, %rax
+	jb	Lch_done
+Lch_probe:
+	subq	$0x1000,%rsp
+	orl	$0x0, (%rsp)
+	subq	$0x1000,%rax
+	cmpq	$0x1000,%rax
+	ja	Lch_probe
+Lch_done:
+	subq	%rax, %rsp
+	orl	$0x0, (%rsp)
+	# the argument area starts 32 bytes above rsp; the 32 bytes below
+	# it are presumably home space for the prep_args_fn call -- confirm
+	lea	32(%rsp), %rax
+	mov	%rax, STACK(%rbp)
+
+	mov	ECIF(%rbp), %rdx
+	mov	STACK(%rbp), %rcx
+	callq	*PREP_ARGS_FN(%rbp)
+
+	mov	STACK(%rbp), %rsp
+
+	# each of the first four slots is loaded into both the SSE and the
+	# integer register for its position, whatever its type
+	movlpd	24(%rsp), %xmm3
+	movd	%xmm3, %r9
+
+	movlpd	16(%rsp), %xmm2
+	movd	%xmm2, %r8
+
+	movlpd	8(%rsp), %xmm1
+	movd	%xmm1, %rdx
+
+	movlpd	(%rsp), %xmm0
+	movd	%xmm0, %rcx
+
+	callq	*FN(%rbp)
+	# compare the cif flags against each supported return type in turn;
+	# a value matching none of them falls through to .Lret_void and
+	# stores nothing
+.Lret_struct4b:
+ 	cmpl	$FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp)
+ 	jne .Lret_struct2b
+
+	mov	RVALUE(%rbp), %rcx
+	mov	%eax, (%rcx)
+	jmp	.Lret_void
+
+.Lret_struct2b:
+	cmpl	$FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp)
+	jne .Lret_struct1b
+	
+	mov	RVALUE(%rbp), %rcx
+	mov	%ax, (%rcx)
+	jmp .Lret_void
+	
+.Lret_struct1b:
+	cmpl	$FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp)
+	jne .Lret_uint8
+	
+	mov	RVALUE(%rbp), %rcx
+	mov	%al, (%rcx)
+	jmp .Lret_void
+
+	# integer results narrower than 64 bits are widened to a full
+	# quadword before being stored
+.Lret_uint8:
+	cmpl	$FFI_TYPE_UINT8, CIF_FLAGS(%rbp)
+	jne .Lret_sint8
+	
+        mov     RVALUE(%rbp), %rcx
+        movzbq  %al, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_sint8:
+	cmpl	$FFI_TYPE_SINT8, CIF_FLAGS(%rbp)
+	jne .Lret_uint16
+	
+        mov     RVALUE(%rbp), %rcx
+        movsbq  %al, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_uint16:
+	cmpl	$FFI_TYPE_UINT16, CIF_FLAGS(%rbp)
+	jne .Lret_sint16
+	
+        mov     RVALUE(%rbp), %rcx
+        movzwq  %ax, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_sint16:
+	cmpl	$FFI_TYPE_SINT16, CIF_FLAGS(%rbp)
+	jne .Lret_uint32
+	
+        mov     RVALUE(%rbp), %rcx
+        movswq  %ax, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_uint32:
+	cmpl	$FFI_TYPE_UINT32, CIF_FLAGS(%rbp)
+	jne .Lret_sint32
+	
+        mov     RVALUE(%rbp), %rcx
+        movl    %eax, %eax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_sint32:
+ 	cmpl	$FFI_TYPE_SINT32, CIF_FLAGS(%rbp)
+ 	jne	.Lret_float
+
+	mov	RVALUE(%rbp), %rcx
+	cltq
+	movq	%rax, (%rcx)
+	jmp	.Lret_void
+
+.Lret_float:
+ 	cmpl	$FFI_TYPE_FLOAT, CIF_FLAGS(%rbp)
+ 	jne	.Lret_double
+
+ 	mov	RVALUE(%rbp), %rax
+ 	movss	%xmm0, (%rax)
+ 	jmp	.Lret_void
+
+.Lret_double:
+ 	cmpl	$FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp)
+ 	jne	.Lret_sint64
+
+ 	mov	RVALUE(%rbp), %rax
+ 	movlpd	%xmm0, (%rax)
+ 	jmp	.Lret_void
+
+.Lret_sint64:
+  	cmpl	$FFI_TYPE_SINT64, CIF_FLAGS(%rbp)
+  	jne	.Lret_void
+
+ 	mov	RVALUE(%rbp), %rcx
+ 	mov	%rax, (%rcx)
+ 	jmp	.Lret_void
+	
+.Lret_void:
+	xor	%rax, %rax
+
+	# rbp+16 is where the push of rbp left rsp, so this releases the
+	# argument area and the 48 byte frame in one step
+	lea	16(%rbp), %rsp
+	pop	%rbp
+	retq
+.ffi_call_win64_end:
+#endif /* !_MSC_VER */
+
+
+
+#endif
--- a/BeefySysLib/third_party/libffi/ios/src/x86/win64.S_old
+++ b/BeefySysLib/third_party/libffi/ios/src/x86/win64.S_old
@ -0,0 +1,473 @@
+#if !defined(__arm__) && defined(__i386__)
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+	
+/* Constants for ffi_call_win64 */	
+#define STACK 0
+#define PREP_ARGS_FN 32
+#define ECIF 40
+#define CIF_BYTES 48
+#define CIF_FLAGS 56
+#define RVALUE 64
+#define FN 72
+
+/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *),
+                   extended_cif *ecif, unsigned bytes, unsigned flags,
+                   unsigned *rvalue, void (*fn)());
+ */
+
+#ifdef _MSC_VER
+PUBLIC	ffi_call_win64
+
+EXTRN	__chkstk:NEAR
+EXTRN	ffi_closure_win64_inner:NEAR
+
+_TEXT	SEGMENT
+
+;;; ffi_closure_win64 will be called with these registers set:
+;;;    rax points to 'closure'
+;;;    r11 contains a bit mask that specifies which of the
+;;;    first four parameters are float or double
+;;;
+;;; It must move the parameters passed in registers to their stack location,
+;;; call ffi_closure_win64_inner for the actual work, then return the result.
+;;; 
+;;; For each of the four slots [rsp+8] .. [rsp+32] the matching bit of r11
+;;; selects whether the SSE register (xmm0-xmm3) or the integer register
+;;; (rcx, rdx, r8, r9) is stored.  The value that ffi_closure_win64_inner
+;;; leaves in rax is also copied into xmm0 before returning.
+ffi_closure_win64 PROC FRAME
+	;; copy register arguments onto stack
+	test	r11, 1
+	jne	first_is_float	
+	mov	QWORD PTR [rsp+8], rcx
+	jmp	second
+first_is_float:
+	movlpd	QWORD PTR [rsp+8], xmm0
+
+second:
+	test	r11, 2
+	jne	second_is_float	
+	mov	QWORD PTR [rsp+16], rdx
+	jmp	third
+second_is_float:
+	movlpd	QWORD PTR [rsp+16], xmm1
+
+third:
+	test	r11, 4
+	jne	third_is_float	
+	mov	QWORD PTR [rsp+24], r8
+	jmp	fourth
+third_is_float:
+	movlpd	QWORD PTR [rsp+24], xmm2
+
+fourth:
+	test	r11, 8
+	jne	fourth_is_float	
+	mov	QWORD PTR [rsp+32], r9
+	jmp	done
+fourth_is_float:
+	movlpd	QWORD PTR [rsp+32], xmm3
+
+done:
+	;; 40 bytes: presumably 32 bytes of home space for the callee plus 8
+	;; to keep rsp 16-byte aligned -- TODO confirm against the Win64 ABI
+        .ALLOCSTACK 40
+	sub	rsp, 40
+        .ENDPROLOG
+	mov	rcx, rax	; context is first parameter
+	mov	rdx, rsp	; stack is second parameter
+	;; 48 = the 40 bytes just allocated + 8 for the return address, so
+	;; rdx ends up at the first slot written above
+	add	rdx, 48		; point to start of arguments
+	mov	rax, ffi_closure_win64_inner
+	call	rax		; call the real closure function
+	add	rsp, 40
+	movd	xmm0, rax	; If the closure returned a float,
+                                ; ffi_closure_win64_inner wrote it to rax
+	ret	0
+ffi_closure_win64 ENDP
+
+;;; ffi_call_win64 (MASM version).  Steps performed below:
+;;;    spill rcx, rdx, r8, r9 to the four slots above the return address
+;;;    set rbp = rsp+32 so that the offset constants defined near the top
+;;;    of this file address one local slot and the six incoming arguments
+;;;    reserve the cif byte count, rounded up to a multiple of 16
+;;;    call prep_args_fn(stack, ecif) to fill the outgoing argument area
+;;;    load the first four slots into both integer and SSE registers
+;;;    call fn, then store the result through rvalue based on the cif flags
+;;; rax is cleared before returning.
+ffi_call_win64 PROC FRAME
+        ;; copy registers onto stack
+	mov	QWORD PTR [rsp+32], r9
+	mov	QWORD PTR [rsp+24], r8
+	mov	QWORD PTR [rsp+16], rdx
+	mov	QWORD PTR [rsp+8], rcx
+        .PUSHREG rbp
+	push	rbp
+        .ALLOCSTACK 48
+	sub	rsp, 48					; 00000030H
+        .SETFRAME rbp, 32
+	lea	rbp, QWORD PTR [rsp+32]
+        .ENDPROLOG
+
+	;; round the byte count up to a multiple of 16 and allocate it
+	mov	eax, DWORD PTR CIF_BYTES[rbp]
+	add	rax, 15
+	and	rax, -16
+	call	__chkstk
+	sub	rsp, rax
+	;; the argument area starts 32 bytes above rsp; the 32 bytes below
+	;; it are presumably home space for the prep_args_fn call -- confirm
+	lea	rax, QWORD PTR [rsp+32]
+	mov	QWORD PTR STACK[rbp], rax
+
+	mov	rdx, QWORD PTR ECIF[rbp]
+	mov	rcx, QWORD PTR STACK[rbp]
+	call	QWORD PTR PREP_ARGS_FN[rbp]
+
+	mov	rsp, QWORD PTR STACK[rbp]
+
+	;; each of the first four slots is loaded into both the SSE and the
+	;; integer register for its position, whatever its type
+	movlpd	xmm3, QWORD PTR [rsp+24]
+	movd	r9, xmm3
+
+	movlpd	xmm2, QWORD PTR [rsp+16]
+	movd	r8, xmm2
+
+	movlpd	xmm1, QWORD PTR [rsp+8]
+	movd	rdx, xmm1
+
+	movlpd	xmm0, QWORD PTR [rsp]
+	movd	rcx, xmm0
+
+	call	QWORD PTR FN[rbp]
+	;; compare the cif flags against each supported return type in turn;
+	;; a value matching none of them falls through to ret_void$ and
+	;; stores nothing
+ret_struct4b$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B
+ 	jne	ret_struct2b$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov	DWORD PTR [rcx], eax
+	jmp	ret_void$
+
+ret_struct2b$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B
+ 	jne	ret_struct1b$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov	WORD PTR [rcx], ax
+	jmp	ret_void$
+
+ret_struct1b$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B
+ 	jne	ret_uint8$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov	BYTE PTR [rcx], al
+	jmp	ret_void$
+
+	;; integer results narrower than 64 bits are widened to a full
+	;; quadword before being stored
+ret_uint8$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8
+ 	jne	ret_sint8$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movzx   rax, al
+	mov	QWORD PTR [rcx], rax
+	jmp	ret_void$
+
+ret_sint8$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8
+ 	jne	ret_uint16$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movsx   rax, al
+	mov	QWORD PTR [rcx], rax
+	jmp	ret_void$
+
+ret_uint16$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16
+ 	jne	ret_sint16$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movzx   rax, ax
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_sint16$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16
+ 	jne	ret_uint32$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	movsx   rax, ax
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_uint32$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32
+ 	jne	ret_sint32$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	mov     eax, eax
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_sint32$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32
+ 	jne	ret_float$
+
+	mov	rcx, QWORD PTR RVALUE[rbp]
+	cdqe
+	mov	QWORD PTR [rcx], rax
+	jmp	SHORT ret_void$
+
+ret_float$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT
+ 	jne	SHORT ret_double$
+
+ 	mov	rax, QWORD PTR RVALUE[rbp]
+ 	movss	DWORD PTR [rax], xmm0
+ 	jmp	SHORT ret_void$
+
+ret_double$:
+ 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE
+ 	jne	SHORT ret_sint64$
+
+ 	mov	rax, QWORD PTR RVALUE[rbp]
+ 	movlpd	QWORD PTR [rax], xmm0
+ 	jmp	SHORT ret_void$
+
+ret_sint64$:
+  	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64
+  	jne	ret_void$
+
+ 	mov	rcx, QWORD PTR RVALUE[rbp]
+ 	mov	QWORD PTR [rcx], rax
+ 	jmp	SHORT ret_void$
+	
+ret_void$:
+	xor	rax, rax
+
+	;; rbp+16 is where the push of rbp left rsp, so this releases the
+	;; argument area and the 48 byte frame in one step
+	lea	rsp, QWORD PTR [rbp+16]
+	pop	rbp
+	ret	0
+ffi_call_win64 ENDP
+_TEXT	ENDS
+END
+
+#else
+
+#ifdef SYMBOL_UNDERSCORE
+#define SYMBOL_NAME(name) _##name
+#else
+#define SYMBOL_NAME(name) name
+#endif
+
+.text
+
+.extern SYMBOL_NAME(ffi_closure_win64_inner)
+
+# ffi_closure_win64 will be called with these registers set:
+#    rax points to 'closure'
+#    r11 contains a bit mask that specifies which of the
+#    first four parameters are float or double
+#
+# It must move the parameters passed in registers to their stack location,
+# call ffi_closure_win64_inner for the actual work, then return the result.
+# 
+# For each of the four slots 8(%rsp) .. 32(%rsp) the matching bit of r11
+# selects whether the SSE register (xmm0-xmm3) or the integer register
+# (rcx, rdx, r8, r9) is stored.  The value that ffi_closure_win64_inner
+# leaves in rax is also copied into xmm0 before returning.
+	.balign 16
+        .globl SYMBOL_NAME(ffi_closure_win64)
+SYMBOL_NAME(ffi_closure_win64):
+	# copy register arguments onto stack
+	test	$1,%r11
+	jne	.Lfirst_is_float	
+	mov	%rcx, 8(%rsp)
+	jmp	.Lsecond
+.Lfirst_is_float:
+	movlpd	%xmm0, 8(%rsp)
+
+.Lsecond:
+	test	$2, %r11
+	jne	.Lsecond_is_float	
+	mov	%rdx, 16(%rsp)
+	jmp	.Lthird
+.Lsecond_is_float:
+	movlpd	%xmm1, 16(%rsp)
+
+.Lthird:
+	test	$4, %r11
+	jne	.Lthird_is_float	
+	mov	%r8,24(%rsp)
+	jmp	.Lfourth
+.Lthird_is_float:
+	movlpd	%xmm2, 24(%rsp)
+
+.Lfourth:
+	test	$8, %r11
+	jne	.Lfourth_is_float	
+	mov	%r9, 32(%rsp)
+	jmp	.Ldone
+.Lfourth_is_float:
+	movlpd	%xmm3, 32(%rsp)
+
+.Ldone:
+	# 40 bytes: presumably 32 bytes of home space for the callee plus 8
+	# to keep rsp 16-byte aligned -- TODO confirm against the Win64 ABI
+#.ALLOCSTACK 40
+	sub	$40, %rsp
+#.ENDPROLOG
+	mov	%rax, %rcx	# context is first parameter
+	mov	%rsp, %rdx	# stack is second parameter
+	# 48 = the 40 bytes just allocated + 8 for the return address, so
+	# rdx ends up at the first slot written above
+	add	$48, %rdx	# point to start of arguments
+	mov	$SYMBOL_NAME(ffi_closure_win64_inner), %rax
+	callq	*%rax		# call the real closure function
+	add	$40, %rsp
+	movq	%rax, %xmm0	# If the closure returned a float,
+                                # ffi_closure_win64_inner wrote it to rax
+	retq
+.ffi_closure_win64_end:
+
+# ffi_call_win64 (GNU as version).  Steps performed below:
+#    spill rcx, rdx, r8, r9 to the four slots above the return address
+#    set rbp = rsp+32 so that the offset constants defined near the top
+#    of this file address one local slot and the six incoming arguments
+#    reserve the cif byte count, rounded up to a multiple of 16, touching
+#    the stack once per 0x1000 bytes on the way down
+#    call prep_args_fn(stack, ecif) to fill the outgoing argument area
+#    load the first four slots into both integer and SSE registers
+#    call fn, then store the result through rvalue based on the cif flags
+# rax is cleared before returning.
+	.balign 16
+        .globl	SYMBOL_NAME(ffi_call_win64)
+SYMBOL_NAME(ffi_call_win64):
+        # copy registers onto stack
+	mov	%r9,32(%rsp)
+	mov	%r8,24(%rsp)
+	mov	%rdx,16(%rsp)
+	mov	%rcx,8(%rsp)
+        #.PUSHREG rbp
+	push	%rbp
+        #.ALLOCSTACK 48
+	sub	$48,%rsp
+        #.SETFRAME rbp, 32
+	lea	32(%rsp),%rbp
+        #.ENDPROLOG
+
+	# round the byte count up to a multiple of 16
+	mov	CIF_BYTES(%rbp),%eax
+	add	$15, %rax
+	and	$-16, %rax
+	# open-coded stack probe: step rsp down 0x1000 bytes at a time,
+	# touching each step with a no-op OR, then allocate the remainder
+	cmpq	$0x1000, %rax
+	jb	Lch_done
+Lch_probe:
+	subq	$0x1000,%rsp
+	orl	$0x0, (%rsp)
+	subq	$0x1000,%rax
+	cmpq	$0x1000,%rax
+	ja	Lch_probe
+Lch_done:
+	subq	%rax, %rsp
+	orl	$0x0, (%rsp)
+	# the argument area starts 32 bytes above rsp; the 32 bytes below
+	# it are presumably home space for the prep_args_fn call -- confirm
+	lea	32(%rsp), %rax
+	mov	%rax, STACK(%rbp)
+
+	mov	ECIF(%rbp), %rdx
+	mov	STACK(%rbp), %rcx
+	callq	*PREP_ARGS_FN(%rbp)
+
+	mov	STACK(%rbp), %rsp
+
+	# each of the first four slots is loaded into both the SSE and the
+	# integer register for its position, whatever its type
+	movlpd	24(%rsp), %xmm3
+	movd	%xmm3, %r9
+
+	movlpd	16(%rsp), %xmm2
+	movd	%xmm2, %r8
+
+	movlpd	8(%rsp), %xmm1
+	movd	%xmm1, %rdx
+
+	movlpd	(%rsp), %xmm0
+	movd	%xmm0, %rcx
+
+	callq	*FN(%rbp)
+	# compare the cif flags against each supported return type in turn;
+	# a value matching none of them falls through to .Lret_void and
+	# stores nothing
+.Lret_struct4b:
+ 	cmpl	$FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp)
+ 	jne .Lret_struct2b
+
+	mov	RVALUE(%rbp), %rcx
+	mov	%eax, (%rcx)
+	jmp	.Lret_void
+
+.Lret_struct2b:
+	cmpl	$FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp)
+	jne .Lret_struct1b
+	
+	mov	RVALUE(%rbp), %rcx
+	mov	%ax, (%rcx)
+	jmp .Lret_void
+	
+.Lret_struct1b:
+	cmpl	$FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp)
+	jne .Lret_uint8
+	
+	mov	RVALUE(%rbp), %rcx
+	mov	%al, (%rcx)
+	jmp .Lret_void
+
+	# integer results narrower than 64 bits are widened to a full
+	# quadword before being stored
+.Lret_uint8:
+	cmpl	$FFI_TYPE_UINT8, CIF_FLAGS(%rbp)
+	jne .Lret_sint8
+	
+        mov     RVALUE(%rbp), %rcx
+        movzbq  %al, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_sint8:
+	cmpl	$FFI_TYPE_SINT8, CIF_FLAGS(%rbp)
+	jne .Lret_uint16
+	
+        mov     RVALUE(%rbp), %rcx
+        movsbq  %al, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_uint16:
+	cmpl	$FFI_TYPE_UINT16, CIF_FLAGS(%rbp)
+	jne .Lret_sint16
+	
+        mov     RVALUE(%rbp), %rcx
+        movzwq  %ax, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_sint16:
+	cmpl	$FFI_TYPE_SINT16, CIF_FLAGS(%rbp)
+	jne .Lret_uint32
+	
+        mov     RVALUE(%rbp), %rcx
+        movswq  %ax, %rax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_uint32:
+	cmpl	$FFI_TYPE_UINT32, CIF_FLAGS(%rbp)
+	jne .Lret_sint32
+	
+        mov     RVALUE(%rbp), %rcx
+        movl    %eax, %eax
+	movq    %rax, (%rcx)
+	jmp .Lret_void
+
+.Lret_sint32:
+ 	cmpl	$FFI_TYPE_SINT32, CIF_FLAGS(%rbp)
+ 	jne	.Lret_float
+
+	mov	RVALUE(%rbp), %rcx
+	cltq
+	movq	%rax, (%rcx)
+	jmp	.Lret_void
+
+.Lret_float:
+ 	cmpl	$FFI_TYPE_FLOAT, CIF_FLAGS(%rbp)
+ 	jne	.Lret_double
+
+ 	mov	RVALUE(%rbp), %rax
+ 	movss	%xmm0, (%rax)
+ 	jmp	.Lret_void
+
+.Lret_double:
+ 	cmpl	$FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp)
+ 	jne	.Lret_sint64
+
+ 	mov	RVALUE(%rbp), %rax
+ 	movlpd	%xmm0, (%rax)
+ 	jmp	.Lret_void
+
+.Lret_sint64:
+  	cmpl	$FFI_TYPE_SINT64, CIF_FLAGS(%rbp)
+  	jne	.Lret_void
+
+ 	mov	RVALUE(%rbp), %rcx
+ 	mov	%rax, (%rcx)
+ 	jmp	.Lret_void
+	
+.Lret_void:
+	xor	%rax, %rax
+
+	# rbp+16 is where the push of rbp left rsp, so this releases the
+	# argument area and the 48 byte frame in one step
+	lea	16(%rbp), %rsp
+	pop	%rbp
+	retq
+.ffi_call_win64_end:
+#endif /* !_MSC_VER */
+
+
+
+#endif