1
0
Fork 0
mirror of https://github.com/beefytech/Beef.git synced 2025-07-04 15:26:00 +02:00

Initial checkin

This commit is contained in:
Brian Fiete 2019-08-23 11:56:54 -07:00
parent c74712dad9
commit 078564ac9e
3242 changed files with 1616395 additions and 0 deletions

View file

@ -0,0 +1,444 @@
/* -----------------------------------------------------------------------
darwin.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005 Red Hat, Inc.
Copyright (C) 2008 Free Software Foundation, Inc.
X86 Foreign Function Interface
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
-----------------------------------------------------------------------
*/
#ifndef __x86_64__
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
.text
.globl _ffi_prep_args
.align 4
.globl _ffi_call_SYSV
_ffi_call_SYSV:
.LFB1:
pushl %ebp
.LCFI0:
movl %esp,%ebp
.LCFI1:
subl $8,%esp
/* Make room for all of the new args. */
movl 16(%ebp),%ecx
subl %ecx,%esp
movl %esp,%eax
/* Place all of the ffi_prep_args in position */
subl $8,%esp
pushl 12(%ebp)
pushl %eax
call *8(%ebp)
/* Return stack to previous state and call the function */
addl $16,%esp
call *28(%ebp)
/* Load %ecx with the return type code */
movl 20(%ebp),%ecx
/* Protect %esi. We're going to pop it in the epilogue. */
pushl %esi
/* If the return value pointer is NULL, assume no return value. */
cmpl $0,24(%ebp)
jne 0f
/* Even if there is no space for the return value, we are
obliged to handle floating-point values. */
cmpl $FFI_TYPE_FLOAT,%ecx
jne noretval
fstp %st(0)
jmp epilogue
0:
.align 4
call 1f
.Lstore_table:
.long noretval-.Lstore_table /* FFI_TYPE_VOID */
.long retint-.Lstore_table /* FFI_TYPE_INT */
.long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */
.long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */
.long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
.long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */
.long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */
.long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */
.long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */
.long retint-.Lstore_table /* FFI_TYPE_UINT32 */
.long retint-.Lstore_table /* FFI_TYPE_SINT32 */
.long retint64-.Lstore_table /* FFI_TYPE_UINT64 */
.long retint64-.Lstore_table /* FFI_TYPE_SINT64 */
.long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */
.long retint-.Lstore_table /* FFI_TYPE_POINTER */
.long retstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */
.long retstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */
1:
pop %esi
add (%esi, %ecx, 4), %esi
jmp *%esi
/* Sign/zero extend as appropriate. */
retsint8:
movsbl %al, %eax
jmp retint
retsint16:
movswl %ax, %eax
jmp retint
retuint8:
movzbl %al, %eax
jmp retint
retuint16:
movzwl %ax, %eax
jmp retint
retfloat:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
fstps (%ecx)
jmp epilogue
retdouble:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
fstpl (%ecx)
jmp epilogue
retlongdouble:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
fstpt (%ecx)
jmp epilogue
retint64:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
movl %eax,0(%ecx)
movl %edx,4(%ecx)
jmp epilogue
retstruct1b:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
movb %al,0(%ecx)
jmp epilogue
retstruct2b:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
movw %ax,0(%ecx)
jmp epilogue
retint:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
movl %eax,0(%ecx)
retstruct:
/* Nothing to do! */
noretval:
epilogue:
popl %esi
movl %ebp,%esp
popl %ebp
ret
.LFE1:
.ffi_call_SYSV_end:
.align 4
FFI_HIDDEN (ffi_closure_SYSV)
.globl _ffi_closure_SYSV
_ffi_closure_SYSV:
.LFB2:
pushl %ebp
.LCFI2:
movl %esp, %ebp
.LCFI3:
subl $40, %esp
leal -24(%ebp), %edx
movl %edx, -12(%ebp) /* resp */
leal 8(%ebp), %edx
movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */
leal -12(%ebp), %edx
movl %edx, (%esp) /* &resp */
movl %ebx, 8(%esp)
.LCFI7:
call L_ffi_closure_SYSV_inner$stub
movl 8(%esp), %ebx
movl -12(%ebp), %ecx
cmpl $FFI_TYPE_INT, %eax
je .Lcls_retint
/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
cmpl $FFI_TYPE_UINT64, %eax
jge 0f
cmpl $FFI_TYPE_UINT8, %eax
jge .Lcls_retint
0: cmpl $FFI_TYPE_FLOAT, %eax
je .Lcls_retfloat
cmpl $FFI_TYPE_DOUBLE, %eax
je .Lcls_retdouble
cmpl $FFI_TYPE_LONGDOUBLE, %eax
je .Lcls_retldouble
cmpl $FFI_TYPE_SINT64, %eax
je .Lcls_retllong
cmpl $FFI_TYPE_SMALL_STRUCT_1B, %eax
je .Lcls_retstruct1b
cmpl $FFI_TYPE_SMALL_STRUCT_2B, %eax
je .Lcls_retstruct2b
cmpl $FFI_TYPE_STRUCT, %eax
je .Lcls_retstruct
.Lcls_epilogue:
movl %ebp, %esp
popl %ebp
ret
.Lcls_retint:
movl (%ecx), %eax
jmp .Lcls_epilogue
.Lcls_retfloat:
flds (%ecx)
jmp .Lcls_epilogue
.Lcls_retdouble:
fldl (%ecx)
jmp .Lcls_epilogue
.Lcls_retldouble:
fldt (%ecx)
jmp .Lcls_epilogue
.Lcls_retllong:
movl (%ecx), %eax
movl 4(%ecx), %edx
jmp .Lcls_epilogue
.Lcls_retstruct1b:
movsbl (%ecx), %eax
jmp .Lcls_epilogue
.Lcls_retstruct2b:
movswl (%ecx), %eax
jmp .Lcls_epilogue
.Lcls_retstruct:
lea -8(%ebp),%esp
movl %ebp, %esp
popl %ebp
ret $4
.LFE2:
#if !FFI_NO_RAW_API
#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
#define CIF_FLAGS_OFFSET 20
.align 4
FFI_HIDDEN (ffi_closure_raw_SYSV)
.globl _ffi_closure_raw_SYSV
_ffi_closure_raw_SYSV:
.LFB3:
pushl %ebp
.LCFI4:
movl %esp, %ebp
.LCFI5:
pushl %esi
.LCFI6:
subl $36, %esp
movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */
movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
movl %edx, 12(%esp) /* user_data */
leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */
movl %edx, 8(%esp) /* raw_args */
leal -24(%ebp), %edx
movl %edx, 4(%esp) /* &res */
movl %esi, (%esp) /* cif */
call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */
movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */
cmpl $FFI_TYPE_INT, %eax
je .Lrcls_retint
/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
cmpl $FFI_TYPE_UINT64, %eax
jge 0f
cmpl $FFI_TYPE_UINT8, %eax
jge .Lrcls_retint
0:
cmpl $FFI_TYPE_FLOAT, %eax
je .Lrcls_retfloat
cmpl $FFI_TYPE_DOUBLE, %eax
je .Lrcls_retdouble
cmpl $FFI_TYPE_LONGDOUBLE, %eax
je .Lrcls_retldouble
cmpl $FFI_TYPE_SINT64, %eax
je .Lrcls_retllong
.Lrcls_epilogue:
addl $36, %esp
popl %esi
popl %ebp
ret
.Lrcls_retint:
movl -24(%ebp), %eax
jmp .Lrcls_epilogue
.Lrcls_retfloat:
flds -24(%ebp)
jmp .Lrcls_epilogue
.Lrcls_retdouble:
fldl -24(%ebp)
jmp .Lrcls_epilogue
.Lrcls_retldouble:
fldt -24(%ebp)
jmp .Lrcls_epilogue
.Lrcls_retllong:
movl -24(%ebp), %eax
movl -20(%ebp), %edx
jmp .Lrcls_epilogue
.LFE3:
#endif
.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
L_ffi_closure_SYSV_inner$stub:
.indirect_symbol _ffi_closure_SYSV_inner
hlt ; hlt ; hlt ; hlt ; hlt
.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
EH_frame1:
.set L$set$0,LECIE1-LSCIE1
.long L$set$0
LSCIE1:
.long 0x0
.byte 0x1
.ascii "zR\0"
.byte 0x1
.byte 0x7c
.byte 0x8
.byte 0x1
.byte 0x10
.byte 0xc
.byte 0x5
.byte 0x4
.byte 0x88
.byte 0x1
.align 2
LECIE1:
.globl _ffi_call_SYSV.eh
_ffi_call_SYSV.eh:
LSFDE1:
.set L$set$1,LEFDE1-LASFDE1
.long L$set$1
LASFDE1:
.long LASFDE1-EH_frame1
.long .LFB1-.
.set L$set$2,.LFE1-.LFB1
.long L$set$2
.byte 0x0
.byte 0x4
.set L$set$3,.LCFI0-.LFB1
.long L$set$3
.byte 0xe
.byte 0x8
.byte 0x84
.byte 0x2
.byte 0x4
.set L$set$4,.LCFI1-.LCFI0
.long L$set$4
.byte 0xd
.byte 0x4
.align 2
LEFDE1:
.globl _ffi_closure_SYSV.eh
_ffi_closure_SYSV.eh:
LSFDE2:
.set L$set$5,LEFDE2-LASFDE2
.long L$set$5
LASFDE2:
.long LASFDE2-EH_frame1
.long .LFB2-.
.set L$set$6,.LFE2-.LFB2
.long L$set$6
.byte 0x0
.byte 0x4
.set L$set$7,.LCFI2-.LFB2
.long L$set$7
.byte 0xe
.byte 0x8
.byte 0x84
.byte 0x2
.byte 0x4
.set L$set$8,.LCFI3-.LCFI2
.long L$set$8
.byte 0xd
.byte 0x4
.align 2
LEFDE2:
#if !FFI_NO_RAW_API
.globl _ffi_closure_raw_SYSV.eh
_ffi_closure_raw_SYSV.eh:
LSFDE3:
.set L$set$10,LEFDE3-LASFDE3
.long L$set$10
LASFDE3:
.long LASFDE3-EH_frame1
.long .LFB3-.
.set L$set$11,.LFE3-.LFB3
.long L$set$11
.byte 0x0
.byte 0x4
.set L$set$12,.LCFI4-.LFB3
.long L$set$12
.byte 0xe
.byte 0x8
.byte 0x84
.byte 0x2
.byte 0x4
.set L$set$13,.LCFI5-.LCFI4
.long L$set$13
.byte 0xd
.byte 0x4
.byte 0x4
.set L$set$14,.LCFI6-.LCFI5
.long L$set$14
.byte 0x85
.byte 0x3
.align 2
LEFDE3:
#endif
#endif /* ifndef __x86_64__ */

View file

@ -0,0 +1,416 @@
/* -----------------------------------------------------------------------
darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc.
Copyright (c) 2008 Red Hat, Inc.
derived from unix64.S
x86-64 Foreign Function Interface for Darwin.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
#ifdef __x86_64__
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
.file "darwin64.S"
.text
/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
void *raddr, void (*fnaddr)(void));
Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
for this function. This has been allocated by ffi_call. We also
deallocate some of the stack that has been alloca'd. */
.align 3
.globl _ffi_call_unix64
_ffi_call_unix64:
LUW0:
movq (%rsp), %r10 /* Load return address. */
leaq (%rdi, %rsi), %rax /* Find local stack base. */
movq %rdx, (%rax) /* Save flags. */
movq %rcx, 8(%rax) /* Save raddr. */
movq %rbp, 16(%rax) /* Save old frame pointer. */
movq %r10, 24(%rax) /* Relocate return address. */
movq %rax, %rbp /* Finalize local stack frame. */
LUW1:
movq %rdi, %r10 /* Save a copy of the register area. */
movq %r8, %r11 /* Save a copy of the target fn. */
movl %r9d, %eax /* Set number of SSE registers. */
/* Load up all argument registers. */
movq (%r10), %rdi
movq 8(%r10), %rsi
movq 16(%r10), %rdx
movq 24(%r10), %rcx
movq 32(%r10), %r8
movq 40(%r10), %r9
testl %eax, %eax
jnz Lload_sse
Lret_from_load_sse:
/* Deallocate the reg arg area. */
leaq 176(%r10), %rsp
/* Call the user function. */
call *%r11
/* Deallocate stack arg area; local stack frame in redzone. */
leaq 24(%rbp), %rsp
movq 0(%rbp), %rcx /* Reload flags. */
movq 8(%rbp), %rdi /* Reload raddr. */
movq 16(%rbp), %rbp /* Reload old frame pointer. */
LUW2:
/* The first byte of the flags contains the FFI_TYPE. */
movzbl %cl, %r10d
leaq Lstore_table(%rip), %r11
movslq (%r11, %r10, 4), %r10
addq %r11, %r10
jmp *%r10
Lstore_table:
.long Lst_void-Lstore_table /* FFI_TYPE_VOID */
.long Lst_sint32-Lstore_table /* FFI_TYPE_INT */
.long Lst_float-Lstore_table /* FFI_TYPE_FLOAT */
.long Lst_double-Lstore_table /* FFI_TYPE_DOUBLE */
.long Lst_ldouble-Lstore_table /* FFI_TYPE_LONGDOUBLE */
.long Lst_uint8-Lstore_table /* FFI_TYPE_UINT8 */
.long Lst_sint8-Lstore_table /* FFI_TYPE_SINT8 */
.long Lst_uint16-Lstore_table /* FFI_TYPE_UINT16 */
.long Lst_sint16-Lstore_table /* FFI_TYPE_SINT16 */
.long Lst_uint32-Lstore_table /* FFI_TYPE_UINT32 */
.long Lst_sint32-Lstore_table /* FFI_TYPE_SINT32 */
.long Lst_int64-Lstore_table /* FFI_TYPE_UINT64 */
.long Lst_int64-Lstore_table /* FFI_TYPE_SINT64 */
.long Lst_struct-Lstore_table /* FFI_TYPE_STRUCT */
.long Lst_int64-Lstore_table /* FFI_TYPE_POINTER */
.text
.align 3
Lst_void:
ret
.align 3
Lst_uint8:
movzbq %al, %rax
movq %rax, (%rdi)
ret
.align 3
Lst_sint8:
movsbq %al, %rax
movq %rax, (%rdi)
ret
.align 3
Lst_uint16:
movzwq %ax, %rax
movq %rax, (%rdi)
.align 3
Lst_sint16:
movswq %ax, %rax
movq %rax, (%rdi)
ret
.align 3
Lst_uint32:
movl %eax, %eax
movq %rax, (%rdi)
.align 3
Lst_sint32:
cltq
movq %rax, (%rdi)
ret
.align 3
Lst_int64:
movq %rax, (%rdi)
ret
.align 3
Lst_float:
movss %xmm0, (%rdi)
ret
.align 3
Lst_double:
movsd %xmm0, (%rdi)
ret
Lst_ldouble:
fstpt (%rdi)
ret
.align 3
Lst_struct:
leaq -20(%rsp), %rsi /* Scratch area in redzone. */
/* We have to locate the values now, and since we don't want to
write too much data into the user's return value, we spill the
value to a 16 byte scratch area first. Bits 8, 9, and 10
control where the values are located. Only one of the three
bits will be set; see ffi_prep_cif_machdep for the pattern. */
movd %xmm0, %r10
movd %xmm1, %r11
testl $0x100, %ecx
cmovnz %rax, %rdx
cmovnz %r10, %rax
testl $0x200, %ecx
cmovnz %r10, %rdx
testl $0x400, %ecx
cmovnz %r10, %rax
cmovnz %r11, %rdx
movq %rax, (%rsi)
movq %rdx, 8(%rsi)
/* Bits 12-31 contain the true size of the structure. Copy from
the scratch area to the true destination. */
shrl $12, %ecx
rep movsb
ret
/* Many times we can avoid loading any SSE registers at all.
It's not worth an indirect jump to load the exact set of
SSE registers needed; zero or all is a good compromise. */
.align 3
LUW3:
Lload_sse:
movdqa 48(%r10), %xmm0
movdqa 64(%r10), %xmm1
movdqa 80(%r10), %xmm2
movdqa 96(%r10), %xmm3
movdqa 112(%r10), %xmm4
movdqa 128(%r10), %xmm5
movdqa 144(%r10), %xmm6
movdqa 160(%r10), %xmm7
jmp Lret_from_load_sse
LUW4:
.align 3
.globl _ffi_closure_unix64
_ffi_closure_unix64:
LUW5:
/* The carry flag is set by the trampoline iff SSE registers
are used. Don't clobber it before the branch instruction. */
leaq -200(%rsp), %rsp
LUW6:
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
movq %rdx, 16(%rsp)
movq %rcx, 24(%rsp)
movq %r8, 32(%rsp)
movq %r9, 40(%rsp)
jc Lsave_sse
Lret_from_save_sse:
movq %r10, %rdi
leaq 176(%rsp), %rsi
movq %rsp, %rdx
leaq 208(%rsp), %rcx
call _ffi_closure_unix64_inner
/* Deallocate stack frame early; return value is now in redzone. */
addq $200, %rsp
LUW7:
/* The first byte of the return value contains the FFI_TYPE. */
movzbl %al, %r10d
leaq Lload_table(%rip), %r11
movslq (%r11, %r10, 4), %r10
addq %r11, %r10
jmp *%r10
Lload_table:
.long Lld_void-Lload_table /* FFI_TYPE_VOID */
.long Lld_int32-Lload_table /* FFI_TYPE_INT */
.long Lld_float-Lload_table /* FFI_TYPE_FLOAT */
.long Lld_double-Lload_table /* FFI_TYPE_DOUBLE */
.long Lld_ldouble-Lload_table /* FFI_TYPE_LONGDOUBLE */
.long Lld_int8-Lload_table /* FFI_TYPE_UINT8 */
.long Lld_int8-Lload_table /* FFI_TYPE_SINT8 */
.long Lld_int16-Lload_table /* FFI_TYPE_UINT16 */
.long Lld_int16-Lload_table /* FFI_TYPE_SINT16 */
.long Lld_int32-Lload_table /* FFI_TYPE_UINT32 */
.long Lld_int32-Lload_table /* FFI_TYPE_SINT32 */
.long Lld_int64-Lload_table /* FFI_TYPE_UINT64 */
.long Lld_int64-Lload_table /* FFI_TYPE_SINT64 */
.long Lld_struct-Lload_table /* FFI_TYPE_STRUCT */
.long Lld_int64-Lload_table /* FFI_TYPE_POINTER */
.text
.align 3
Lld_void:
ret
.align 3
Lld_int8:
movzbl -24(%rsp), %eax
ret
.align 3
Lld_int16:
movzwl -24(%rsp), %eax
ret
.align 3
Lld_int32:
movl -24(%rsp), %eax
ret
.align 3
Lld_int64:
movq -24(%rsp), %rax
ret
.align 3
Lld_float:
movss -24(%rsp), %xmm0
ret
.align 3
Lld_double:
movsd -24(%rsp), %xmm0
ret
.align 3
Lld_ldouble:
fldt -24(%rsp)
ret
.align 3
Lld_struct:
/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
%rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading
both rdx and xmm1 with the second word. For the remaining,
bit 8 set means xmm0 gets the second word, and bit 9 means
that rax gets the second word. */
movq -24(%rsp), %rcx
movq -16(%rsp), %rdx
movq -16(%rsp), %xmm1
testl $0x100, %eax
cmovnz %rdx, %rcx
movd %rcx, %xmm0
testl $0x200, %eax
movq -24(%rsp), %rax
cmovnz %rdx, %rax
ret
/* See the comment above Lload_sse; the same logic applies here. */
.align 3
LUW8:
Lsave_sse:
movdqa %xmm0, 48(%rsp)
movdqa %xmm1, 64(%rsp)
movdqa %xmm2, 80(%rsp)
movdqa %xmm3, 96(%rsp)
movdqa %xmm4, 112(%rsp)
movdqa %xmm5, 128(%rsp)
movdqa %xmm6, 144(%rsp)
movdqa %xmm7, 160(%rsp)
jmp Lret_from_save_sse
LUW9:
.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
EH_frame1:
.set L$set$0,LECIE1-LSCIE1 /* CIE Length */
.long L$set$0
LSCIE1:
.long 0x0 /* CIE Identifier Tag */
.byte 0x1 /* CIE Version */
.ascii "zR\0" /* CIE Augmentation */
.byte 0x1 /* uleb128 0x1; CIE Code Alignment Factor */
.byte 0x78 /* sleb128 -8; CIE Data Alignment Factor */
.byte 0x10 /* CIE RA Column */
.byte 0x1 /* uleb128 0x1; Augmentation size */
.byte 0x10 /* FDE Encoding (pcrel sdata4) */
.byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
.byte 0x7 /* uleb128 0x7 */
.byte 0x8 /* uleb128 0x8 */
.byte 0x90 /* DW_CFA_offset, column 0x10 */
.byte 0x1
.align 3
LECIE1:
.globl _ffi_call_unix64.eh
_ffi_call_unix64.eh:
LSFDE1:
.set L$set$1,LEFDE1-LASFDE1 /* FDE Length */
.long L$set$1
LASFDE1:
.long LASFDE1-EH_frame1 /* FDE CIE offset */
.quad LUW0-. /* FDE initial location */
.set L$set$2,LUW4-LUW0 /* FDE address range */
.quad L$set$2
.byte 0x0 /* Augmentation size */
.byte 0x4 /* DW_CFA_advance_loc4 */
.set L$set$3,LUW1-LUW0
.long L$set$3
/* New stack frame based off rbp. This is a itty bit of unwind
trickery in that the CFA *has* changed. There is no easy way
to describe it correctly on entry to the function. Fortunately,
it doesn't matter too much since at all points we can correctly
unwind back to ffi_call. Note that the location to which we
moved the return address is (the new) CFA-8, so from the
perspective of the unwind info, it hasn't moved. */
.byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */
.byte 0x6
.byte 0x20
.byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */
.byte 0x2
.byte 0xa /* DW_CFA_remember_state */
.byte 0x4 /* DW_CFA_advance_loc4 */
.set L$set$4,LUW2-LUW1
.long L$set$4
.byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
.byte 0x7
.byte 0x8
.byte 0xc0+6 /* DW_CFA_restore, %rbp */
.byte 0x4 /* DW_CFA_advance_loc4 */
.set L$set$5,LUW3-LUW2
.long L$set$5
.byte 0xb /* DW_CFA_restore_state */
.align 3
LEFDE1:
.globl _ffi_closure_unix64.eh
_ffi_closure_unix64.eh:
LSFDE3:
.set L$set$6,LEFDE3-LASFDE3 /* FDE Length */
.long L$set$6
LASFDE3:
.long LASFDE3-EH_frame1 /* FDE CIE offset */
.quad LUW5-. /* FDE initial location */
.set L$set$7,LUW9-LUW5 /* FDE address range */
.quad L$set$7
.byte 0x0 /* Augmentation size */
.byte 0x4 /* DW_CFA_advance_loc4 */
.set L$set$8,LUW6-LUW5
.long L$set$8
.byte 0xe /* DW_CFA_def_cfa_offset */
.byte 208,1 /* uleb128 208 */
.byte 0xa /* DW_CFA_remember_state */
.byte 0x4 /* DW_CFA_advance_loc4 */
.set L$set$9,LUW7-LUW6
.long L$set$9
.byte 0xe /* DW_CFA_def_cfa_offset */
.byte 0x8
.byte 0x4 /* DW_CFA_advance_loc4 */
.set L$set$10,LUW8-LUW7
.long L$set$10
.byte 0xb /* DW_CFA_restore_state */
.align 3
LEFDE3:
.subsections_via_symbols
#endif /* __x86_64__ */

View file

@ -0,0 +1,868 @@
/* -----------------------------------------------------------------------
ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008 Red Hat, Inc.
Copyright (c) 2002 Ranjit Mathew
Copyright (c) 2002 Bo Thorsen
Copyright (c) 2002 Roger Sayle
Copyright (C) 2008, 2010 Free Software Foundation, Inc.
x86 Foreign Function Interface
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
#if !defined(__x86_64__) || defined(_WIN64)
#ifdef _WIN64
#include <windows.h>
#endif
#include <ffi.h>
#include <ffi_common.h>
#include <stdlib.h>
/* ffi_prep_args is called by the assembly routine once stack space
has been allocated for the function's arguments */
void ffi_prep_args(char *stack, extended_cif *ecif)
{
register unsigned int i;
register void **p_argv;
register char *argp;
register ffi_type **p_arg;
#ifdef X86_WIN32
size_t p_stack_args[2];
void *p_stack_data[2];
char *argp2 = stack;
int stack_args_count = 0;
int cabi = ecif->cif->abi;
#endif
argp = stack;
if ((ecif->cif->flags == FFI_TYPE_STRUCT
|| ecif->cif->flags == FFI_TYPE_MS_STRUCT)
#ifdef X86_WIN64
&& (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2
&& ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8)
#endif
)
{
*(void **) argp = ecif->rvalue;
#ifdef X86_WIN32
/* For fastcall/thiscall this is first register-passed
argument. */
if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL)
{
p_stack_args[stack_args_count] = sizeof (void*);
p_stack_data[stack_args_count] = argp;
++stack_args_count;
}
#endif
argp += sizeof(void*);
}
p_argv = ecif->avalue;
for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
i != 0;
i--, p_arg++)
{
size_t z;
/* Align if necessary */
if ((sizeof(void*) - 1) & (size_t) argp)
argp = (char *) ALIGN(argp, sizeof(void*));
z = (*p_arg)->size;
#ifdef X86_WIN64
if (z > sizeof(ffi_arg)
|| ((*p_arg)->type == FFI_TYPE_STRUCT
&& (z != 1 && z != 2 && z != 4 && z != 8))
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
|| ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
#endif
)
{
z = sizeof(ffi_arg);
*(void **)argp = *p_argv;
}
else if ((*p_arg)->type == FFI_TYPE_FLOAT)
{
memcpy(argp, *p_argv, z);
}
else
#endif
if (z < sizeof(ffi_arg))
{
z = sizeof(ffi_arg);
switch ((*p_arg)->type)
{
case FFI_TYPE_SINT8:
*(ffi_sarg *) argp = (ffi_sarg)*(SINT8 *)(* p_argv);
break;
case FFI_TYPE_UINT8:
*(ffi_arg *) argp = (ffi_arg)*(UINT8 *)(* p_argv);
break;
case FFI_TYPE_SINT16:
*(ffi_sarg *) argp = (ffi_sarg)*(SINT16 *)(* p_argv);
break;
case FFI_TYPE_UINT16:
*(ffi_arg *) argp = (ffi_arg)*(UINT16 *)(* p_argv);
break;
case FFI_TYPE_SINT32:
*(ffi_sarg *) argp = (ffi_sarg)*(SINT32 *)(* p_argv);
break;
case FFI_TYPE_UINT32:
*(ffi_arg *) argp = (ffi_arg)*(UINT32 *)(* p_argv);
break;
case FFI_TYPE_STRUCT:
*(ffi_arg *) argp = *(ffi_arg *)(* p_argv);
break;
default:
FFI_ASSERT(0);
}
}
else
{
memcpy(argp, *p_argv, z);
}
#ifdef X86_WIN32
/* For thiscall/fastcall convention register-passed arguments
are the first two none-floating-point arguments with a size
smaller or equal to sizeof (void*). */
if ((cabi == FFI_THISCALL && stack_args_count < 1)
|| (cabi == FFI_FASTCALL && stack_args_count < 2))
{
if (z <= 4
&& ((*p_arg)->type != FFI_TYPE_FLOAT
&& (*p_arg)->type != FFI_TYPE_STRUCT))
{
p_stack_args[stack_args_count] = z;
p_stack_data[stack_args_count] = argp;
++stack_args_count;
}
}
#endif
p_argv++;
#ifdef X86_WIN64
argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
#else
argp += z;
#endif
}
#ifdef X86_WIN32
/* We need to move the register-passed arguments for thiscall/fastcall
on top of stack, so that those can be moved to registers ecx/edx by
call-handler. */
if (stack_args_count > 0)
{
size_t zz = (p_stack_args[0] + 3) & ~3;
char *h;
/* Move first argument to top-stack position. */
if (p_stack_data[0] != argp2)
{
h = alloca (zz + 1);
memcpy (h, p_stack_data[0], zz);
memmove (argp2 + zz, argp2,
(size_t) ((char *) p_stack_data[0] - (char*)argp2));
memcpy (argp2, h, zz);
}
argp2 += zz;
--stack_args_count;
if (zz > 4)
stack_args_count = 0;
/* If we have a second argument, then move it on top
after the first one. */
if (stack_args_count > 0 && p_stack_data[1] != argp2)
{
zz = p_stack_args[1];
zz = (zz + 3) & ~3;
h = alloca (zz + 1);
h = alloca (zz + 1);
memcpy (h, p_stack_data[1], zz);
memmove (argp2 + zz, argp2, (size_t) ((char*) p_stack_data[1] - (char*)argp2));
memcpy (argp2, h, zz);
}
}
#endif
return;
}
/* Perform machine dependent cif processing */
ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
{
unsigned int i;
ffi_type **ptr;
/* Set the return type flag */
switch (cif->rtype->type)
{
case FFI_TYPE_VOID:
case FFI_TYPE_UINT8:
case FFI_TYPE_UINT16:
case FFI_TYPE_SINT8:
case FFI_TYPE_SINT16:
#ifdef X86_WIN64
case FFI_TYPE_UINT32:
case FFI_TYPE_SINT32:
#endif
case FFI_TYPE_SINT64:
case FFI_TYPE_FLOAT:
case FFI_TYPE_DOUBLE:
#ifndef X86_WIN64
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
case FFI_TYPE_LONGDOUBLE:
#endif
#endif
cif->flags = (unsigned) cif->rtype->type;
break;
case FFI_TYPE_UINT64:
#ifdef X86_WIN64
case FFI_TYPE_POINTER:
#endif
cif->flags = FFI_TYPE_SINT64;
break;
case FFI_TYPE_STRUCT:
#ifndef X86
if (cif->rtype->size == 1)
{
cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */
}
else if (cif->rtype->size == 2)
{
cif->flags = FFI_TYPE_SMALL_STRUCT_2B; /* same as short size */
}
else if (cif->rtype->size == 4)
{
#ifdef X86_WIN64
cif->flags = FFI_TYPE_SMALL_STRUCT_4B;
#else
cif->flags = FFI_TYPE_INT; /* same as int type */
#endif
}
else if (cif->rtype->size == 8)
{
cif->flags = FFI_TYPE_SINT64; /* same as int64 type */
}
else
#endif
{
#ifdef X86_WIN32
if (cif->abi == FFI_MS_CDECL)
cif->flags = FFI_TYPE_MS_STRUCT;
else
#endif
cif->flags = FFI_TYPE_STRUCT;
/* allocate space for return value pointer */
cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG);
}
break;
default:
#ifdef X86_WIN64
cif->flags = FFI_TYPE_SINT64;
break;
case FFI_TYPE_INT:
cif->flags = FFI_TYPE_SINT32;
#else
cif->flags = FFI_TYPE_INT;
#endif
break;
}
for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
{
//BCF
//int align = (*ptr)->alignment;
int align = sizeof(int);
if (((*ptr)->alignment - 1) & cif->bytes)
cif->bytes = ALIGN(cif->bytes, align);
cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG);
}
#ifdef X86_WIN64
/* ensure space for storing four registers */
cif->bytes += 4 * sizeof(ffi_arg);
#endif
//BCF - was unconditional before
#ifndef X86_WIN32
cif->bytes = (cif->bytes + 15) & ~0xF;
#endif
return FFI_OK;
}
#ifdef X86_WIN64
extern int
ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *,
unsigned, unsigned, unsigned *, void (*fn)(void));
#elif defined(X86_WIN32)
extern void
ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *,
unsigned, unsigned, unsigned, unsigned *, void (*fn)(void));
#else
extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
unsigned, unsigned, unsigned *, void (*fn)(void));
#endif
void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
extended_cif ecif;
ecif.cif = cif;
ecif.avalue = avalue;
/* If the return value is a struct and we don't have a return */
/* value address then we need to make one */
#ifdef X86_WIN64
if (rvalue == NULL
&& cif->flags == FFI_TYPE_STRUCT
&& cif->rtype->size != 1 && cif->rtype->size != 2
&& cif->rtype->size != 4 && cif->rtype->size != 8)
{
ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF);
}
#else
if (rvalue == NULL
&& (cif->flags == FFI_TYPE_STRUCT
|| cif->flags == FFI_TYPE_MS_STRUCT))
{
ecif.rvalue = alloca(cif->rtype->size);
}
#endif
else
ecif.rvalue = rvalue;
switch (cif->abi)
{
#ifdef X86_WIN64
case FFI_WIN64:
ffi_call_win64(ffi_prep_args, &ecif, cif->bytes,
cif->flags, ecif.rvalue, fn);
break;
#elif defined(X86_WIN32)
case FFI_SYSV:
case FFI_STDCALL:
case FFI_MS_CDECL:
ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags,
ecif.rvalue, fn);
break;
case FFI_THISCALL:
case FFI_FASTCALL:
{
unsigned int abi = cif->abi;
unsigned int i, passed_regs = 0;
if (cif->flags == FFI_TYPE_STRUCT)
++passed_regs;
for (i=0; i < cif->nargs && passed_regs < 2;i++)
{
size_t sz;
if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
|| cif->arg_types[i]->type == FFI_TYPE_STRUCT)
continue;
sz = (cif->arg_types[i]->size + 3) & ~3;
if (sz == 0 || sz > 4)
continue;
++passed_regs;
}
if (passed_regs < 2 && abi == FFI_FASTCALL)
abi = FFI_THISCALL;
if (passed_regs < 1 && abi == FFI_THISCALL)
abi = FFI_STDCALL;
ffi_call_win32(ffi_prep_args, &ecif, abi, cif->bytes, cif->flags,
ecif.rvalue, fn);
}
break;
#else
case FFI_SYSV:
ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue,
fn);
break;
#endif
default:
FFI_ASSERT(0);
break;
}
}
/** private members **/
/* The following __attribute__((regparm(1))) decorations will have no effect
on MSVC or SUNPRO_C -- standard conventions apply. */
static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
void** args, ffi_cif* cif);
void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *)
__attribute__ ((regparm(1)));
unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
__attribute__ ((regparm(1)));
void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
__attribute__ ((regparm(1)));
#ifdef X86_WIN32
void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *)
__attribute__ ((regparm(1)));
void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *)
__attribute__ ((regparm(1)));
void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *)
__attribute__ ((regparm(1)));
#endif
#ifdef X86_WIN64
void FFI_HIDDEN ffi_closure_win64 (ffi_closure *);
#endif
/* This function is jumped to by the trampoline */
#ifdef X86_WIN64
void * FFI_HIDDEN
ffi_closure_win64_inner (ffi_closure *closure, void *args) {
ffi_cif *cif;
void **arg_area;
void *result;
void *resp = &result;
cif = closure->cif;
arg_area = (void**) alloca (cif->nargs * sizeof (void*));
/* this call will initialize ARG_AREA, such that each
* element in that array points to the corresponding
* value on the stack; and if the function returns
* a structure, it will change RESP to point to the
* structure return address. */
ffi_prep_incoming_args_SYSV(args, &resp, arg_area, cif);
(closure->fun) (cif, resp, arg_area, closure->user_data);
/* The result is returned in rax. This does the right thing for
result types except for floats; we have to 'mov xmm0, rax' in the
caller to correct this.
TODO: structure sizes of 3 5 6 7 are returned by reference, too!!!
*/
return cif->rtype->size > sizeof(void *) ? resp : *(void **)resp;
}
#else
unsigned int FFI_HIDDEN __attribute__ ((regparm(1)))
ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args)
{
/* our various things... */
ffi_cif *cif;
void **arg_area;
cif = closure->cif;
arg_area = (void**) alloca (cif->nargs * sizeof (void*));
/* this call will initialize ARG_AREA, such that each
* element in that array points to the corresponding
* value on the stack; and if the function returns
* a structure, it will change RESP to point to the
* structure return address. */
ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif);
(closure->fun) (cif, *respp, arg_area, closure->user_data);
return cif->flags;
}
#endif /* !X86_WIN64 */
static void
ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
ffi_cif *cif)
{
register unsigned int i;
register void **p_argv;
register char *argp;
register ffi_type **p_arg;
argp = stack;
#ifdef X86_WIN64
if (cif->rtype->size > sizeof(ffi_arg)
|| (cif->flags == FFI_TYPE_STRUCT
&& (cif->rtype->size != 1 && cif->rtype->size != 2
&& cif->rtype->size != 4 && cif->rtype->size != 8))) {
*rvalue = *(void **) argp;
argp += sizeof(void *);
}
#else
if ( cif->flags == FFI_TYPE_STRUCT
|| cif->flags == FFI_TYPE_MS_STRUCT ) {
*rvalue = *(void **) argp;
argp += sizeof(void *);
}
#endif
p_argv = avalue;
for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++)
{
size_t z;
/* Align if necessary */
if ((sizeof(void*) - 1) & (size_t) argp) {
argp = (char *) ALIGN(argp, sizeof(void*));
}
#ifdef X86_WIN64
if ((*p_arg)->size > sizeof(ffi_arg)
|| ((*p_arg)->type == FFI_TYPE_STRUCT
&& ((*p_arg)->size != 1 && (*p_arg)->size != 2
&& (*p_arg)->size != 4 && (*p_arg)->size != 8)))
{
z = sizeof(void *);
*p_argv = *(void **)argp;
}
else
#endif
{
z = (*p_arg)->size;
/* because we're little endian, this is what it turns into. */
*p_argv = (void*) argp;
}
p_argv++;
#ifdef X86_WIN64
argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
#else
argp += z;
#endif
}
return;
}
#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \
{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
void* __fun = (void*)(FUN); \
void* __ctx = (void*)(CTX); \
*(unsigned char*) &__tramp[0] = 0x41; \
*(unsigned char*) &__tramp[1] = 0xbb; \
*(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11 */ \
*(unsigned char*) &__tramp[6] = 0x48; \
*(unsigned char*) &__tramp[7] = 0xb8; \
*(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax */ \
*(unsigned char *) &__tramp[16] = 0x49; \
*(unsigned char *) &__tramp[17] = 0xba; \
*(void**) &__tramp[18] = __fun; /* mov __fun, %r10 */ \
*(unsigned char *) &__tramp[26] = 0x41; \
*(unsigned char *) &__tramp[27] = 0xff; \
*(unsigned char *) &__tramp[28] = 0xe2; /* jmp %r10 */ \
}
/* How to make a trampoline. Derived from gcc/config/i386/i386.c. */
#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
unsigned int __fun = (unsigned int)(FUN); \
unsigned int __ctx = (unsigned int)(CTX); \
unsigned int __dis = __fun - (__ctx + 10); \
*(unsigned char*) &__tramp[0] = 0xb8; \
*(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
*(unsigned char *) &__tramp[5] = 0xe9; \
*(unsigned int*) &__tramp[6] = __dis; /* jmp __fun */ \
}
#ifdef zX86_WIN32
#define FFI_INIT_TRAMPOLINE_THISCALL(TRAMP,FUN,CTX,SIZE) \
{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
unsigned int __fun = (unsigned int)(FUN); \
unsigned int __ctx = (unsigned int)(CTX); \
unsigned int __dis = __fun - (__ctx + 10); \
unsigned short __size = (unsigned short)(SIZE); \
*(unsigned char*) &__tramp[0] = 0xb8; \
*(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
*(unsigned char*) &__tramp[5] = 0x58; /* pop eax - old return */ \
*(unsigned char*) &__tramp[6] = 0x51; /* push ecx - this */ \
*(unsigned char*) &__tramp[7] = 0x50; /* push eax */ \
*(unsigned char *) &__tramp[8] = 0xe8; \
*(unsigned int*) &__tramp[9] = __dis; /* call __fun */ \
*(unsigned char *) &__tramp[13] = 0xc2; \
*(unsigned short*) &__tramp[14] = __size; /* ret __size */ \
}
#else
#define FFI_INIT_TRAMPOLINE_THISCALL(TRAMP,FUN,CTX,SIZE) \
{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
unsigned int __fun = (unsigned int)(FUN); \
unsigned int __ctx = (unsigned int)(CTX); \
unsigned int __dis = __fun - (__ctx + 49); \
unsigned short __size = (unsigned short)(SIZE); \
*(unsigned int *) &__tramp[0] = 0x8324048b; /* mov (%esp), %eax */ \
*(unsigned int *) &__tramp[4] = 0x4c890cec; /* sub $12, %esp */ \
*(unsigned int *) &__tramp[8] = 0x04890424; /* mov %ecx, 4(%esp) */ \
*(unsigned char*) &__tramp[12] = 0x24; /* mov %eax, (%esp) */ \
*(unsigned char*) &__tramp[13] = 0xb8; \
*(unsigned int *) &__tramp[14] = __size; /* mov __size, %eax */ \
*(unsigned int *) &__tramp[18] = 0x08244c8d; /* lea 8(%esp), %ecx */ \
*(unsigned int *) &__tramp[22] = 0x4802e8c1; /* shr $2, %eax ; dec %eax */ \
*(unsigned short*) &__tramp[26] = 0x0b74; /* jz 1f */ \
*(unsigned int *) &__tramp[28] = 0x8908518b; /* 2b: mov 8(%ecx), %edx */ \
*(unsigned int *) &__tramp[32] = 0x04c18311; /* mov %edx, (%ecx) ; add $4, %ecx */ \
*(unsigned char*) &__tramp[36] = 0x48; /* dec %eax */ \
*(unsigned short*) &__tramp[37] = 0xf575; /* jnz 2b ; 1f: */ \
*(unsigned char*) &__tramp[39] = 0xb8; \
*(unsigned int*) &__tramp[40] = __ctx; /* movl __ctx, %eax */ \
*(unsigned char *) &__tramp[44] = 0xe8; \
*(unsigned int*) &__tramp[45] = __dis; /* call __fun */ \
*(unsigned char*) &__tramp[49] = 0xc2; /* ret */ \
*(unsigned short*) &__tramp[50] = (__size + 8); /* ret (__size + 8) */ \
}
#endif
#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE) \
{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
unsigned int __fun = (unsigned int)(FUN); \
unsigned int __ctx = (unsigned int)(CTX); \
unsigned int __dis = __fun - (__ctx + 10); \
unsigned short __size = (unsigned short)(SIZE); \
*(unsigned char*) &__tramp[0] = 0xb8; \
*(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
*(unsigned char *) &__tramp[5] = 0xe8; \
*(unsigned int*) &__tramp[6] = __dis; /* call __fun */ \
*(unsigned char *) &__tramp[10] = 0xc2; \
*(unsigned short*) &__tramp[11] = __size; /* ret __size */ \
}
/* the cif must already be prep'ed */
ffi_status
ffi_prep_closure_loc (ffi_closure* closure,
ffi_cif* cif,
void (*fun)(ffi_cif*,void*,void**,void*),
void *user_data,
void *codeloc)
{
#ifdef X86_WIN64
#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE)
#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0)
if (cif->abi == FFI_WIN64)
{
int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3);
FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0],
&ffi_closure_win64,
codeloc, mask);
/* make sure we can execute here */
}
#else
if (cif->abi == FFI_SYSV)
{
FFI_INIT_TRAMPOLINE (&closure->tramp[0],
&ffi_closure_SYSV,
(void*)codeloc);
}
#ifdef X86_WIN32
else if (cif->abi == FFI_THISCALL)
{
FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0],
&ffi_closure_THISCALL,
(void*)codeloc,
cif->bytes);
}
else if (cif->abi == FFI_STDCALL)
{
FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0],
&ffi_closure_STDCALL,
(void*)codeloc, cif->bytes);
}
else if (cif->abi == FFI_MS_CDECL)
{
FFI_INIT_TRAMPOLINE (&closure->tramp[0],
&ffi_closure_SYSV,
(void*)codeloc);
}
#endif /* X86_WIN32 */
#endif /* !X86_WIN64 */
else
{
return FFI_BAD_ABI;
}
closure->cif = cif;
closure->user_data = user_data;
closure->fun = fun;
return FFI_OK;
}
/* ------- Native raw API support -------------------------------- */
#if !FFI_NO_RAW_API
ffi_status
ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
ffi_cif* cif,
void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
void *user_data,
void *codeloc)
{
int i;
if (cif->abi != FFI_SYSV) {
#ifdef X86_WIN32
if (cif->abi != FFI_THISCALL)
#endif
return FFI_BAD_ABI;
}
/* we currently don't support certain kinds of arguments for raw
closures. This should be implemented by a separate assembly
language routine, since it would require argument processing,
something we don't do now for performance. */
for (i = cif->nargs-1; i >= 0; i--)
{
FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_STRUCT);
FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE);
}
#ifdef X86_WIN32
if (cif->abi == FFI_SYSV)
{
#endif
FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV,
codeloc);
#ifdef X86_WIN32
}
else if (cif->abi == FFI_THISCALL)
{
FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL,
codeloc, cif->bytes);
}
#endif
closure->cif = cif;
closure->user_data = user_data;
closure->fun = fun;
return FFI_OK;
}
static void
ffi_prep_args_raw(char *stack, extended_cif *ecif)
{
memcpy (stack, ecif->avalue, ecif->cif->bytes);
}
/* we borrow this routine from libffi (it must be changed, though, to
* actually call the function passed in the first argument. as of
* libffi-1.20, this is not the case.)
*/
void
ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
{
extended_cif ecif;
void **avalue = (void **)fake_avalue;
ecif.cif = cif;
ecif.avalue = avalue;
/* If the return value is a struct and we don't have a return */
/* value address then we need to make one */
if (rvalue == NULL
&& (cif->flags == FFI_TYPE_STRUCT
|| cif->flags == FFI_TYPE_MS_STRUCT))
{
ecif.rvalue = alloca(cif->rtype->size);
}
else
ecif.rvalue = rvalue;
switch (cif->abi)
{
#ifdef X86_WIN32
case FFI_SYSV:
case FFI_STDCALL:
case FFI_MS_CDECL:
ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags,
ecif.rvalue, fn);
break;
case FFI_THISCALL:
case FFI_FASTCALL:
{
unsigned int abi = cif->abi;
unsigned int i, passed_regs = 0;
if (cif->flags == FFI_TYPE_STRUCT)
++passed_regs;
for (i=0; i < cif->nargs && passed_regs < 2;i++)
{
size_t sz;
if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
|| cif->arg_types[i]->type == FFI_TYPE_STRUCT)
continue;
sz = (cif->arg_types[i]->size + 3) & ~3;
if (sz == 0 || sz > 4)
continue;
++passed_regs;
}
if (passed_regs < 2 && abi == FFI_FASTCALL)
cif->abi = abi = FFI_THISCALL;
if (passed_regs < 1 && abi == FFI_THISCALL)
cif->abi = abi = FFI_STDCALL;
ffi_call_win32(ffi_prep_args_raw, &ecif, abi, cif->bytes, cif->flags,
ecif.rvalue, fn);
}
break;
#else
case FFI_SYSV:
ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
ecif.rvalue, fn);
break;
#endif
default:
FFI_ASSERT(0);
break;
}
}
#endif
#endif /* !__x86_64__ || X86_WIN64 */

View file

@ -0,0 +1,673 @@
/* -----------------------------------------------------------------------
ffi64.c - Copyright (c) 2013 The Written Word, Inc.
Copyright (c) 2011 Anthony Green
Copyright (c) 2008, 2010 Red Hat, Inc.
Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
x86-64 Foreign Function Interface
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
#include <ffi.h>
#include <ffi_common.h>
#include <stdlib.h>
#include <stdarg.h>
#ifdef __x86_64__
#define MAX_GPR_REGS 6
#define MAX_SSE_REGS 8
#if defined(__INTEL_COMPILER)
#define UINT128 __m128
#else
#if defined(__SUNPRO_C)
#include <sunmedia_types.h>
#define UINT128 __m128i
#else
#define UINT128 __int128_t
#endif
#endif
union big_int_union
{
UINT32 i32;
UINT64 i64;
UINT128 i128;
};
struct register_args
{
/* Registers for argument passing. */
UINT64 gpr[MAX_GPR_REGS];
union big_int_union sse[MAX_SSE_REGS];
};
extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
void *raddr, void (*fnaddr)(void), unsigned ssecount);
/* All reference to register classes here is identical to the code in
gcc/config/i386/i386.c. Do *not* change one without the other. */
/* Register class used for passing given 64bit part of the argument.
These represent classes as documented by the PS ABI, with the
exception of SSESF, SSEDF classes, that are basically SSE class,
just gcc will use SF or DFmode move instead of DImode to avoid
reformatting penalties.
Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
whenever possible (upper half does contain padding). */
enum x86_64_reg_class
{
X86_64_NO_CLASS,
X86_64_INTEGER_CLASS,
X86_64_INTEGERSI_CLASS,
X86_64_SSE_CLASS,
X86_64_SSESF_CLASS,
X86_64_SSEDF_CLASS,
X86_64_SSEUP_CLASS,
X86_64_X87_CLASS,
X86_64_X87UP_CLASS,
X86_64_COMPLEX_X87_CLASS,
X86_64_MEMORY_CLASS
};
#define MAX_CLASSES 4
#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS)
/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
of this code is to classify each 8bytes of incoming argument by the register
class and assign registers accordingly. */
/* Return the union class of CLASS1 and CLASS2.
See the x86-64 PS ABI for details. */
static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
/* Rule #1: If both classes are equal, this is the resulting class. */
if (class1 == class2)
return class1;
/* Rule #2: If one of the classes is NO_CLASS, the resulting class is
the other class. */
if (class1 == X86_64_NO_CLASS)
return class2;
if (class2 == X86_64_NO_CLASS)
return class1;
/* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
return X86_64_MEMORY_CLASS;
/* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
|| (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
return X86_64_INTEGERSI_CLASS;
if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
|| class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
return X86_64_INTEGER_CLASS;
/* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
MEMORY is used. */
if (class1 == X86_64_X87_CLASS
|| class1 == X86_64_X87UP_CLASS
|| class1 == X86_64_COMPLEX_X87_CLASS
|| class2 == X86_64_X87_CLASS
|| class2 == X86_64_X87UP_CLASS
|| class2 == X86_64_COMPLEX_X87_CLASS)
return X86_64_MEMORY_CLASS;
/* Rule #6: Otherwise class SSE is used. */
return X86_64_SSE_CLASS;
}
/* Classify the argument of type TYPE and mode MODE.
CLASSES will be filled by the register class used to pass each word
of the operand. The number of words is returned. In case the parameter
should be passed in memory, 0 is returned. As a special case for zero
sized containers, classes[0] will be NO_CLASS and 1 is returned.
See the x86-64 PS ABI for details.
*/
static int
classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
size_t byte_offset)
{
switch (type->type)
{
case FFI_TYPE_UINT8:
case FFI_TYPE_SINT8:
case FFI_TYPE_UINT16:
case FFI_TYPE_SINT16:
case FFI_TYPE_UINT32:
case FFI_TYPE_SINT32:
case FFI_TYPE_UINT64:
case FFI_TYPE_SINT64:
case FFI_TYPE_POINTER:
{
int size = byte_offset + type->size;
if (size <= 4)
{
classes[0] = X86_64_INTEGERSI_CLASS;
return 1;
}
else if (size <= 8)
{
classes[0] = X86_64_INTEGER_CLASS;
return 1;
}
else if (size <= 12)
{
classes[0] = X86_64_INTEGER_CLASS;
classes[1] = X86_64_INTEGERSI_CLASS;
return 2;
}
else if (size <= 16)
{
classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
return 2;
}
else
FFI_ASSERT (0);
}
case FFI_TYPE_FLOAT:
if (!(byte_offset % 8))
classes[0] = X86_64_SSESF_CLASS;
else
classes[0] = X86_64_SSE_CLASS;
return 1;
case FFI_TYPE_DOUBLE:
classes[0] = X86_64_SSEDF_CLASS;
return 1;
case FFI_TYPE_LONGDOUBLE:
classes[0] = X86_64_X87_CLASS;
classes[1] = X86_64_X87UP_CLASS;
return 2;
case FFI_TYPE_STRUCT:
{
const int UNITS_PER_WORD = 8;
int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
ffi_type **ptr;
int i;
enum x86_64_reg_class subclasses[MAX_CLASSES];
/* If the struct is larger than 32 bytes, pass it on the stack. */
if (type->size > 32)
return 0;
for (i = 0; i < words; i++)
classes[i] = X86_64_NO_CLASS;
/* Zero sized arrays or structures are NO_CLASS. We return 0 to
signalize memory class, so handle it as special case. */
if (!words)
{
classes[0] = X86_64_NO_CLASS;
return 1;
}
/* Merge the fields of structure. */
for (ptr = type->elements; *ptr != NULL; ptr++)
{
int num;
byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
num = classify_argument (*ptr, subclasses, byte_offset % 8);
if (num == 0)
return 0;
for (i = 0; i < num; i++)
{
int pos = byte_offset / 8;
classes[i + pos] =
merge_classes (subclasses[i], classes[i + pos]);
}
byte_offset += (*ptr)->size;
}
if (words > 2)
{
/* When size > 16 bytes, if the first one isn't
X86_64_SSE_CLASS or any other ones aren't
X86_64_SSEUP_CLASS, everything should be passed in
memory. */
if (classes[0] != X86_64_SSE_CLASS)
return 0;
for (i = 1; i < words; i++)
if (classes[i] != X86_64_SSEUP_CLASS)
return 0;
}
/* Final merger cleanup. */
for (i = 0; i < words; i++)
{
/* If one class is MEMORY, everything should be passed in
memory. */
if (classes[i] == X86_64_MEMORY_CLASS)
return 0;
/* The X86_64_SSEUP_CLASS should be always preceded by
X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
if (classes[i] == X86_64_SSEUP_CLASS
&& classes[i - 1] != X86_64_SSE_CLASS
&& classes[i - 1] != X86_64_SSEUP_CLASS)
{
/* The first one should never be X86_64_SSEUP_CLASS. */
FFI_ASSERT (i != 0);
classes[i] = X86_64_SSE_CLASS;
}
/* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
everything should be passed in memory. */
if (classes[i] == X86_64_X87UP_CLASS
&& (classes[i - 1] != X86_64_X87_CLASS))
{
/* The first one should never be X86_64_X87UP_CLASS. */
FFI_ASSERT (i != 0);
return 0;
}
}
return words;
}
default:
FFI_ASSERT(0);
}
return 0; /* Never reached. */
}
/* Examine the argument and return set number of register required in each
class. Return zero iff parameter should be passed in memory, otherwise
the number of registers. */
static int
examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
_Bool in_return, int *pngpr, int *pnsse)
{
int i, n, ngpr, nsse;
n = classify_argument (type, classes, 0);
if (n == 0)
return 0;
ngpr = nsse = 0;
for (i = 0; i < n; ++i)
switch (classes[i])
{
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
ngpr++;
break;
case X86_64_SSE_CLASS:
case X86_64_SSESF_CLASS:
case X86_64_SSEDF_CLASS:
nsse++;
break;
case X86_64_NO_CLASS:
case X86_64_SSEUP_CLASS:
break;
case X86_64_X87_CLASS:
case X86_64_X87UP_CLASS:
case X86_64_COMPLEX_X87_CLASS:
return in_return != 0;
default:
abort ();
}
*pngpr = ngpr;
*pnsse = nsse;
return n;
}
/* Perform machine dependent cif processing. */
ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
enum x86_64_reg_class classes[MAX_CLASSES];
size_t bytes;
gprcount = ssecount = 0;
flags = cif->rtype->type;
if (flags != FFI_TYPE_VOID)
{
n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
if (n == 0)
{
/* The return value is passed in memory. A pointer to that
memory is the first argument. Allocate a register for it. */
gprcount++;
/* We don't have to do anything in asm for the return. */
flags = FFI_TYPE_VOID;
}
else if (flags == FFI_TYPE_STRUCT)
{
/* Mark which registers the result appears in. */
_Bool sse0 = SSE_CLASS_P (classes[0]);
_Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
if (sse0 && !sse1)
flags |= 1 << 8;
else if (!sse0 && sse1)
flags |= 1 << 9;
else if (sse0 && sse1)
flags |= 1 << 10;
/* Mark the true size of the structure. */
flags |= cif->rtype->size << 12;
}
}
/* Go over all arguments and determine the way they should be passed.
If it's in a register and there is space for it, let that be so. If
not, add it's size to the stack byte count. */
for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
{
if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
|| gprcount + ngpr > MAX_GPR_REGS
|| ssecount + nsse > MAX_SSE_REGS)
{
long align = cif->arg_types[i]->alignment;
if (align < 8)
align = 8;
bytes = ALIGN (bytes, align);
bytes += cif->arg_types[i]->size;
}
else
{
gprcount += ngpr;
ssecount += nsse;
}
}
if (ssecount)
flags |= 1 << 11;
cif->flags = flags;
cif->bytes = ALIGN (bytes, 8);
return FFI_OK;
}
void
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
enum x86_64_reg_class classes[MAX_CLASSES];
char *stack, *argp;
ffi_type **arg_types;
int gprcount, ssecount, ngpr, nsse, i, avn;
_Bool ret_in_memory;
struct register_args *reg_args;
/* Can't call 32-bit mode from 64-bit mode. */
FFI_ASSERT (cif->abi == FFI_UNIX64);
/* If the return value is a struct and we don't have a return value
address then we need to make one. Note the setting of flags to
VOID above in ffi_prep_cif_machdep. */
ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
&& (cif->flags & 0xff) == FFI_TYPE_VOID);
if (rvalue == NULL && ret_in_memory)
rvalue = alloca (cif->rtype->size);
/* Allocate the space for the arguments, plus 4 words of temp space. */
stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
reg_args = (struct register_args *) stack;
argp = stack + sizeof (struct register_args);
gprcount = ssecount = 0;
/* If the return value is passed in memory, add the pointer as the
first integer argument. */
if (ret_in_memory)
reg_args->gpr[gprcount++] = (unsigned long) rvalue;
avn = cif->nargs;
arg_types = cif->arg_types;
for (i = 0; i < avn; ++i)
{
size_t size = arg_types[i]->size;
int n;
n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
if (n == 0
|| gprcount + ngpr > MAX_GPR_REGS
|| ssecount + nsse > MAX_SSE_REGS)
{
long align = arg_types[i]->alignment;
/* Stack arguments are *always* at least 8 byte aligned. */
if (align < 8)
align = 8;
/* Pass this argument in memory. */
argp = (void *) ALIGN (argp, align);
memcpy (argp, avalue[i], size);
argp += size;
}
else
{
/* The argument is passed entirely in registers. */
char *a = (char *) avalue[i];
int j;
for (j = 0; j < n; j++, a += 8, size -= 8)
{
switch (classes[j])
{
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
/* Sign-extend integer arguments passed in general
purpose registers, to cope with the fact that
LLVM incorrectly assumes that this will be done
(the x86-64 PS ABI does not specify this). */
switch (arg_types[i]->type)
{
case FFI_TYPE_SINT8:
*(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
break;
case FFI_TYPE_SINT16:
*(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
break;
case FFI_TYPE_SINT32:
*(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
break;
default:
reg_args->gpr[gprcount] = 0;
memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
}
gprcount++;
break;
case X86_64_SSE_CLASS:
case X86_64_SSEDF_CLASS:
reg_args->sse[ssecount++].i64 = *(UINT64 *) a;
break;
case X86_64_SSESF_CLASS:
reg_args->sse[ssecount++].i32 = *(UINT32 *) a;
break;
default:
abort();
}
}
}
}
ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
cif->flags, rvalue, fn, ssecount);
}
extern void ffi_closure_unix64(void);
ffi_status
ffi_prep_closure_loc (ffi_closure* closure,
ffi_cif* cif,
void (*fun)(ffi_cif*, void*, void**, void*),
void *user_data,
void *codeloc)
{
volatile unsigned short *tramp;
/* Sanity check on the cif ABI. */
{
int abi = cif->abi;
if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)))
return FFI_BAD_ABI;
}
tramp = (volatile unsigned short *) &closure->tramp[0];
tramp[0] = 0xbb49; /* mov <code>, %r11 */
*((unsigned long long * volatile) &tramp[1])
= (unsigned long) ffi_closure_unix64;
tramp[5] = 0xba49; /* mov <data>, %r10 */
*((unsigned long long * volatile) &tramp[6])
= (unsigned long) codeloc;
/* Set the carry bit iff the function uses any sse registers.
This is clc or stc, together with the first byte of the jmp. */
tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
tramp[11] = 0xe3ff; /* jmp *%r11 */
closure->cif = cif;
closure->fun = fun;
closure->user_data = user_data;
return FFI_OK;
}
int
ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
struct register_args *reg_args, char *argp)
{
ffi_cif *cif;
void **avalue;
ffi_type **arg_types;
long i, avn;
int gprcount, ssecount, ngpr, nsse;
int ret;
cif = closure->cif;
avalue = alloca(cif->nargs * sizeof(void *));
gprcount = ssecount = 0;
ret = cif->rtype->type;
if (ret != FFI_TYPE_VOID)
{
enum x86_64_reg_class classes[MAX_CLASSES];
int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
if (n == 0)
{
/* The return value goes in memory. Arrange for the closure
return value to go directly back to the original caller. */
rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
/* We don't have to do anything in asm for the return. */
ret = FFI_TYPE_VOID;
}
else if (ret == FFI_TYPE_STRUCT && n == 2)
{
/* Mark which register the second word of the structure goes in. */
_Bool sse0 = SSE_CLASS_P (classes[0]);
_Bool sse1 = SSE_CLASS_P (classes[1]);
if (!sse0 && sse1)
ret |= 1 << 8;
else if (sse0 && !sse1)
ret |= 1 << 9;
}
}
avn = cif->nargs;
arg_types = cif->arg_types;
for (i = 0; i < avn; ++i)
{
enum x86_64_reg_class classes[MAX_CLASSES];
int n;
n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
if (n == 0
|| gprcount + ngpr > MAX_GPR_REGS
|| ssecount + nsse > MAX_SSE_REGS)
{
long align = arg_types[i]->alignment;
/* Stack arguments are *always* at least 8 byte aligned. */
if (align < 8)
align = 8;
/* Pass this argument in memory. */
argp = (void *) ALIGN (argp, align);
avalue[i] = argp;
argp += arg_types[i]->size;
}
/* If the argument is in a single register, or two consecutive
integer registers, then we can use that address directly. */
else if (n == 1
|| (n == 2 && !(SSE_CLASS_P (classes[0])
|| SSE_CLASS_P (classes[1]))))
{
/* The argument is in a single register. */
if (SSE_CLASS_P (classes[0]))
{
avalue[i] = &reg_args->sse[ssecount];
ssecount += n;
}
else
{
avalue[i] = &reg_args->gpr[gprcount];
gprcount += n;
}
}
/* Otherwise, allocate space to make them consecutive. */
else
{
char *a = alloca (16);
int j;
avalue[i] = a;
for (j = 0; j < n; j++, a += 8)
{
if (SSE_CLASS_P (classes[j]))
memcpy (a, &reg_args->sse[ssecount++], 8);
else
memcpy (a, &reg_args->gpr[gprcount++], 8);
}
}
}
/* Invoke the closure. */
closure->fun (cif, rvalue, avalue, closure->user_data);
/* Tell assembly how to perform return type promotions. */
return ret;
}
#endif /* __x86_64__ */

View file

@ -0,0 +1,458 @@
/* -----------------------------------------------------------------------
freebsd.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005 Red Hat, Inc.
Copyright (c) 2008 Björn König
X86 Foreign Function Interface for FreeBSD
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
#ifndef __x86_64__
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
.text
.globl ffi_prep_args
.align 4
.globl ffi_call_SYSV
.type ffi_call_SYSV,@function
ffi_call_SYSV:
.LFB1:
pushl %ebp
.LCFI0:
movl %esp,%ebp
.LCFI1:
/* Make room for all of the new args. */
movl 16(%ebp),%ecx
subl %ecx,%esp
movl %esp,%eax
/* Place all of the ffi_prep_args in position */
pushl 12(%ebp)
pushl %eax
call *8(%ebp)
/* Return stack to previous state and call the function */
addl $8,%esp
call *28(%ebp)
/* Load %ecx with the return type code */
movl 20(%ebp),%ecx
/* Protect %esi. We're going to pop it in the epilogue. */
pushl %esi
/* If the return value pointer is NULL, assume no return value. */
cmpl $0,24(%ebp)
jne 0f
/* Even if there is no space for the return value, we are
obliged to handle floating-point values. */
cmpl $FFI_TYPE_FLOAT,%ecx
jne noretval
fstp %st(0)
jmp epilogue
0:
call 1f
.Lstore_table:
.long noretval-.Lstore_table /* FFI_TYPE_VOID */
.long retint-.Lstore_table /* FFI_TYPE_INT */
.long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */
.long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */
.long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
.long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */
.long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */
.long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */
.long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */
.long retint-.Lstore_table /* FFI_TYPE_UINT32 */
.long retint-.Lstore_table /* FFI_TYPE_SINT32 */
.long retint64-.Lstore_table /* FFI_TYPE_UINT64 */
.long retint64-.Lstore_table /* FFI_TYPE_SINT64 */
.long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */
.long retint-.Lstore_table /* FFI_TYPE_POINTER */
.long retstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */
.long retstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */
1:
pop %esi
add (%esi, %ecx, 4), %esi
jmp *%esi
/* Sign/zero extend as appropriate. */
retsint8:
movsbl %al, %eax
jmp retint
retsint16:
movswl %ax, %eax
jmp retint
retuint8:
movzbl %al, %eax
jmp retint
retuint16:
movzwl %ax, %eax
jmp retint
retfloat:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
fstps (%ecx)
jmp epilogue
retdouble:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
fstpl (%ecx)
jmp epilogue
retlongdouble:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
fstpt (%ecx)
jmp epilogue
retint64:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
movl %eax,0(%ecx)
movl %edx,4(%ecx)
jmp epilogue
retstruct1b:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
movb %al,0(%ecx)
jmp epilogue
retstruct2b:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
movw %ax,0(%ecx)
jmp epilogue
retint:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
movl %eax,0(%ecx)
retstruct:
/* Nothing to do! */
noretval:
epilogue:
popl %esi
movl %ebp,%esp
popl %ebp
ret
.LFE1:
.ffi_call_SYSV_end:
.size ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV
.align 4
FFI_HIDDEN (ffi_closure_SYSV)
.globl ffi_closure_SYSV
.type ffi_closure_SYSV, @function
ffi_closure_SYSV:
.LFB2:
pushl %ebp
.LCFI2:
movl %esp, %ebp
.LCFI3:
subl $40, %esp
leal -24(%ebp), %edx
movl %edx, -12(%ebp) /* resp */
leal 8(%ebp), %edx
movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */
leal -12(%ebp), %edx
movl %edx, (%esp) /* &resp */
#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__
call ffi_closure_SYSV_inner
#else
movl %ebx, 8(%esp)
.LCFI7:
call 1f
1: popl %ebx
addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
call ffi_closure_SYSV_inner@PLT
movl 8(%esp), %ebx
#endif
movl -12(%ebp), %ecx
cmpl $FFI_TYPE_INT, %eax
je .Lcls_retint
/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
cmpl $FFI_TYPE_UINT64, %eax
jge 0f
cmpl $FFI_TYPE_UINT8, %eax
jge .Lcls_retint
0: cmpl $FFI_TYPE_FLOAT, %eax
je .Lcls_retfloat
cmpl $FFI_TYPE_DOUBLE, %eax
je .Lcls_retdouble
cmpl $FFI_TYPE_LONGDOUBLE, %eax
je .Lcls_retldouble
cmpl $FFI_TYPE_SINT64, %eax
je .Lcls_retllong
cmpl $FFI_TYPE_SMALL_STRUCT_1B, %eax
je .Lcls_retstruct1b
cmpl $FFI_TYPE_SMALL_STRUCT_2B, %eax
je .Lcls_retstruct2b
cmpl $FFI_TYPE_STRUCT, %eax
je .Lcls_retstruct
.Lcls_epilogue:
movl %ebp, %esp
popl %ebp
ret
.Lcls_retint:
movl (%ecx), %eax
jmp .Lcls_epilogue
.Lcls_retfloat:
flds (%ecx)
jmp .Lcls_epilogue
.Lcls_retdouble:
fldl (%ecx)
jmp .Lcls_epilogue
.Lcls_retldouble:
fldt (%ecx)
jmp .Lcls_epilogue
.Lcls_retllong:
movl (%ecx), %eax
movl 4(%ecx), %edx
jmp .Lcls_epilogue
.Lcls_retstruct1b:
movsbl (%ecx), %eax
jmp .Lcls_epilogue
.Lcls_retstruct2b:
movswl (%ecx), %eax
jmp .Lcls_epilogue
.Lcls_retstruct:
movl %ebp, %esp
popl %ebp
ret $4
.LFE2:
.size ffi_closure_SYSV, .-ffi_closure_SYSV
#if !FFI_NO_RAW_API
#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
#define CIF_FLAGS_OFFSET 20
.align 4
FFI_HIDDEN (ffi_closure_raw_SYSV)
.globl ffi_closure_raw_SYSV
.type ffi_closure_raw_SYSV, @function
ffi_closure_raw_SYSV:
.LFB3:
pushl %ebp
.LCFI4:
movl %esp, %ebp
.LCFI5:
pushl %esi
.LCFI6:
subl $36, %esp
movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */
movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
movl %edx, 12(%esp) /* user_data */
leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */
movl %edx, 8(%esp) /* raw_args */
leal -24(%ebp), %edx
movl %edx, 4(%esp) /* &res */
movl %esi, (%esp) /* cif */
call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */
movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */
cmpl $FFI_TYPE_INT, %eax
je .Lrcls_retint
/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
cmpl $FFI_TYPE_UINT64, %eax
jge 0f
cmpl $FFI_TYPE_UINT8, %eax
jge .Lrcls_retint
0:
cmpl $FFI_TYPE_FLOAT, %eax
je .Lrcls_retfloat
cmpl $FFI_TYPE_DOUBLE, %eax
je .Lrcls_retdouble
cmpl $FFI_TYPE_LONGDOUBLE, %eax
je .Lrcls_retldouble
cmpl $FFI_TYPE_SINT64, %eax
je .Lrcls_retllong
.Lrcls_epilogue:
addl $36, %esp
popl %esi
popl %ebp
ret
.Lrcls_retint:
movl -24(%ebp), %eax
jmp .Lrcls_epilogue
.Lrcls_retfloat:
flds -24(%ebp)
jmp .Lrcls_epilogue
.Lrcls_retdouble:
fldl -24(%ebp)
jmp .Lrcls_epilogue
.Lrcls_retldouble:
fldt -24(%ebp)
jmp .Lrcls_epilogue
.Lrcls_retllong:
movl -24(%ebp), %eax
movl -20(%ebp), %edx
jmp .Lrcls_epilogue
.LFE3:
.size ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV
#endif
.section .eh_frame,EH_FRAME_FLAGS,@progbits
.Lframe1:
.long .LECIE1-.LSCIE1 /* Length of Common Information Entry */
.LSCIE1:
.long 0x0 /* CIE Identifier Tag */
.byte 0x1 /* CIE Version */
#ifdef __PIC__
.ascii "zR\0" /* CIE Augmentation */
#else
.ascii "\0" /* CIE Augmentation */
#endif
.byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */
.byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */
.byte 0x8 /* CIE RA Column */
#ifdef __PIC__
.byte 0x1 /* .uleb128 0x1; Augmentation size */
.byte 0x1b /* FDE Encoding (pcrel sdata4) */
#endif
.byte 0xc /* DW_CFA_def_cfa */
.byte 0x4 /* .uleb128 0x4 */
.byte 0x4 /* .uleb128 0x4 */
.byte 0x88 /* DW_CFA_offset, column 0x8 */
.byte 0x1 /* .uleb128 0x1 */
.align 4
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1 /* FDE Length */
.LASFDE1:
.long .LASFDE1-.Lframe1 /* FDE CIE offset */
#ifdef __PIC__
.long .LFB1-. /* FDE initial location */
#else
.long .LFB1 /* FDE initial location */
#endif
.long .LFE1-.LFB1 /* FDE address range */
#ifdef __PIC__
.byte 0x0 /* .uleb128 0x0; Augmentation size */
#endif
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI0-.LFB1
.byte 0xe /* DW_CFA_def_cfa_offset */
.byte 0x8 /* .uleb128 0x8 */
.byte 0x85 /* DW_CFA_offset, column 0x5 */
.byte 0x2 /* .uleb128 0x2 */
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI1-.LCFI0
.byte 0xd /* DW_CFA_def_cfa_register */
.byte 0x5 /* .uleb128 0x5 */
.align 4
.LEFDE1:
.LSFDE2:
.long .LEFDE2-.LASFDE2 /* FDE Length */
.LASFDE2:
.long .LASFDE2-.Lframe1 /* FDE CIE offset */
#ifdef __PIC__
.long .LFB2-. /* FDE initial location */
#else
.long .LFB2
#endif
.long .LFE2-.LFB2 /* FDE address range */
#ifdef __PIC__
.byte 0x0 /* .uleb128 0x0; Augmentation size */
#endif
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI2-.LFB2
.byte 0xe /* DW_CFA_def_cfa_offset */
.byte 0x8 /* .uleb128 0x8 */
.byte 0x85 /* DW_CFA_offset, column 0x5 */
.byte 0x2 /* .uleb128 0x2 */
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI3-.LCFI2
.byte 0xd /* DW_CFA_def_cfa_register */
.byte 0x5 /* .uleb128 0x5 */
#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI7-.LCFI3
.byte 0x83 /* DW_CFA_offset, column 0x3 */
.byte 0xa /* .uleb128 0xa */
#endif
.align 4
.LEFDE2:
#if !FFI_NO_RAW_API
.LSFDE3:
.long .LEFDE3-.LASFDE3 /* FDE Length */
.LASFDE3:
.long .LASFDE3-.Lframe1 /* FDE CIE offset */
#ifdef __PIC__
.long .LFB3-. /* FDE initial location */
#else
.long .LFB3
#endif
.long .LFE3-.LFB3 /* FDE address range */
#ifdef __PIC__
.byte 0x0 /* .uleb128 0x0; Augmentation size */
#endif
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI4-.LFB3
.byte 0xe /* DW_CFA_def_cfa_offset */
.byte 0x8 /* .uleb128 0x8 */
.byte 0x85 /* DW_CFA_offset, column 0x5 */
.byte 0x2 /* .uleb128 0x2 */
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI5-.LCFI4
.byte 0xd /* DW_CFA_def_cfa_register */
.byte 0x5 /* .uleb128 0x5 */
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI6-.LCFI5
.byte 0x86 /* DW_CFA_offset, column 0x6 */
.byte 0x3 /* .uleb128 0x3 */
.align 4
.LEFDE3:
#endif
#endif /* ifndef __x86_64__ */

View file

@ -0,0 +1,483 @@
/* -----------------------------------------------------------------------
sysv.S - Copyright (c) 2013 The Written Word, Inc.
- Copyright (c) 1996,1998,2001-2003,2005,2008,2010 Red Hat, Inc.
X86 Foreign Function Interface
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
#ifndef __x86_64__
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
.text
.globl ffi_prep_args
.align 4
.globl ffi_call_SYSV
.type ffi_call_SYSV,@function
ffi_call_SYSV:
.LFB1:
pushl %ebp
.LCFI0:
movl %esp,%ebp
.LCFI1:
/* Make room for all of the new args. */
movl 16(%ebp),%ecx
subl %ecx,%esp
/* Align the stack pointer to 16-bytes */
andl $0xfffffff0, %esp
movl %esp,%eax
/* Place all of the ffi_prep_args in position */
pushl 12(%ebp)
pushl %eax
call *8(%ebp)
/* Return stack to previous state and call the function */
addl $8,%esp
call *28(%ebp)
/* Load %ecx with the return type code */
movl 20(%ebp),%ecx
/* Protect %esi. We're going to pop it in the epilogue. */
pushl %esi
/* If the return value pointer is NULL, assume no return value. */
cmpl $0,24(%ebp)
jne 0f
/* Even if there is no space for the return value, we are
obliged to handle floating-point values. */
cmpl $FFI_TYPE_FLOAT,%ecx
jne noretval
fstp %st(0)
jmp epilogue
0:
call 1f
.Lstore_table:
.long noretval-.Lstore_table /* FFI_TYPE_VOID */
.long retint-.Lstore_table /* FFI_TYPE_INT */
.long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */
.long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */
.long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
.long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */
.long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */
.long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */
.long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */
.long retint-.Lstore_table /* FFI_TYPE_UINT32 */
.long retint-.Lstore_table /* FFI_TYPE_SINT32 */
.long retint64-.Lstore_table /* FFI_TYPE_UINT64 */
.long retint64-.Lstore_table /* FFI_TYPE_SINT64 */
.long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */
.long retint-.Lstore_table /* FFI_TYPE_POINTER */
1:
pop %esi
add (%esi, %ecx, 4), %esi
jmp *%esi
/* Sign/zero extend as appropriate. */
retsint8:
movsbl %al, %eax
jmp retint
retsint16:
movswl %ax, %eax
jmp retint
retuint8:
movzbl %al, %eax
jmp retint
retuint16:
movzwl %ax, %eax
jmp retint
retfloat:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
fstps (%ecx)
jmp epilogue
retdouble:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
fstpl (%ecx)
jmp epilogue
retlongdouble:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
fstpt (%ecx)
jmp epilogue
retint64:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
movl %eax,0(%ecx)
movl %edx,4(%ecx)
jmp epilogue
retint:
/* Load %ecx with the pointer to storage for the return value */
movl 24(%ebp),%ecx
movl %eax,0(%ecx)
retstruct:
/* Nothing to do! */
noretval:
epilogue:
popl %esi
movl %ebp,%esp
popl %ebp
ret
.LFE1:
.ffi_call_SYSV_end:
.size ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV
.align 4
FFI_HIDDEN (ffi_closure_SYSV)
.globl ffi_closure_SYSV
.type ffi_closure_SYSV, @function
ffi_closure_SYSV:
.LFB2:
pushl %ebp
.LCFI2:
movl %esp, %ebp
.LCFI3:
subl $40, %esp
leal -24(%ebp), %edx
movl %edx, -12(%ebp) /* resp */
leal 8(%ebp), %edx
#ifdef __SUNPRO_C
/* The SUNPRO compiler doesn't support GCC's regparm function
attribute, so we have to pass all three arguments to
ffi_closure_SYSV_inner on the stack. */
movl %edx, 8(%esp) /* args = __builtin_dwarf_cfa () */
leal -12(%ebp), %edx
movl %edx, 4(%esp) /* &resp */
movl %eax, (%esp) /* closure */
#else
movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */
leal -12(%ebp), %edx
movl %edx, (%esp) /* &resp */
#endif
#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__
call ffi_closure_SYSV_inner
#else
movl %ebx, 8(%esp)
.LCFI7:
call 1f
1: popl %ebx
addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
call ffi_closure_SYSV_inner@PLT
movl 8(%esp), %ebx
#endif
movl -12(%ebp), %ecx
cmpl $FFI_TYPE_INT, %eax
je .Lcls_retint
/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
cmpl $FFI_TYPE_UINT64, %eax
jge 0f
cmpl $FFI_TYPE_UINT8, %eax
jge .Lcls_retint
0: cmpl $FFI_TYPE_FLOAT, %eax
je .Lcls_retfloat
cmpl $FFI_TYPE_DOUBLE, %eax
je .Lcls_retdouble
cmpl $FFI_TYPE_LONGDOUBLE, %eax
je .Lcls_retldouble
cmpl $FFI_TYPE_SINT64, %eax
je .Lcls_retllong
cmpl $FFI_TYPE_STRUCT, %eax
je .Lcls_retstruct
.Lcls_epilogue:
movl %ebp, %esp
popl %ebp
ret
.Lcls_retint:
movl (%ecx), %eax
jmp .Lcls_epilogue
.Lcls_retfloat:
flds (%ecx)
jmp .Lcls_epilogue
.Lcls_retdouble:
fldl (%ecx)
jmp .Lcls_epilogue
.Lcls_retldouble:
fldt (%ecx)
jmp .Lcls_epilogue
.Lcls_retllong:
movl (%ecx), %eax
movl 4(%ecx), %edx
jmp .Lcls_epilogue
.Lcls_retstruct:
movl %ebp, %esp
popl %ebp
ret $4
.LFE2:
.size ffi_closure_SYSV, .-ffi_closure_SYSV
#if !FFI_NO_RAW_API
/* Precalculate for e.g. the Solaris 10/x86 assembler. */
#if FFI_TRAMPOLINE_SIZE == 10
#define RAW_CLOSURE_CIF_OFFSET 12
#define RAW_CLOSURE_FUN_OFFSET 16
#define RAW_CLOSURE_USER_DATA_OFFSET 20
#elif FFI_TRAMPOLINE_SIZE == 24
#define RAW_CLOSURE_CIF_OFFSET 24
#define RAW_CLOSURE_FUN_OFFSET 28
#define RAW_CLOSURE_USER_DATA_OFFSET 32
#else
#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
#endif
#define CIF_FLAGS_OFFSET 20
.align 4
FFI_HIDDEN (ffi_closure_raw_SYSV)
.globl ffi_closure_raw_SYSV
.type ffi_closure_raw_SYSV, @function
ffi_closure_raw_SYSV:
.LFB3:
pushl %ebp
.LCFI4:
movl %esp, %ebp
.LCFI5:
pushl %esi
.LCFI6:
subl $36, %esp
movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */
movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
movl %edx, 12(%esp) /* user_data */
leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */
movl %edx, 8(%esp) /* raw_args */
leal -24(%ebp), %edx
movl %edx, 4(%esp) /* &res */
movl %esi, (%esp) /* cif */
call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */
movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */
cmpl $FFI_TYPE_INT, %eax
je .Lrcls_retint
/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
cmpl $FFI_TYPE_UINT64, %eax
jge 0f
cmpl $FFI_TYPE_UINT8, %eax
jge .Lrcls_retint
0:
cmpl $FFI_TYPE_FLOAT, %eax
je .Lrcls_retfloat
cmpl $FFI_TYPE_DOUBLE, %eax
je .Lrcls_retdouble
cmpl $FFI_TYPE_LONGDOUBLE, %eax
je .Lrcls_retldouble
cmpl $FFI_TYPE_SINT64, %eax
je .Lrcls_retllong
.Lrcls_epilogue:
addl $36, %esp
popl %esi
popl %ebp
ret
.Lrcls_retint:
movl -24(%ebp), %eax
jmp .Lrcls_epilogue
.Lrcls_retfloat:
flds -24(%ebp)
jmp .Lrcls_epilogue
.Lrcls_retdouble:
fldl -24(%ebp)
jmp .Lrcls_epilogue
.Lrcls_retldouble:
fldt -24(%ebp)
jmp .Lrcls_epilogue
.Lrcls_retllong:
movl -24(%ebp), %eax
movl -20(%ebp), %edx
jmp .Lrcls_epilogue
.LFE3:
.size ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV
#endif
#if defined __GNUC__
/* Only emit dwarf unwind info when building with GNU toolchain. */
#if defined __PIC__
# if defined __sun__ && defined __svr4__
/* 32-bit Solaris 2/x86 uses datarel encoding for PIC. GNU ld before 2.22
doesn't correctly sort .eh_frame_hdr with mixed encodings, so match this. */
# define FDE_ENCODING 0x30 /* datarel */
# define FDE_ENCODE(X) X@GOTOFF
# else
# define FDE_ENCODING 0x1b /* pcrel sdata4 */
# if defined HAVE_AS_X86_PCREL
# define FDE_ENCODE(X) X-.
# else
# define FDE_ENCODE(X) X@rel
# endif
# endif
#else
# define FDE_ENCODING 0 /* absolute */
# define FDE_ENCODE(X) X
#endif
.section .eh_frame,EH_FRAME_FLAGS,@progbits
.Lframe1:
.long .LECIE1-.LSCIE1 /* Length of Common Information Entry */
.LSCIE1:
.long 0x0 /* CIE Identifier Tag */
.byte 0x1 /* CIE Version */
#ifdef HAVE_AS_ASCII_PSEUDO_OP
#ifdef __PIC__
.ascii "zR\0" /* CIE Augmentation */
#else
.ascii "\0" /* CIE Augmentation */
#endif
#elif defined HAVE_AS_STRING_PSEUDO_OP
#ifdef __PIC__
.string "zR" /* CIE Augmentation */
#else
.string "" /* CIE Augmentation */
#endif
#else
#error missing .ascii/.string
#endif
.byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */
.byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */
.byte 0x8 /* CIE RA Column */
#ifdef __PIC__
.byte 0x1 /* .uleb128 0x1; Augmentation size */
.byte FDE_ENCODING
#endif
.byte 0xc /* DW_CFA_def_cfa */
.byte 0x4 /* .uleb128 0x4 */
.byte 0x4 /* .uleb128 0x4 */
.byte 0x88 /* DW_CFA_offset, column 0x8 */
.byte 0x1 /* .uleb128 0x1 */
.align 4
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1 /* FDE Length */
.LASFDE1:
.long .LASFDE1-.Lframe1 /* FDE CIE offset */
.long FDE_ENCODE(.LFB1) /* FDE initial location */
.long .LFE1-.LFB1 /* FDE address range */
#ifdef __PIC__
.byte 0x0 /* .uleb128 0x0; Augmentation size */
#endif
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI0-.LFB1
.byte 0xe /* DW_CFA_def_cfa_offset */
.byte 0x8 /* .uleb128 0x8 */
.byte 0x85 /* DW_CFA_offset, column 0x5 */
.byte 0x2 /* .uleb128 0x2 */
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI1-.LCFI0
.byte 0xd /* DW_CFA_def_cfa_register */
.byte 0x5 /* .uleb128 0x5 */
.align 4
.LEFDE1:
.LSFDE2:
.long .LEFDE2-.LASFDE2 /* FDE Length */
.LASFDE2:
.long .LASFDE2-.Lframe1 /* FDE CIE offset */
.long FDE_ENCODE(.LFB2) /* FDE initial location */
.long .LFE2-.LFB2 /* FDE address range */
#ifdef __PIC__
.byte 0x0 /* .uleb128 0x0; Augmentation size */
#endif
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI2-.LFB2
.byte 0xe /* DW_CFA_def_cfa_offset */
.byte 0x8 /* .uleb128 0x8 */
.byte 0x85 /* DW_CFA_offset, column 0x5 */
.byte 0x2 /* .uleb128 0x2 */
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI3-.LCFI2
.byte 0xd /* DW_CFA_def_cfa_register */
.byte 0x5 /* .uleb128 0x5 */
#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI7-.LCFI3
.byte 0x83 /* DW_CFA_offset, column 0x3 */
.byte 0xa /* .uleb128 0xa */
#endif
.align 4
.LEFDE2:
#if !FFI_NO_RAW_API
.LSFDE3:
.long .LEFDE3-.LASFDE3 /* FDE Length */
.LASFDE3:
.long .LASFDE3-.Lframe1 /* FDE CIE offset */
.long FDE_ENCODE(.LFB3) /* FDE initial location */
.long .LFE3-.LFB3 /* FDE address range */
#ifdef __PIC__
.byte 0x0 /* .uleb128 0x0; Augmentation size */
#endif
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI4-.LFB3
.byte 0xe /* DW_CFA_def_cfa_offset */
.byte 0x8 /* .uleb128 0x8 */
.byte 0x85 /* DW_CFA_offset, column 0x5 */
.byte 0x2 /* .uleb128 0x2 */
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI5-.LCFI4
.byte 0xd /* DW_CFA_def_cfa_register */
.byte 0x5 /* .uleb128 0x5 */
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LCFI6-.LCFI5
.byte 0x86 /* DW_CFA_offset, column 0x6 */
.byte 0x3 /* .uleb128 0x3 */
.align 4
.LEFDE3:
#endif
#endif
#endif /* ifndef __x86_64__ */
#if defined __ELF__ && defined __linux__
.section .note.GNU-stack,"",@progbits
#endif

View file

@ -0,0 +1,432 @@
/* -----------------------------------------------------------------------
unix64.S - Copyright (c) 2013 The Written Word, Inc.
- Copyright (c) 2008 Red Hat, Inc
- Copyright (c) 2002 Bo Thorsen <bo@suse.de>
x86-64 Foreign Function Interface
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
#ifdef __x86_64__
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
.text
/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
void *raddr, void (*fnaddr)(void));
Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
for this function. This has been allocated by ffi_call. We also
deallocate some of the stack that has been alloca'd. */
.align 2
.globl ffi_call_unix64
.type ffi_call_unix64,@function
ffi_call_unix64:
.LUW0:
movq (%rsp), %r10 /* Load return address. */
leaq (%rdi, %rsi), %rax /* Find local stack base. */
movq %rdx, (%rax) /* Save flags. */
movq %rcx, 8(%rax) /* Save raddr. */
movq %rbp, 16(%rax) /* Save old frame pointer. */
movq %r10, 24(%rax) /* Relocate return address. */
movq %rax, %rbp /* Finalize local stack frame. */
.LUW1:
movq %rdi, %r10 /* Save a copy of the register area. */
movq %r8, %r11 /* Save a copy of the target fn. */
movl %r9d, %eax /* Set number of SSE registers. */
/* Load up all argument registers. */
movq (%r10), %rdi
movq 8(%r10), %rsi
movq 16(%r10), %rdx
movq 24(%r10), %rcx
movq 32(%r10), %r8
movq 40(%r10), %r9
testl %eax, %eax
jnz .Lload_sse
.Lret_from_load_sse:
/* Deallocate the reg arg area. */
leaq 176(%r10), %rsp
/* Call the user function. */
call *%r11
/* Deallocate stack arg area; local stack frame in redzone. */
leaq 24(%rbp), %rsp
movq 0(%rbp), %rcx /* Reload flags. */
movq 8(%rbp), %rdi /* Reload raddr. */
movq 16(%rbp), %rbp /* Reload old frame pointer. */
.LUW2:
/* The first byte of the flags contains the FFI_TYPE. */
movzbl %cl, %r10d
leaq .Lstore_table(%rip), %r11
movslq (%r11, %r10, 4), %r10
addq %r11, %r10
jmp *%r10
.Lstore_table:
.long .Lst_void-.Lstore_table /* FFI_TYPE_VOID */
.long .Lst_sint32-.Lstore_table /* FFI_TYPE_INT */
.long .Lst_float-.Lstore_table /* FFI_TYPE_FLOAT */
.long .Lst_double-.Lstore_table /* FFI_TYPE_DOUBLE */
.long .Lst_ldouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
.long .Lst_uint8-.Lstore_table /* FFI_TYPE_UINT8 */
.long .Lst_sint8-.Lstore_table /* FFI_TYPE_SINT8 */
.long .Lst_uint16-.Lstore_table /* FFI_TYPE_UINT16 */
.long .Lst_sint16-.Lstore_table /* FFI_TYPE_SINT16 */
.long .Lst_uint32-.Lstore_table /* FFI_TYPE_UINT32 */
.long .Lst_sint32-.Lstore_table /* FFI_TYPE_SINT32 */
.long .Lst_int64-.Lstore_table /* FFI_TYPE_UINT64 */
.long .Lst_int64-.Lstore_table /* FFI_TYPE_SINT64 */
.long .Lst_struct-.Lstore_table /* FFI_TYPE_STRUCT */
.long .Lst_int64-.Lstore_table /* FFI_TYPE_POINTER */
.align 2
.Lst_void:
ret
.align 2
.Lst_uint8:
movzbq %al, %rax
movq %rax, (%rdi)
ret
.align 2
.Lst_sint8:
movsbq %al, %rax
movq %rax, (%rdi)
ret
.align 2
.Lst_uint16:
movzwq %ax, %rax
movq %rax, (%rdi)
.align 2
.Lst_sint16:
movswq %ax, %rax
movq %rax, (%rdi)
ret
.align 2
.Lst_uint32:
movl %eax, %eax
movq %rax, (%rdi)
.align 2
.Lst_sint32:
cltq
movq %rax, (%rdi)
ret
.align 2
.Lst_int64:
movq %rax, (%rdi)
ret
.align 2
.Lst_float:
movss %xmm0, (%rdi)
ret
.align 2
.Lst_double:
movsd %xmm0, (%rdi)
ret
.Lst_ldouble:
fstpt (%rdi)
ret
.align 2
.Lst_struct:
leaq -20(%rsp), %rsi /* Scratch area in redzone. */
/* We have to locate the values now, and since we don't want to
write too much data into the user's return value, we spill the
value to a 16 byte scratch area first. Bits 8, 9, and 10
control where the values are located. Only one of the three
bits will be set; see ffi_prep_cif_machdep for the pattern. */
movd %xmm0, %r10
movd %xmm1, %r11
testl $0x100, %ecx
cmovnz %rax, %rdx
cmovnz %r10, %rax
testl $0x200, %ecx
cmovnz %r10, %rdx
testl $0x400, %ecx
cmovnz %r10, %rax
cmovnz %r11, %rdx
movq %rax, (%rsi)
movq %rdx, 8(%rsi)
/* Bits 12-31 contain the true size of the structure. Copy from
the scratch area to the true destination. */
shrl $12, %ecx
rep movsb
ret
/* Many times we can avoid loading any SSE registers at all.
It's not worth an indirect jump to load the exact set of
SSE registers needed; zero or all is a good compromise. */
.align 2
.LUW3:
.Lload_sse:
movdqa 48(%r10), %xmm0
movdqa 64(%r10), %xmm1
movdqa 80(%r10), %xmm2
movdqa 96(%r10), %xmm3
movdqa 112(%r10), %xmm4
movdqa 128(%r10), %xmm5
movdqa 144(%r10), %xmm6
movdqa 160(%r10), %xmm7
jmp .Lret_from_load_sse
.LUW4:
.size ffi_call_unix64,.-ffi_call_unix64
.align 2
.globl ffi_closure_unix64
.type ffi_closure_unix64,@function
ffi_closure_unix64:
.LUW5:
/* The carry flag is set by the trampoline iff SSE registers
are used. Don't clobber it before the branch instruction. */
leaq -200(%rsp), %rsp
.LUW6:
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
movq %rdx, 16(%rsp)
movq %rcx, 24(%rsp)
movq %r8, 32(%rsp)
movq %r9, 40(%rsp)
jc .Lsave_sse
.Lret_from_save_sse:
movq %r10, %rdi
leaq 176(%rsp), %rsi
movq %rsp, %rdx
leaq 208(%rsp), %rcx
call ffi_closure_unix64_inner@PLT
/* Deallocate stack frame early; return value is now in redzone. */
addq $200, %rsp
.LUW7:
/* The first byte of the return value contains the FFI_TYPE. */
movzbl %al, %r10d
leaq .Lload_table(%rip), %r11
movslq (%r11, %r10, 4), %r10
addq %r11, %r10
jmp *%r10
.Lload_table:
.long .Lld_void-.Lload_table /* FFI_TYPE_VOID */
.long .Lld_int32-.Lload_table /* FFI_TYPE_INT */
.long .Lld_float-.Lload_table /* FFI_TYPE_FLOAT */
.long .Lld_double-.Lload_table /* FFI_TYPE_DOUBLE */
.long .Lld_ldouble-.Lload_table /* FFI_TYPE_LONGDOUBLE */
.long .Lld_int8-.Lload_table /* FFI_TYPE_UINT8 */
.long .Lld_int8-.Lload_table /* FFI_TYPE_SINT8 */
.long .Lld_int16-.Lload_table /* FFI_TYPE_UINT16 */
.long .Lld_int16-.Lload_table /* FFI_TYPE_SINT16 */
.long .Lld_int32-.Lload_table /* FFI_TYPE_UINT32 */
.long .Lld_int32-.Lload_table /* FFI_TYPE_SINT32 */
.long .Lld_int64-.Lload_table /* FFI_TYPE_UINT64 */
.long .Lld_int64-.Lload_table /* FFI_TYPE_SINT64 */
.long .Lld_struct-.Lload_table /* FFI_TYPE_STRUCT */
.long .Lld_int64-.Lload_table /* FFI_TYPE_POINTER */
.align 2
.Lld_void:
ret
.align 2
.Lld_int8:
movzbl -24(%rsp), %eax
ret
.align 2
.Lld_int16:
movzwl -24(%rsp), %eax
ret
.align 2
.Lld_int32:
movl -24(%rsp), %eax
ret
.align 2
.Lld_int64:
movq -24(%rsp), %rax
ret
.align 2
.Lld_float:
movss -24(%rsp), %xmm0
ret
.align 2
.Lld_double:
movsd -24(%rsp), %xmm0
ret
.align 2
.Lld_ldouble:
fldt -24(%rsp)
ret
.align 2
.Lld_struct:
/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
%rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading
both rdx and xmm1 with the second word. For the remaining,
bit 8 set means xmm0 gets the second word, and bit 9 means
that rax gets the second word. */
movq -24(%rsp), %rcx
movq -16(%rsp), %rdx
movq -16(%rsp), %xmm1
testl $0x100, %eax
cmovnz %rdx, %rcx
movd %rcx, %xmm0
testl $0x200, %eax
movq -24(%rsp), %rax
cmovnz %rdx, %rax
ret
/* See the comment above .Lload_sse; the same logic applies here. */
.align 2
.LUW8:
.Lsave_sse:
movdqa %xmm0, 48(%rsp)
movdqa %xmm1, 64(%rsp)
movdqa %xmm2, 80(%rsp)
movdqa %xmm3, 96(%rsp)
movdqa %xmm4, 112(%rsp)
movdqa %xmm5, 128(%rsp)
movdqa %xmm6, 144(%rsp)
movdqa %xmm7, 160(%rsp)
jmp .Lret_from_save_sse
.LUW9:
.size ffi_closure_unix64,.-ffi_closure_unix64
#ifdef __GNUC__
/* Only emit DWARF unwind info when building with the GNU toolchain. */
#ifdef HAVE_AS_X86_64_UNWIND_SECTION_TYPE
.section .eh_frame,"a",@unwind
#else
.section .eh_frame,"a",@progbits
#endif
.Lframe1:
.long .LECIE1-.LSCIE1 /* CIE Length */
.LSCIE1:
.long 0 /* CIE Identifier Tag */
.byte 1 /* CIE Version */
.ascii "zR\0" /* CIE Augmentation */
.uleb128 1 /* CIE Code Alignment Factor */
.sleb128 -8 /* CIE Data Alignment Factor */
.byte 0x10 /* CIE RA Column */
.uleb128 1 /* Augmentation size */
.byte 0x1b /* FDE Encoding (pcrel sdata4) */
.byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
.uleb128 7
.uleb128 8
.byte 0x80+16 /* DW_CFA_offset, %rip offset 1*-8 */
.uleb128 1
.align 8
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1 /* FDE Length */
.LASFDE1:
.long .LASFDE1-.Lframe1 /* FDE CIE offset */
#if HAVE_AS_X86_PCREL
.long .LUW0-. /* FDE initial location */
#else
.long .LUW0@rel
#endif
.long .LUW4-.LUW0 /* FDE address range */
.uleb128 0x0 /* Augmentation size */
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LUW1-.LUW0
/* New stack frame based off rbp. This is a itty bit of unwind
trickery in that the CFA *has* changed. There is no easy way
to describe it correctly on entry to the function. Fortunately,
it doesn't matter too much since at all points we can correctly
unwind back to ffi_call. Note that the location to which we
moved the return address is (the new) CFA-8, so from the
perspective of the unwind info, it hasn't moved. */
.byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */
.uleb128 6
.uleb128 32
.byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */
.uleb128 2
.byte 0xa /* DW_CFA_remember_state */
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LUW2-.LUW1
.byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
.uleb128 7
.uleb128 8
.byte 0xc0+6 /* DW_CFA_restore, %rbp */
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LUW3-.LUW2
.byte 0xb /* DW_CFA_restore_state */
.align 8
.LEFDE1:
.LSFDE3:
.long .LEFDE3-.LASFDE3 /* FDE Length */
.LASFDE3:
.long .LASFDE3-.Lframe1 /* FDE CIE offset */
#if HAVE_AS_X86_PCREL
.long .LUW5-. /* FDE initial location */
#else
.long .LUW5@rel
#endif
.long .LUW9-.LUW5 /* FDE address range */
.uleb128 0x0 /* Augmentation size */
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LUW6-.LUW5
.byte 0xe /* DW_CFA_def_cfa_offset */
.uleb128 208
.byte 0xa /* DW_CFA_remember_state */
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LUW7-.LUW6
.byte 0xe /* DW_CFA_def_cfa_offset */
.uleb128 8
.byte 0x4 /* DW_CFA_advance_loc4 */
.long .LUW8-.LUW7
.byte 0xb /* DW_CFA_restore_state */
.align 8
.LEFDE3:
#endif /* __GNUC__ */
#endif /* __x86_64__ */
#if defined __ELF__ && defined __linux__
.section .note.GNU-stack,"",@progbits
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,468 @@
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
/* Constants for ffi_call_win64 */
#define STACK 0
#define PREP_ARGS_FN 32
#define ECIF 40
#define CIF_BYTES 48
#define CIF_FLAGS 56
#define RVALUE 64
#define FN 72
/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *),
extended_cif *ecif, unsigned bytes, unsigned flags,
unsigned *rvalue, void (*fn)());
*/
#ifdef _MSC_VER
PUBLIC ffi_call_win64
EXTRN __chkstk:NEAR
EXTRN ffi_closure_win64_inner:NEAR
_TEXT SEGMENT
;;; ffi_closure_win64 will be called with these registers set:
;;; rax points to 'closure'
;;; r11 contains a bit mask that specifies which of the
;;; first four parameters are float or double
;;;
;;; It must move the parameters passed in registers to their stack location,
;;; call ffi_closure_win64_inner for the actual work, then return the result.
;;;
ffi_closure_win64 PROC FRAME
;; copy register arguments onto stack
test r11, 1
jne first_is_float
mov QWORD PTR [rsp+8], rcx
jmp second
first_is_float:
movlpd QWORD PTR [rsp+8], xmm0
second:
test r11, 2
jne second_is_float
mov QWORD PTR [rsp+16], rdx
jmp third
second_is_float:
movlpd QWORD PTR [rsp+16], xmm1
third:
test r11, 4
jne third_is_float
mov QWORD PTR [rsp+24], r8
jmp fourth
third_is_float:
movlpd QWORD PTR [rsp+24], xmm2
fourth:
test r11, 8
jne fourth_is_float
mov QWORD PTR [rsp+32], r9
jmp done
fourth_is_float:
movlpd QWORD PTR [rsp+32], xmm3
done:
.ALLOCSTACK 40
sub rsp, 40
.ENDPROLOG
mov rcx, rax ; context is first parameter
mov rdx, rsp ; stack is second parameter
add rdx, 48 ; point to start of arguments
mov rax, ffi_closure_win64_inner
call rax ; call the real closure function
add rsp, 40
movd xmm0, rax ; If the closure returned a float,
; ffi_closure_win64_inner wrote it to rax
ret 0
ffi_closure_win64 ENDP
ffi_call_win64 PROC FRAME
;; copy registers onto stack
mov QWORD PTR [rsp+32], r9
mov QWORD PTR [rsp+24], r8
mov QWORD PTR [rsp+16], rdx
mov QWORD PTR [rsp+8], rcx
.PUSHREG rbp
push rbp
.ALLOCSTACK 48
sub rsp, 48 ; 00000030H
.SETFRAME rbp, 32
lea rbp, QWORD PTR [rsp+32]
.ENDPROLOG
mov eax, DWORD PTR CIF_BYTES[rbp]
add rax, 15
and rax, -16
call __chkstk
sub rsp, rax
lea rax, QWORD PTR [rsp+32]
mov QWORD PTR STACK[rbp], rax
mov rdx, QWORD PTR ECIF[rbp]
mov rcx, QWORD PTR STACK[rbp]
call QWORD PTR PREP_ARGS_FN[rbp]
mov rsp, QWORD PTR STACK[rbp]
movlpd xmm3, QWORD PTR [rsp+24]
movd r9, xmm3
movlpd xmm2, QWORD PTR [rsp+16]
movd r8, xmm2
movlpd xmm1, QWORD PTR [rsp+8]
movd rdx, xmm1
movlpd xmm0, QWORD PTR [rsp]
movd rcx, xmm0
call QWORD PTR FN[rbp]
ret_struct4b$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B
jne ret_struct2b$
mov rcx, QWORD PTR RVALUE[rbp]
mov DWORD PTR [rcx], eax
jmp ret_void$
ret_struct2b$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B
jne ret_struct1b$
mov rcx, QWORD PTR RVALUE[rbp]
mov WORD PTR [rcx], ax
jmp ret_void$
ret_struct1b$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B
jne ret_uint8$
mov rcx, QWORD PTR RVALUE[rbp]
mov BYTE PTR [rcx], al
jmp ret_void$
ret_uint8$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8
jne ret_sint8$
mov rcx, QWORD PTR RVALUE[rbp]
movzx rax, al
mov QWORD PTR [rcx], rax
jmp ret_void$
ret_sint8$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8
jne ret_uint16$
mov rcx, QWORD PTR RVALUE[rbp]
movsx rax, al
mov QWORD PTR [rcx], rax
jmp ret_void$
ret_uint16$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16
jne ret_sint16$
mov rcx, QWORD PTR RVALUE[rbp]
movzx rax, ax
mov QWORD PTR [rcx], rax
jmp SHORT ret_void$
ret_sint16$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16
jne ret_uint32$
mov rcx, QWORD PTR RVALUE[rbp]
movsx rax, ax
mov QWORD PTR [rcx], rax
jmp SHORT ret_void$
ret_uint32$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32
jne ret_sint32$
mov rcx, QWORD PTR RVALUE[rbp]
mov eax, eax
mov QWORD PTR [rcx], rax
jmp SHORT ret_void$
ret_sint32$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32
jne ret_float$
mov rcx, QWORD PTR RVALUE[rbp]
cdqe
mov QWORD PTR [rcx], rax
jmp SHORT ret_void$
ret_float$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT
jne SHORT ret_double$
mov rax, QWORD PTR RVALUE[rbp]
movss DWORD PTR [rax], xmm0
jmp SHORT ret_void$
ret_double$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE
jne SHORT ret_sint64$
mov rax, QWORD PTR RVALUE[rbp]
movlpd QWORD PTR [rax], xmm0
jmp SHORT ret_void$
ret_sint64$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64
jne ret_void$
mov rcx, QWORD PTR RVALUE[rbp]
mov QWORD PTR [rcx], rax
jmp SHORT ret_void$
ret_void$:
xor rax, rax
lea rsp, QWORD PTR [rbp+16]
pop rbp
ret 0
ffi_call_win64 ENDP
_TEXT ENDS
END
#else
#ifdef SYMBOL_UNDERSCORE
#define SYMBOL_NAME(name) _##name
#else
#define SYMBOL_NAME(name) name
#endif
.text
.extern SYMBOL_NAME(ffi_closure_win64_inner)
# ffi_closure_win64 will be called with these registers set:
# rax points to 'closure'
# r11 contains a bit mask that specifies which of the
# first four parameters are float or double
#
# It must move the parameters passed in registers to their stack location,
# call ffi_closure_win64_inner for the actual work, then return the result.
#
.balign 16
.globl SYMBOL_NAME(ffi_closure_win64)
SYMBOL_NAME(ffi_closure_win64):
# copy register arguments onto stack
test $1,%r11
jne .Lfirst_is_float
mov %rcx, 8(%rsp)
jmp .Lsecond
.Lfirst_is_float:
movlpd %xmm0, 8(%rsp)
.Lsecond:
test $2, %r11
jne .Lsecond_is_float
mov %rdx, 16(%rsp)
jmp .Lthird
.Lsecond_is_float:
movlpd %xmm1, 16(%rsp)
.Lthird:
test $4, %r11
jne .Lthird_is_float
mov %r8,24(%rsp)
jmp .Lfourth
.Lthird_is_float:
movlpd %xmm2, 24(%rsp)
.Lfourth:
test $8, %r11
jne .Lfourth_is_float
mov %r9, 32(%rsp)
jmp .Ldone
.Lfourth_is_float:
movlpd %xmm3, 32(%rsp)
.Ldone:
#.ALLOCSTACK 40
sub $40, %rsp
#.ENDPROLOG
mov %rax, %rcx # context is first parameter
mov %rsp, %rdx # stack is second parameter
add $48, %rdx # point to start of arguments
mov $SYMBOL_NAME(ffi_closure_win64_inner), %rax
callq *%rax # call the real closure function
add $40, %rsp
movq %rax, %xmm0 # If the closure returned a float,
# ffi_closure_win64_inner wrote it to rax
retq
.ffi_closure_win64_end:
.balign 16
.globl SYMBOL_NAME(ffi_call_win64)
SYMBOL_NAME(ffi_call_win64):
# copy registers onto stack
mov %r9,32(%rsp)
mov %r8,24(%rsp)
mov %rdx,16(%rsp)
mov %rcx,8(%rsp)
#.PUSHREG rbp
push %rbp
#.ALLOCSTACK 48
sub $48,%rsp
#.SETFRAME rbp, 32
lea 32(%rsp),%rbp
#.ENDPROLOG
mov CIF_BYTES(%rbp),%eax
add $15, %rax
and $-16, %rax
cmpq $0x1000, %rax
jb Lch_done
Lch_probe:
subq $0x1000,%rsp
orl $0x0, (%rsp)
subq $0x1000,%rax
cmpq $0x1000,%rax
ja Lch_probe
Lch_done:
subq %rax, %rsp
orl $0x0, (%rsp)
lea 32(%rsp), %rax
mov %rax, STACK(%rbp)
mov ECIF(%rbp), %rdx
mov STACK(%rbp), %rcx
callq *PREP_ARGS_FN(%rbp)
mov STACK(%rbp), %rsp
movlpd 24(%rsp), %xmm3
movd %xmm3, %r9
movlpd 16(%rsp), %xmm2
movd %xmm2, %r8
movlpd 8(%rsp), %xmm1
movd %xmm1, %rdx
movlpd (%rsp), %xmm0
movd %xmm0, %rcx
callq *FN(%rbp)
.Lret_struct4b:
cmpl $FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp)
jne .Lret_struct2b
mov RVALUE(%rbp), %rcx
mov %eax, (%rcx)
jmp .Lret_void
.Lret_struct2b:
cmpl $FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp)
jne .Lret_struct1b
mov RVALUE(%rbp), %rcx
mov %ax, (%rcx)
jmp .Lret_void
.Lret_struct1b:
cmpl $FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp)
jne .Lret_uint8
mov RVALUE(%rbp), %rcx
mov %al, (%rcx)
jmp .Lret_void
.Lret_uint8:
cmpl $FFI_TYPE_UINT8, CIF_FLAGS(%rbp)
jne .Lret_sint8
mov RVALUE(%rbp), %rcx
movzbq %al, %rax
movq %rax, (%rcx)
jmp .Lret_void
.Lret_sint8:
cmpl $FFI_TYPE_SINT8, CIF_FLAGS(%rbp)
jne .Lret_uint16
mov RVALUE(%rbp), %rcx
movsbq %al, %rax
movq %rax, (%rcx)
jmp .Lret_void
.Lret_uint16:
cmpl $FFI_TYPE_UINT16, CIF_FLAGS(%rbp)
jne .Lret_sint16
mov RVALUE(%rbp), %rcx
movzwq %ax, %rax
movq %rax, (%rcx)
jmp .Lret_void
.Lret_sint16:
cmpl $FFI_TYPE_SINT16, CIF_FLAGS(%rbp)
jne .Lret_uint32
mov RVALUE(%rbp), %rcx
movswq %ax, %rax
movq %rax, (%rcx)
jmp .Lret_void
.Lret_uint32:
cmpl $FFI_TYPE_UINT32, CIF_FLAGS(%rbp)
jne .Lret_sint32
mov RVALUE(%rbp), %rcx
movl %eax, %eax
movq %rax, (%rcx)
jmp .Lret_void
.Lret_sint32:
cmpl $FFI_TYPE_SINT32, CIF_FLAGS(%rbp)
jne .Lret_float
mov RVALUE(%rbp), %rcx
cltq
movq %rax, (%rcx)
jmp .Lret_void
.Lret_float:
cmpl $FFI_TYPE_FLOAT, CIF_FLAGS(%rbp)
jne .Lret_double
mov RVALUE(%rbp), %rax
movss %xmm0, (%rax)
jmp .Lret_void
.Lret_double:
cmpl $FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp)
jne .Lret_sint64
mov RVALUE(%rbp), %rax
movlpd %xmm0, (%rax)
jmp .Lret_void
.Lret_sint64:
cmpl $FFI_TYPE_SINT64, CIF_FLAGS(%rbp)
jne .Lret_void
mov RVALUE(%rbp), %rcx
mov %rax, (%rcx)
jmp .Lret_void
.Lret_void:
xor %rax, %rax
lea 16(%rbp), %rsp
pop %rbp
retq
.ffi_call_win64_end:
#endif /* !_MSC_VER */

View file

@ -0,0 +1,468 @@
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
/* Constants for ffi_call_win64 */
#define STACK 0
#define PREP_ARGS_FN 32
#define ECIF 40
#define CIF_BYTES 48
#define CIF_FLAGS 56
#define RVALUE 64
#define FN 72
/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *),
extended_cif *ecif, unsigned bytes, unsigned flags,
unsigned *rvalue, void (*fn)());
*/
#ifdef _MSC_VER
PUBLIC ffi_call_win64
EXTRN __chkstk:NEAR
EXTRN ffi_closure_win64_inner:NEAR
_TEXT SEGMENT
;;; ffi_closure_win64 will be called with these registers set:
;;; rax points to 'closure'
;;; r11 contains a bit mask that specifies which of the
;;; first four parameters are float or double
;;;
;;; It must move the parameters passed in registers to their stack location,
;;; call ffi_closure_win64_inner for the actual work, then return the result.
;;;
ffi_closure_win64 PROC FRAME
;; copy register arguments onto stack
test r11, 1
jne first_is_float
mov QWORD PTR [rsp+8], rcx
jmp second
first_is_float:
movlpd QWORD PTR [rsp+8], xmm0
second:
test r11, 2
jne second_is_float
mov QWORD PTR [rsp+16], rdx
jmp third
second_is_float:
movlpd QWORD PTR [rsp+16], xmm1
third:
test r11, 4
jne third_is_float
mov QWORD PTR [rsp+24], r8
jmp fourth
third_is_float:
movlpd QWORD PTR [rsp+24], xmm2
fourth:
test r11, 8
jne fourth_is_float
mov QWORD PTR [rsp+32], r9
jmp done
fourth_is_float:
movlpd QWORD PTR [rsp+32], xmm3
done:
.ALLOCSTACK 40
sub rsp, 40
.ENDPROLOG
mov rcx, rax ; context is first parameter
mov rdx, rsp ; stack is second parameter
add rdx, 48 ; point to start of arguments
mov rax, ffi_closure_win64_inner
call rax ; call the real closure function
add rsp, 40
movd xmm0, rax ; If the closure returned a float,
; ffi_closure_win64_inner wrote it to rax
ret 0
ffi_closure_win64 ENDP
ffi_call_win64 PROC FRAME
;; copy registers onto stack
mov QWORD PTR [rsp+32], r9
mov QWORD PTR [rsp+24], r8
mov QWORD PTR [rsp+16], rdx
mov QWORD PTR [rsp+8], rcx
.PUSHREG rbp
push rbp
.ALLOCSTACK 48
sub rsp, 48 ; 00000030H
.SETFRAME rbp, 32
lea rbp, QWORD PTR [rsp+32]
.ENDPROLOG
mov eax, DWORD PTR CIF_BYTES[rbp]
add rax, 15
and rax, -16
call __chkstk
sub rsp, rax
lea rax, QWORD PTR [rsp+32]
mov QWORD PTR STACK[rbp], rax
mov rdx, QWORD PTR ECIF[rbp]
mov rcx, QWORD PTR STACK[rbp]
call QWORD PTR PREP_ARGS_FN[rbp]
mov rsp, QWORD PTR STACK[rbp]
movlpd xmm3, QWORD PTR [rsp+24]
movd r9, xmm3
movlpd xmm2, QWORD PTR [rsp+16]
movd r8, xmm2
movlpd xmm1, QWORD PTR [rsp+8]
movd rdx, xmm1
movlpd xmm0, QWORD PTR [rsp]
movd rcx, xmm0
call QWORD PTR FN[rbp]
ret_struct4b$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B
jne ret_struct2b$
mov rcx, QWORD PTR RVALUE[rbp]
mov DWORD PTR [rcx], eax
jmp ret_void$
ret_struct2b$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B
jne ret_struct1b$
mov rcx, QWORD PTR RVALUE[rbp]
mov WORD PTR [rcx], ax
jmp ret_void$
ret_struct1b$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B
jne ret_uint8$
mov rcx, QWORD PTR RVALUE[rbp]
mov BYTE PTR [rcx], al
jmp ret_void$
ret_uint8$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8
jne ret_sint8$
mov rcx, QWORD PTR RVALUE[rbp]
movzx rax, al
mov QWORD PTR [rcx], rax
jmp ret_void$
ret_sint8$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8
jne ret_uint16$
mov rcx, QWORD PTR RVALUE[rbp]
movsx rax, al
mov QWORD PTR [rcx], rax
jmp ret_void$
ret_uint16$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16
jne ret_sint16$
mov rcx, QWORD PTR RVALUE[rbp]
movzx rax, ax
mov QWORD PTR [rcx], rax
jmp SHORT ret_void$
ret_sint16$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16
jne ret_uint32$
mov rcx, QWORD PTR RVALUE[rbp]
movsx rax, ax
mov QWORD PTR [rcx], rax
jmp SHORT ret_void$
ret_uint32$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32
jne ret_sint32$
mov rcx, QWORD PTR RVALUE[rbp]
mov eax, eax
mov QWORD PTR [rcx], rax
jmp SHORT ret_void$
ret_sint32$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32
jne ret_float$
mov rcx, QWORD PTR RVALUE[rbp]
cdqe
mov QWORD PTR [rcx], rax
jmp SHORT ret_void$
ret_float$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT
jne SHORT ret_double$
mov rax, QWORD PTR RVALUE[rbp]
movss DWORD PTR [rax], xmm0
jmp SHORT ret_void$
ret_double$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE
jne SHORT ret_sint64$
mov rax, QWORD PTR RVALUE[rbp]
movlpd QWORD PTR [rax], xmm0
jmp SHORT ret_void$
ret_sint64$:
cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64
jne ret_void$
mov rcx, QWORD PTR RVALUE[rbp]
mov QWORD PTR [rcx], rax
jmp SHORT ret_void$
ret_void$:
xor rax, rax
lea rsp, QWORD PTR [rbp+16]
pop rbp
ret 0
ffi_call_win64 ENDP
_TEXT ENDS
END
#else
#ifdef SYMBOL_UNDERSCORE
#define SYMBOL_NAME(name) _##name
#else
#define SYMBOL_NAME(name) name
#endif
.text
.extern SYMBOL_NAME(ffi_closure_win64_inner)
# ffi_closure_win64 will be called with these registers set:
# rax points to 'closure'
# r11 contains a bit mask that specifies which of the
# first four parameters are float or double
#
# It must move the parameters passed in registers to their stack location,
# call ffi_closure_win64_inner for the actual work, then return the result.
#
.balign 16
.globl SYMBOL_NAME(ffi_closure_win64)
SYMBOL_NAME(ffi_closure_win64):
# copy register arguments onto stack
test $1,%r11
jne .Lfirst_is_float
mov %rcx, 8(%rsp)
jmp .Lsecond
.Lfirst_is_float:
movlpd %xmm0, 8(%rsp)
.Lsecond:
test $2, %r11
jne .Lsecond_is_float
mov %rdx, 16(%rsp)
jmp .Lthird
.Lsecond_is_float:
movlpd %xmm1, 16(%rsp)
.Lthird:
test $4, %r11
jne .Lthird_is_float
mov %r8,24(%rsp)
jmp .Lfourth
.Lthird_is_float:
movlpd %xmm2, 24(%rsp)
.Lfourth:
test $8, %r11
jne .Lfourth_is_float
mov %r9, 32(%rsp)
jmp .Ldone
.Lfourth_is_float:
movlpd %xmm3, 32(%rsp)
.Ldone:
#.ALLOCSTACK 40
sub $40, %rsp
#.ENDPROLOG
mov %rax, %rcx # context is first parameter
mov %rsp, %rdx # stack is second parameter
add $48, %rdx # point to start of arguments
mov $SYMBOL_NAME(ffi_closure_win64_inner), %rax
callq *%rax # call the real closure function
add $40, %rsp
movq %rax, %xmm0 # If the closure returned a float,
# ffi_closure_win64_inner wrote it to rax
retq
.ffi_closure_win64_end:
.balign 16
.globl SYMBOL_NAME(ffi_call_win64)
SYMBOL_NAME(ffi_call_win64):
# copy registers onto stack
mov %r9,32(%rsp)
mov %r8,24(%rsp)
mov %rdx,16(%rsp)
mov %rcx,8(%rsp)
#.PUSHREG rbp
push %rbp
#.ALLOCSTACK 48
sub $48,%rsp
#.SETFRAME rbp, 32
lea 32(%rsp),%rbp
#.ENDPROLOG
mov CIF_BYTES(%rbp),%eax
add $15, %rax
and $-16, %rax
cmpq $0x1000, %rax
jb Lch_done
Lch_probe:
subq $0x1000,%rsp
orl $0x0, (%rsp)
subq $0x1000,%rax
cmpq $0x1000,%rax
ja Lch_probe
Lch_done:
subq %rax, %rsp
orl $0x0, (%rsp)
lea 32(%rsp), %rax
mov %rax, STACK(%rbp)
mov ECIF(%rbp), %rdx
mov STACK(%rbp), %rcx
callq *PREP_ARGS_FN(%rbp)
mov STACK(%rbp), %rsp
movlpd 24(%rsp), %xmm3
movd %xmm3, %r9
movlpd 16(%rsp), %xmm2
movd %xmm2, %r8
movlpd 8(%rsp), %xmm1
movd %xmm1, %rdx
movlpd (%rsp), %xmm0
movd %xmm0, %rcx
callq *FN(%rbp)
.Lret_struct4b:
cmpl $FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp)
jne .Lret_struct2b
mov RVALUE(%rbp), %rcx
mov %eax, (%rcx)
jmp .Lret_void
.Lret_struct2b:
cmpl $FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp)
jne .Lret_struct1b
mov RVALUE(%rbp), %rcx
mov %ax, (%rcx)
jmp .Lret_void
.Lret_struct1b:
cmpl $FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp)
jne .Lret_uint8
mov RVALUE(%rbp), %rcx
mov %al, (%rcx)
jmp .Lret_void
.Lret_uint8:
cmpl $FFI_TYPE_UINT8, CIF_FLAGS(%rbp)
jne .Lret_sint8
mov RVALUE(%rbp), %rcx
movzbq %al, %rax
movq %rax, (%rcx)
jmp .Lret_void
.Lret_sint8:
cmpl $FFI_TYPE_SINT8, CIF_FLAGS(%rbp)
jne .Lret_uint16
mov RVALUE(%rbp), %rcx
movsbq %al, %rax
movq %rax, (%rcx)
jmp .Lret_void
.Lret_uint16:
cmpl $FFI_TYPE_UINT16, CIF_FLAGS(%rbp)
jne .Lret_sint16
mov RVALUE(%rbp), %rcx
movzwq %ax, %rax
movq %rax, (%rcx)
jmp .Lret_void
.Lret_sint16:
cmpl $FFI_TYPE_SINT16, CIF_FLAGS(%rbp)
jne .Lret_uint32
mov RVALUE(%rbp), %rcx
movswq %ax, %rax
movq %rax, (%rcx)
jmp .Lret_void
.Lret_uint32:
cmpl $FFI_TYPE_UINT32, CIF_FLAGS(%rbp)
jne .Lret_sint32
mov RVALUE(%rbp), %rcx
movl %eax, %eax
movq %rax, (%rcx)
jmp .Lret_void
.Lret_sint32:
cmpl $FFI_TYPE_SINT32, CIF_FLAGS(%rbp)
jne .Lret_float
mov RVALUE(%rbp), %rcx
cltq
movq %rax, (%rcx)
jmp .Lret_void
.Lret_float:
cmpl $FFI_TYPE_FLOAT, CIF_FLAGS(%rbp)
jne .Lret_double
mov RVALUE(%rbp), %rax
movss %xmm0, (%rax)
jmp .Lret_void
.Lret_double:
cmpl $FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp)
jne .Lret_sint64
mov RVALUE(%rbp), %rax
movlpd %xmm0, (%rax)
jmp .Lret_void
.Lret_sint64:
cmpl $FFI_TYPE_SINT64, CIF_FLAGS(%rbp)
jne .Lret_void
mov RVALUE(%rbp), %rcx
mov %rax, (%rcx)
jmp .Lret_void
.Lret_void:
xor %rax, %rax
lea 16(%rbp), %rsp
pop %rbp
retq
.ffi_call_win64_end:
#endif /* !_MSC_VER */