Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/facebook/luaffifb.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author James R. McKaskill <jmckaskill@gmail.com> 2011-07-15 04:34:02 +0400
committer James R. McKaskill <jmckaskill@gmail.com> 2011-07-15 04:34:02 +0400
commit b8f7239faa544b7fc936747a952d4912385a1835 (patch)
tree 3a2569838bbfd0d8f42f76f58afa7c9082ad1830 /call_x86.dasc
parent 24b26c8e59fd7b8f0556b06d7748df70ba503af9 (diff)
Normalising line endings
Diffstat (limited to 'call_x86.dasc')
-rw-r--r-- call_x86.dasc 1532
1 files changed, 766 insertions, 766 deletions
diff --git a/call_x86.dasc b/call_x86.dasc
index cd6ab15..224f02a 100644
--- a/call_x86.dasc
+++ b/call_x86.dasc
@@ -1,766 +1,766 @@
-|.if X64
-|.arch x64
-|.else
-|.arch x86
-|.endif
-
-|.actionlist build_actionlist
-|.globalnames globnames
-
-|.if not X64
-|.define RET_H, edx // for int64_t returns
-|.define RET_L, eax
-|.endif
-
-void compile_globals(jit_t* jit, lua_State* L)
-{
- jit_t* Dst = jit;
- dasm_setup(Dst, build_actionlist);
-
- /* Note: since the return code uses EBP to reset the stack pointer, we
- * don't have to track the amount of stack space used. It also means we
- * can handle stdcall and cdecl with the same code.
- */
-
- /* Note the various call_* functions want 32 bytes of 16 byte aligned
- * stack
- */
-
- |.if X64WIN
- |.macro call_piip, func, arg0, arg1, arg2, arg3
- | mov r9, arg3
- | mov r8, arg2
- | mov edx, arg1
- | mov rcx, arg0
- | call func
- |.endmacro
- |.macro call_pip, func, arg0, arg1, arg2
- | mov r8, arg2
- | mov edx, arg1
- | mov rcx, arg0
- | call func
- |.endmacro
- |.macro call_pp, func, arg0, arg1
- | mov rdx, arg1
- | mov rcx, arg0
- | call func
- |.endmacro
- |.macro call_pi, func, arg0, arg1
- | mov edx, arg1
- | mov rcx, arg0
- | call func
- |.endmacro
- |.macro call_i, func, arg0
- | mov ecx, arg0
- | call func
- |.endmacro
- |
- |.elif X64
- |.macro call_piip, func, arg0, arg1, arg2, arg3
- | mov rcx, arg3
- | mov edx, arg2
- | mov esi, arg1
- | mov rdi, arg0
- | call func
- |.endmacro
- |.macro call_pip, func, arg0, arg1, arg2
- | mov rdx, arg2
- | mov esi, arg1
- | mov rdi, arg0
- | call func
- |.endmacro
- |.macro call_pp, func, arg0, arg1
- | mov rsi, arg1
- | mov rdi, arg0
- | call func
- |.endmacro
- |.macro call_pi, func, arg0, arg1
- | mov esi, arg1
- | mov rdi, arg0
- | call func
- |.endmacro
- |.macro call_i, func, arg0
- | mov edi, arg0
- | call func
- |.endmacro
- |
- |.else
- | // define the 64bit registers to the 32 bit counterparts, so the common
- | // code can use r*x for all pointers
- |.define rax, eax
- |.define rcx, ecx
- |.define rdx, edx
- |.define rsp, esp
- |.define rbp, ebp
- |.define rdi, edi
- |.define rsi, esi
- |.macro call_piip, func, arg0, arg1, arg2, arg3
- | mov dword [rsp+12], arg3
- | mov dword [rsp+8], arg2
- | mov dword [rsp+4], arg1
- | mov dword [rsp], arg0
- | call func
- |.endmacro
- |.macro call_pip, func, arg0, arg1, arg2
- | mov dword [rsp+8], arg2
- | mov dword [rsp+4], arg1
- | mov dword [rsp], arg0
- | call func
- |.endmacro
- |.macro call_pp, func, arg0, arg1
- | mov dword [rsp+4], arg1
- | mov dword [rsp], arg0
- | call func
- |.endmacro
- |.macro call_pi, func, arg0, arg1
- | call_pp, func, arg0, arg1
- |.endmacro
- |.macro call_i, func, arg0
- | mov dword [rsp], arg0
- | call func
- |.endmacro
- |
- |.endif
-
- |.if X64
- |.define L_ARG, r12
- |.else
- |.define L_ARG, rdi
- |.endif
-
- |.macro return
- |.if X64
- | mov L_ARG, [rbp-8]
- |.else
- | mov L_ARG, [rbp-4]
- |.endif
- | mov rsp, rbp
- | pop rbp
- | ret
- |.endmacro
-
- |.macro get_errno // note trashes registers
- | call &GetLastError
- | mov dword [&jit->last_errno], eax
- |.endmacro
-
- /* the general idea for the return functions is:
- * 1) Save return value on stack
- * 2) Call get_errno (this trashes the registers hence #1)
- * 3) Unpack return value from stack
- * 4) Call lua push function
- * 5) Set eax to number of returned args (0 or 1)
- * 6) Call return which pops our stack frame
- */
-
- |->return_void:
- | get_errno
- | mov eax, 0
- | return
-
- |->return_int:
- | mov [rsp+32], eax
- | get_errno
- |
- |.if X64WIN
- | cvtsi2sd xmm1, dword [rsp+32]
- | mov rcx, L_ARG
- |.elif X64
- | cvtsi2sd xmm0, dword [rsp+32]
- | mov rdi, L_ARG
- |.else
- | fild dword [rsp+32]
- | fstp qword [rsp+4]
- | mov [rsp], L_ARG
- |.endif
- |
- | call &lua_pushnumber
- | mov eax, 1
- | return
-
- |->return_double:
- |.if X64
- | movq qword [rsp+32], xmm0
- |.else
- | fstp qword [rsp+4] // note get_errno doesn't require any stack on x86
- |.endif
- |
- | get_errno
- |
- |.if X64WIN
- | movq xmm1, qword [rsp+32]
- | mov rcx, L_ARG
- |.elif X64
- | movq xmm0, qword [rsp+32]
- | mov rdi, L_ARG
- |.else
- | mov [rsp], L_ARG
- |.endif
- | call &lua_pushnumber
- | mov eax, 1
- | return
-
- |->return_bool:
- | mov [rsp+32], eax
- | get_errno
- | mov eax, [rsp+32]
- | call_pi, &lua_pushboolean, L_ARG, eax
- | mov eax, 1
- | return
-
- |->return_uint:
- | mov [rsp+32], eax
- | get_errno
- | mov eax, [rsp+32]
- | call_pi, &push_uint, L_ARG, eax
- | mov eax, 1
- | return
-
- |->too_few_arguments:
- | call_pp, &luaL_error, L_ARG, &"too few arguments"
-
- |->too_many_arguments:
- | call_pp, &luaL_error, L_ARG, &"too many arguments"
-
- compile(Dst, L);
-}
-
-int x86_stack_required(lua_State* L, int usr)
-{
- size_t i;
- size_t argn = lua_rawlen(L, usr);
- int ret = 0;
- for (i = 1; i <= argn; i++) {
- const ctype_t* ct;
- lua_rawgeti(L, usr, i);
- ct = (const ctype_t*) lua_touserdata(L, -1);
-
- if (ct->pointers) {
- ret += sizeof(void*);
- } else if (ct->type == DOUBLE_TYPE || ct->type == UINT64_TYPE || ct->type == INT64_TYPE) {
- ret += 8;
- } else if (ct->type == STRUCT_TYPE || ct->type == UNION_TYPE) {
- luaL_error(L, "NYI - structs as arguments");
- } else {
- /* other numeric types 4 bytes or less */
- ret += 4;
- }
-
- lua_pop(L, 1);
- }
-
- return ret;
-}
-
-#ifdef _WIN64
-#define MAX_REGISTERS 4 /* rcx, rdx, r8, r9 */
-#endif
-
-#ifdef __amd64__
-#define MAX_INT_REGISTERS 6 /* rdi, rsi, rdx, rcx, r8, r9 */
-#define MAX_FLOAT_REGISTERS 8 /* xmm0-7 */
-#endif
-
-struct reg_alloc {
-#ifdef _WIN64
- int regs;
- int is_float[4];
- int is_int[4];
-#elif defined __amd64__
- int floats;
- int ints;
-#endif
- size_t off;
-};
-
-#ifdef _WIN64
-#define REGISTER_STACK_SPACE (4*8)
-#elif defined __amd64__
-#define REGISTER_STACK_SPACE (14*8)
-#else
-#define REGISTER_STACK_SPACE 0
-#endif
-
-void add_int32(Dst_DECL, struct reg_alloc* reg)
-{
-#ifdef _WIN64
- if (reg->regs >= MAX_REGISTERS) {
- | mov [rsp + 8*(reg->regs)], eax
- reg->is_int[reg->regs++] = 1;
- } else
-#elif defined __amd64__
- if (reg->ints >= MAX_INT_REGISTERS) {
- | mov [rsp + 8*reg->ints], eax
- reg->ints++;
- } else
-#endif
-
- {
- | mov [rsp+reg->off], eax
- reg->off += 4;
- }
-}
-
-void add_int64(Dst_DECL, struct reg_alloc* reg)
-{
-#if !defined _WIN64 && !defined __amd64__
- |.if not X64
- | mov [rsp + reg->off], RET_L
- | mov [rsp + reg->off + 4], RET_H
- |.endif
- reg->off += 8;
-#else
-
-#ifdef _WIN64
- if (reg->regs >= MAX_REGISTERS) {
- | mov [rsp + 8*reg->regs], rax
- reg->is_int[reg->regs++] = 1;
-#else
- if (reg->ints >= MAX_INT_REGISTERS) {
- | mov [rsp + 8*reg->ints], rax
- reg->ints++;
-#endif
-
- } else {
- | mov [rsp + reg->off], rax
- reg->off += 8;
- }
-#endif
-}
-
-void add_double(Dst_DECL, struct reg_alloc* reg, int is_float)
-{
-#if !defined _WIN64 && !defined __amd64__
- |.if not X64
- if (is_float) {
- | fstp dword [rsp + reg->off]
- reg->off += 4;
- } else {
- | fstp qword [rsp + reg->off]
- reg->off += 8;
- }
-#else
- |.else
-
-#ifdef _WIN64
- if (reg->regs >= MAX_REGISTERS) {
- | movq qword [rsp + 8*(reg->regs)], xmm0
- reg->is_float[reg->regs++] = 1;
-#else
- if (reg->floats >= MAX_FLOAT_REGISTERS) {
- | movq qword [rsp + 8*(MAX_INT_REGISTERS + reg->floats)], xmm0
- reg->floats++;
-#endif
-
- } else if (is_float) {
- | movd dword [rsp + reg->off], xmm0
- reg->off += 4;
- } else {
- | movq qword [rsp + reg->off], xmm0
- reg->off += 8;
- }
- |.endif
-#endif
-}
-
-#if defined _WIN64 || defined __amd64__
-#define add_pointer add_int64
-#else
-#define add_pointer add_int32
-#endif
-
-void push_function(jit_t* jit, lua_State* L, function_t func, int ct_usr, const ctype_t* ct)
-{
- size_t i, nargs;
- int num_upvals;
- const ctype_t* mbr_ct;
- jit_t* Dst = jit;
- struct reg_alloc regs;
-
- memset(&regs, 0, sizeof(regs));
- regs.off = 32 + REGISTER_STACK_SPACE;
-
- dasm_setup(Dst, build_actionlist);
-
- lua_pushvalue(L, ct_usr);
- ct_usr = lua_gettop(L);
- lua_pushvalue(L, CDATA_MT_UPVAL); /* so that CDATA_MT_UPVAL works within the closure */
- num_upvals = 2;
-
- nargs = lua_rawlen(L, ct_usr);
-
- if (ct->calling_convention != C_CALL && ct->calling_convention != STD_CALL) {
- luaL_error(L, "NYI: calling convention");
- }
-
-#if defined _WIN64 || defined __amd64__
- if (ct->has_var_arg) {
- luaL_error(L, "NYI: calling convention");
- }
-#endif
-
- | push rbp
- | mov rbp, rsp
- | push L_ARG
- | // stack is 4 or 8 (mod 16) (L_ARG, rbp, rip)
- |
- |.if X64WIN
- | mov L_ARG, rcx
- | sub rsp, 40 // 8 bytes to realign, 32 bytes shadow space for lua_gettop
- | // leave rcx as is for call to lua_gettop
- |.elif X64
- | mov L_ARG, rdi
- | sub rsp, 8 // to realign (r)
- | // leave rdi as is for call to lua_gettop
- |.else
- | mov L_ARG, [rbp + 8]
- | push L_ARG // also realigns stack
- |.endif
- |
- | call &lua_gettop
- | cmp eax, nargs
- | jl ->too_few_arguments
-
- if (!ct->has_var_arg) {
- | jg ->too_many_arguments
- }
-
- /* note movzxd rax, eax should be used here except it doesn't exist since
- * x86-64 guarentees that the upper 32 bits will always be zeroed when setting
- * eax */
- | shl rax, 4 // reserve 16 bytes per argument - this maintains the alignment mod 16
- | sub rsp, rax
- | sub rsp, 32 + REGISTER_STACK_SPACE // reserve an extra 32 to call local functions
-
- for (i = 1; i <= nargs; i++) {
- lua_rawgeti(L, ct_usr, i);
- mbr_ct = (const ctype_t*) lua_touserdata(L, -1);
-
- if (mbr_ct->pointers) {
- lua_getuservalue(L, -1);
- num_upvals += 2;
- | call_piip, &to_typed_pointer, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct
- add_pointer(Dst, &regs);
- } else {
- switch (mbr_ct->type) {
- case FUNCTION_TYPE:
- lua_getuservalue(L, -1);
- num_upvals += 2;
- | call_piip, &to_typed_pointer, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct
- add_pointer(Dst, &regs);
- break;
-
- case ENUM_TYPE:
- lua_getuservalue(L, -1);
- num_upvals += 2;
- | call_piip, &to_enum, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct
- add_int32(Dst, &regs);
- break;
-
- case INT8_TYPE:
- | call_pi, &to_int32, L_ARG, i
- | movsx eax, al
- add_int32(Dst, &regs);
- lua_pop(L, 1);
- break;
-
- case UINT8_TYPE:
- | call_pi, &to_uint32, L_ARG, i
- | movzx eax, al
- add_int32(Dst, &regs);
- lua_pop(L, 1);
- break;
-
- case INT16_TYPE:
- | call_pi, &to_int32, L_ARG, i
- | movsx eax, ax
- add_int32(Dst, &regs);
- lua_pop(L, 1);
- break;
-
- case UINT16_TYPE:
- | call_pi, &to_uint32, L_ARG, i
- | movzx eax, ax
- add_int32(Dst, &regs);
- lua_pop(L, 1);
- break;
-
- case INT32_TYPE:
- | call_pi, &to_int32, L_ARG, i
- add_int32(Dst, &regs);
- lua_pop(L, 1);
- break;
-
- case UINT32_TYPE:
- | call_pi, &to_uint32, L_ARG, i
- add_int32(Dst, &regs);
- lua_pop(L, 1);
- break;
-
- case UINTPTR_TYPE:
- | call_pi, &to_uintptr, L_ARG, i
- add_pointer(Dst, &regs);
- lua_pop(L, 1);
- break;
-
- case INT64_TYPE:
- | call_pi, &to_int64, L_ARG, i
- add_int64(Dst, &regs);
- lua_pop(L, 1);
- break;
-
- case UINT64_TYPE:
- | call_pi, &to_uint64, L_ARG, i
- add_int64(Dst, &regs);
- lua_pop(L, 1);
- break;
-
- case DOUBLE_TYPE:
- | call_pi, &to_double, L_ARG, i
- add_double(Dst, &regs, 0);
- lua_pop(L, 1);
- break;
-
- case FLOAT_TYPE:
- | call_pi, &to_double, L_ARG, i
- add_double(Dst, &regs, 1);
- lua_pop(L, 1);
- break;
-
- default:
- luaL_error(L, "NYI: call arg type");
- }
- }
- }
-
- if (ct->has_var_arg) {
- | mov rax, rsp
- | add rax, regs.off
- | call_pip, &unpack_varargs, L_ARG, nargs+1, rax
-#ifdef _WIN64
- for (i = nargs; i < 4; i++) {
- regs.is_int[i] = regs.is_float[i] = 1;
- }
- regs.regs = 4;
-#elif defined __amd64__
- regs.floats = 8;
- regs.ints = 6;
-#endif
- }
-
- | mov eax, [&jit->last_errno]
- | call_i, &SetLastError, eax
-
- /* remove the stack space to call local functions */
- |.if X32WIN
- | add rsp, 28 // SetLastError will have already popped 4
- |.else
- | add rsp, 32
- |.endif
-
-#ifdef _WIN64
- |.if X64WIN
- switch (regs.regs) {
- case 4:
- if (regs.is_float[3]) {
- | movq xmm3, qword [rsp + 8*3]
- }
- if (regs.is_int[3]) {
- | mov r9, [rsp + 8*3]
- }
- case 3:
- if (regs.is_float[2]) {
- | movq xmm2, qword [rsp + 8*2]
- }
- if (regs.is_int[2]) {
- | mov r8, [rsp + 8*2]
- }
- case 2:
- if (regs.is_float[1]) {
- | movq xmm1, qword [rsp + 8*1]
- }
- if (regs.is_int[1]) {
- | mov rdx, [rsp + 8*1]
- }
- case 1:
- if (regs.is_float[0]) {
- | movq xmm0, qword [rsp]
- }
- if (regs.is_int[0]) {
- | mov rcx, [rsp]
- }
- case 0:
- break;
- }
-
- /* don't remove the space for the registers as we need 32 bytes of register overflow space */
- assert(REGISTER_STACK_SPACE == 32);
-
-#elif defined __amd64__
- |.elif X64
- switch (regs.floats) {
- case 8:
- | movq xmm7, qword [rsp + 8*(MAX_INT_REGISTERS+7)]
- case 7:
- | movq xmm6, qword [rsp + 8*(MAX_INT_REGISTERS+6)]
- case 6:
- | movq xmm5, qword [rsp + 8*(MAX_INT_REGISTERS+5)]
- case 5:
- | movq xmm4, qword [rsp + 8*(MAX_INT_REGISTERS+4)]
- case 4:
- | movq xmm3, qword [rsp + 8*(MAX_INT_REGISTERS+3)]
- case 3:
- | movq xmm2, qword [rsp + 8*(MAX_INT_REGISTERS+2)]
- case 2:
- | movq xmm1, qword [rsp + 8*(MAX_INT_REGISTERS+1)]
- case 1:
- | movq xmm0, qword [rsp + 8*(MAX_INT_REGISTERS)]
- case 0:
- break;
- }
-
- switch (regs.ints) {
- case 6:
- | mov r9, [rsp + 8*5]
- case 5:
- | mov r8, [rsp + 8*4]
- case 4:
- | mov rcx, [rsp + 8*3]
- case 3:
- | mov rdx, [rsp + 8*2]
- case 2:
- | mov rsi, [rsp + 8*1]
- case 1:
- | mov rdi, [rsp]
- case 0:
- break;
- }
-
- | add rsp, REGISTER_STACK_SPACE
- |.endif
-#endif
-
-#ifdef __amd64__
- if (ct->has_var_arg) {
- /* al stores an upper limit on the number of float register, note that
- * its allowed to be more than the actual number of float registers used as
- * long as its 0-8 */
- |.if X64 and not X64WIN
- | mov al, 8
- |.endif
- }
-#endif
-
- | call &func
- | sub rsp, 48 // 32 to be able to call local functions, 16 so we can store some local variables
-
- /* note on windows X86 the stack may be only aligned to 4 (stdcall will
- * have popped a multiple of 4 bytes), but we don't need 16 byte alignment on
- * that platform
- */
-
- lua_rawgeti(L, ct_usr, 0);
- mbr_ct = (const ctype_t*) lua_touserdata(L, -1);
-
- if (mbr_ct->pointers || mbr_ct->type == UINTPTR_TYPE) {
- lua_getuservalue(L, -1);
- num_upvals += 2;
- | mov [rsp+32], rax // save the pointer
- | get_errno
- | call_pip, &push_cdata, L_ARG, lua_upvalueindex(num_upvals), mbr_ct
- | mov rcx, [rsp+32]
- | mov [rax], rcx // *(void**) cdata = val
- | mov eax, 1
- | return
-
- } else {
- switch (mbr_ct->type) {
- case INT64_TYPE:
- case UINT64_TYPE:
- num_upvals++;
- | // save the return value
- |.if X64
- | mov [rsp+32], rax
- |.else
- | mov [rsp+36], edx // high
- | mov [rsp+32], eax // low
- |.endif
- |
- | get_errno
- | call_pip, &push_cdata, L_ARG, lua_upvalueindex(num_upvals), mbr_ct
- |
- | // *(int64_t*) cdata = val
- |.if X64
- | mov rcx, [rsp+32]
- | mov [rax], rcx
- |.else
- | mov rcx, [rsp+36]
- | mov rdx, [rsp+32]
- | mov [rax+4], rcx
- | mov [rax], rdx
- |.endif
- |
- | mov eax, 1
- | return
- break;
-
- case VOID_TYPE:
- | jmp ->return_void
- lua_pop(L, 1);
- break;
-
- case BOOL_TYPE:
- | jmp ->return_bool
- lua_pop(L, 1);
- break;
-
- case INT8_TYPE:
- | movsx eax, al
- | jmp ->return_int
- lua_pop(L, 1);
- break;
-
- case INT16_TYPE:
- | movsx eax, ax
- | jmp ->return_int
- lua_pop(L, 1);
- break;
-
- case UINT8_TYPE:
- | movzx eax, al
- | jmp ->return_int
- lua_pop(L, 1);
- break;
-
- case UINT16_TYPE:
- | movzx eax, ax
- | jmp ->return_int
- lua_pop(L, 1);
- break;
-
- case INT32_TYPE:
- case ENUM_TYPE:
- | jmp ->return_int
- lua_pop(L, 1);
- break;
-
- case UINT32_TYPE:
- | jmp ->return_uint
- lua_pop(L, 1);
- break;
-
- case DOUBLE_TYPE:
- case FLOAT_TYPE:
- | jmp ->return_double
- lua_pop(L, 1);
- break;
-
- default:
- luaL_error(L, "NYI: call return type");
- }
- }
-
- assert(lua_gettop(L) == ct_usr + num_upvals - 1);
- lua_pushcclosure(L, (lua_CFunction) compile(Dst, L), num_upvals);
-}
-
+|.if X64 // choose the DynASM instruction set: x64 when X64 is defined, x86 otherwise
+|.arch x64
+|.else
+|.arch x86
+|.endif
+
+|.actionlist build_actionlist // the action list handed to dasm_setup() below
+|.globalnames globnames
+
+|.if not X64
+|.define RET_H, edx // for int64_t returns
+|.define RET_L, eax // low half of the edx:eax pair
+|.endif
+/* Assemble the code fragments shared by every generated call wrapper: the
+ * return_void, return_int, return_double, return_bool and return_uint
+ * epilogues and the too_few_arguments and too_many_arguments error stubs.
+ * The per-architecture call_ macros, L_ARG, the return macro and get_errno
+ * defined here are also used by the code emitted in push_function().
+ *
+ * jit - DynASM state; set up with build_actionlist and handed to compile()
+ * L - Lua state, passed through to compile()
+ */
+void compile_globals(jit_t* jit, lua_State* L)
+{
+ jit_t* Dst = jit;
+ dasm_setup(Dst, build_actionlist);
+
+ /* Note: since the return code uses EBP to reset the stack pointer, we
+ * don't have to track the amount of stack space used. It also means we
+ * can handle stdcall and cdecl with the same code.
+ */
+
+ /* Note the various call_* functions want 32 bytes of 16 byte aligned
+ * stack
+ *
+ * Three variants of each macro follow: Win64 (arguments in rcx, rdx, r8,
+ * r9), other x64 (arguments in rdi, rsi, rdx, rcx) and 32-bit x86
+ * (arguments written to [rsp], [rsp+4], ...).
+ * NOTE(review): the macro suffix presumably lists the argument kinds in
+ * order (p = pointer, i = int) - confirm against the callers.
+ */
+
+ |.if X64WIN
+ |.macro call_piip, func, arg0, arg1, arg2, arg3
+ | mov r9, arg3
+ | mov r8, arg2
+ | mov edx, arg1
+ | mov rcx, arg0
+ | call func
+ |.endmacro
+ |.macro call_pip, func, arg0, arg1, arg2
+ | mov r8, arg2
+ | mov edx, arg1
+ | mov rcx, arg0
+ | call func
+ |.endmacro
+ |.macro call_pp, func, arg0, arg1
+ | mov rdx, arg1
+ | mov rcx, arg0
+ | call func
+ |.endmacro
+ |.macro call_pi, func, arg0, arg1
+ | mov edx, arg1
+ | mov rcx, arg0
+ | call func
+ |.endmacro
+ |.macro call_i, func, arg0
+ | mov ecx, arg0
+ | call func
+ |.endmacro
+ |
+ |.elif X64
+ |.macro call_piip, func, arg0, arg1, arg2, arg3
+ | mov rcx, arg3
+ | mov edx, arg2
+ | mov esi, arg1
+ | mov rdi, arg0
+ | call func
+ |.endmacro
+ |.macro call_pip, func, arg0, arg1, arg2
+ | mov rdx, arg2
+ | mov esi, arg1
+ | mov rdi, arg0
+ | call func
+ |.endmacro
+ |.macro call_pp, func, arg0, arg1
+ | mov rsi, arg1
+ | mov rdi, arg0
+ | call func
+ |.endmacro
+ |.macro call_pi, func, arg0, arg1
+ | mov esi, arg1
+ | mov rdi, arg0
+ | call func
+ |.endmacro
+ |.macro call_i, func, arg0
+ | mov edi, arg0
+ | call func
+ |.endmacro
+ |
+ |.else
+ | // define the 64bit registers to the 32 bit counterparts, so the common
+ | // code can use r*x for all pointers
+ |.define rax, eax
+ |.define rcx, ecx
+ |.define rdx, edx
+ |.define rsp, esp
+ |.define rbp, ebp
+ |.define rdi, edi
+ |.define rsi, esi
+ |.macro call_piip, func, arg0, arg1, arg2, arg3
+ | mov dword [rsp+12], arg3
+ | mov dword [rsp+8], arg2
+ | mov dword [rsp+4], arg1
+ | mov dword [rsp], arg0
+ | call func
+ |.endmacro
+ |.macro call_pip, func, arg0, arg1, arg2
+ | mov dword [rsp+8], arg2
+ | mov dword [rsp+4], arg1
+ | mov dword [rsp], arg0
+ | call func
+ |.endmacro
+ |.macro call_pp, func, arg0, arg1
+ | mov dword [rsp+4], arg1
+ | mov dword [rsp], arg0
+ | call func
+ |.endmacro
+ |.macro call_pi, func, arg0, arg1
+ | call_pp, func, arg0, arg1
+ |.endmacro
+ |.macro call_i, func, arg0
+ | mov dword [rsp], arg0
+ | call func
+ |.endmacro
+ |
+ |.endif
+
+ |.if X64
+ |.define L_ARG, r12
+ |.else
+ |.define L_ARG, rdi
+ |.endif
+
+ |.macro return
+ |.if X64
+ | mov L_ARG, [rbp-8] // reload the L_ARG register value that push_function's prologue pushed below rbp
+ |.else
+ | mov L_ARG, [rbp-4]
+ |.endif
+ | mov rsp, rbp
+ | pop rbp
+ | ret
+ |.endmacro
+
+ |.macro get_errno // note trashes registers
+ | call &GetLastError
+ | mov dword [&jit->last_errno], eax
+ |.endmacro
+
+ /* the general idea for the return functions is:
+ * 1) Save return value on stack
+ * 2) Call get_errno (this trashes the registers hence #1)
+ * 3) Unpack return value from stack
+ * 4) Call lua push function
+ * 5) Set eax to number of returned args (0 or 1)
+ * 6) Call return which pops our stack frame
+ */
+
+ |->return_void:
+ | get_errno
+ | mov eax, 0
+ | return
+
+ |->return_int:
+ | mov [rsp+32], eax
+ | get_errno
+ |
+ |.if X64WIN
+ | cvtsi2sd xmm1, dword [rsp+32]
+ | mov rcx, L_ARG
+ |.elif X64
+ | cvtsi2sd xmm0, dword [rsp+32]
+ | mov rdi, L_ARG
+ |.else
+ | fild dword [rsp+32]
+ | fstp qword [rsp+4]
+ | mov [rsp], L_ARG
+ |.endif
+ |
+ | call &lua_pushnumber
+ | mov eax, 1
+ | return
+
+ |->return_double:
+ |.if X64
+ | movq qword [rsp+32], xmm0
+ |.else
+ | fstp qword [rsp+4] // note get_errno doesn't require any stack on x86
+ |.endif
+ |
+ | get_errno
+ |
+ |.if X64WIN
+ | movq xmm1, qword [rsp+32]
+ | mov rcx, L_ARG
+ |.elif X64
+ | movq xmm0, qword [rsp+32]
+ | mov rdi, L_ARG
+ |.else
+ | mov [rsp], L_ARG
+ |.endif
+ | call &lua_pushnumber
+ | mov eax, 1
+ | return
+
+ |->return_bool:
+ | mov [rsp+32], eax
+ | get_errno
+ | mov eax, [rsp+32]
+ | call_pi, &lua_pushboolean, L_ARG, eax
+ | mov eax, 1
+ | return
+
+ |->return_uint:
+ | mov [rsp+32], eax
+ | get_errno
+ | mov eax, [rsp+32]
+ | call_pi, &push_uint, L_ARG, eax
+ | mov eax, 1
+ | return
+
+ |->too_few_arguments:
+ | call_pp, &luaL_error, L_ARG, &"too few arguments"
+
+ |->too_many_arguments:
+ | call_pp, &luaL_error, L_ARG, &"too many arguments"
+
+ compile(Dst, L);
+}
+
+/* Total the stack bytes taken by the arguments whose ctypes are stored at
+ * integer keys 1..n of the value at stack index usr: sizeof(void*) for a
+ * pointer, 8 for double / int64 / uint64, and 4 for every other type.
+ * Struct and union arguments raise a Lua error (not yet implemented).
+ *
+ * L   - Lua state holding the ctype list
+ * usr - stack index of the ctype list
+ *
+ * Returns the summed size in bytes; the Lua stack is left balanced.
+ */
+int x86_stack_required(lua_State* L, int usr)
+{
+    const size_t count = lua_rawlen(L, usr);
+    int bytes = 0;
+    size_t idx;
+
+    for (idx = 1; idx <= count; ++idx) {
+        const ctype_t* arg;
+
+        lua_rawgeti(L, usr, idx);
+        arg = (const ctype_t*) lua_touserdata(L, -1);
+
+        if (arg->pointers) {
+            bytes += sizeof(void*);
+        } else {
+            switch (arg->type) {
+            case DOUBLE_TYPE:
+            case UINT64_TYPE:
+            case INT64_TYPE:
+                bytes += 8;
+                break;
+
+            case STRUCT_TYPE:
+            case UNION_TYPE:
+                luaL_error(L, "NYI - structs as arguments");
+                break;
+
+            default:
+                /* remaining numeric types are 4 bytes or narrower */
+                bytes += 4;
+                break;
+            }
+        }
+
+        lua_pop(L, 1);
+    }
+
+    return bytes;
+}
+/* Number of argument registers available on each 64-bit target. The add_int32,
+ * add_int64 and add_double helpers compare their counters against these.
+ */
+#ifdef _WIN64
+#define MAX_REGISTERS 4 /* rcx, rdx, r8, r9 */
+#endif
+
+#ifdef __amd64__
+#define MAX_INT_REGISTERS 6 /* rdi, rsi, rdx, rcx, r8, r9 */
+#define MAX_FLOAT_REGISTERS 8 /* xmm0-7 */
+#endif
+/* Code-generation-time bookkeeping for where the next argument of a generated
+ * call is stored; push_function() zeroes one instance per wrapper.
+ */
+struct reg_alloc {
+#ifdef _WIN64
+ int regs; /* register slots handed out so far (ints and floats share them) */
+ int is_float[4]; /* non-zero: push_function loads slot n into xmm<n> */
+ int is_int[4]; /* non-zero: push_function loads slot n into rcx/rdx/r8/r9 */
+#elif defined __amd64__
+ int floats; /* float register slots handed out so far */
+ int ints; /* integer register slots handed out so far */
+#endif
+ size_t off; /* byte offset from rsp of the next stack-passed argument */
+};
+/* Bytes reserved in the generated frame for staging register arguments:
+ * 4 slots of 8 bytes on Win64, 14 slots of 8 bytes on other amd64 targets
+ * (MAX_INT_REGISTERS + MAX_FLOAT_REGISTERS = 6 + 8), none on 32-bit x86.
+ */
+#ifdef _WIN64
+#define REGISTER_STACK_SPACE (4*8)
+#elif defined __amd64__
+#define REGISTER_STACK_SPACE (14*8)
+#else
+#define REGISTER_STACK_SPACE 0
+#endif
+
+/* Emit code that stores the 32-bit value in eax as the next argument of the
+ * call being built, and advance the allocation state.
+ *
+ * Frame layout while arguments are converted (set up by push_function):
+ *   [rsp, rsp+32)       scratch for calls to the conversion helpers
+ *   [rsp+32, ...)       REGISTER_STACK_SPACE bytes of register staging slots
+ *   [rsp+reg->off, ...) stack-passed arguments
+ * push_function drops the 32 scratch bytes before loading the argument
+ * registers from [rsp + 8*n], so staging slot n lives at rsp + 32 + 8*n here.
+ *
+ * On 64-bit targets the value takes a register slot while one is free and
+ * otherwise an 8-byte stack slot. Only the low 4 bytes of a slot are written.
+ * On 32-bit x86 it always takes a 4-byte stack slot.
+ *
+ * Dst_DECL - DynASM state the code is appended to
+ * reg      - allocation state, updated in place
+ */
+void add_int32(Dst_DECL, struct reg_alloc* reg)
+{
+#ifdef _WIN64
+    if (reg->regs < MAX_REGISTERS) {
+        | mov [rsp + 32 + 8*(reg->regs)], eax
+        reg->is_int[reg->regs++] = 1;
+    } else {
+        | mov [rsp + reg->off], eax
+        reg->off += 8;
+    }
+#elif defined __amd64__
+    if (reg->ints < MAX_INT_REGISTERS) {
+        | mov [rsp + 32 + 8*reg->ints], eax
+        reg->ints++;
+    } else {
+        | mov [rsp + reg->off], eax
+        reg->off += 8;
+    }
+#else
+    | mov [rsp + reg->off], eax
+    reg->off += 4;
+#endif
+}
+
+/* Emit code that stores a 64-bit integer (rax on 64-bit targets, the
+ * RET_H:RET_L pair on 32-bit x86) as the next argument of the call being
+ * built, and advance the allocation state.
+ *
+ * On 64-bit targets the value takes a register staging slot while one is
+ * free, otherwise a stack slot at reg->off. Staging slot n is written at
+ * rsp + 32 + 8*n: push_function removes the 32 bytes of helper-call scratch
+ * before it loads the argument registers from [rsp + 8*n].
+ * On 32-bit x86 the value always takes 8 bytes of stack at reg->off.
+ *
+ * Dst_DECL - DynASM state the code is appended to
+ * reg      - allocation state, updated in place
+ */
+void add_int64(Dst_DECL, struct reg_alloc* reg)
+{
+#if !defined _WIN64 && !defined __amd64__
+    |.if not X64
+    | mov [rsp + reg->off], RET_L
+    | mov [rsp + reg->off + 4], RET_H
+    |.endif
+    reg->off += 8;
+#else
+
+#ifdef _WIN64
+    if (reg->regs < MAX_REGISTERS) {
+        | mov [rsp + 32 + 8*reg->regs], rax
+        reg->is_int[reg->regs++] = 1;
+#else
+    if (reg->ints < MAX_INT_REGISTERS) {
+        | mov [rsp + 32 + 8*reg->ints], rax
+        reg->ints++;
+#endif
+
+    } else {
+        | mov [rsp + reg->off], rax
+        reg->off += 8;
+    }
+#endif
+}
+
+/* Emit code that stores the floating point value just produced by to_double
+ * (st0 on 32-bit x86, xmm0 on 64-bit targets) as the next argument of the
+ * call being built, and advance the allocation state.
+ *
+ * On 32-bit x86 the value is popped from the x87 stack into a 4-byte (float)
+ * or 8-byte (double) stack slot; fstp dword performs the narrowing.
+ *
+ * On 64-bit targets a float argument is first narrowed in place with
+ * cvtsd2ss so that the low 32 bits of xmm0 hold the single-precision value.
+ * The value then takes a register staging slot while one is free, otherwise
+ * an 8-byte stack slot at reg->off. Staging slots are written at
+ * rsp + 32 + 8*n: push_function removes the 32 bytes of helper-call scratch
+ * before it loads the argument registers from [rsp + 8*n].
+ *
+ * Dst_DECL - DynASM state the code is appended to
+ * reg      - allocation state, updated in place
+ * is_float - non-zero if the parameter type is float rather than double
+ */
+void add_double(Dst_DECL, struct reg_alloc* reg, int is_float)
+{
+#if !defined _WIN64 && !defined __amd64__
+    |.if not X64
+    if (is_float) {
+        | fstp dword [rsp + reg->off]
+        reg->off += 4;
+    } else {
+        | fstp qword [rsp + reg->off]
+        reg->off += 8;
+    }
+#else
+    |.else
+
+    if (is_float) {
+        /* the callee reads a single-precision value from the low 32 bits */
+        | cvtsd2ss xmm0, xmm0
+    }
+
+#ifdef _WIN64
+    if (reg->regs < MAX_REGISTERS) {
+        | movq qword [rsp + 32 + 8*(reg->regs)], xmm0
+        reg->is_float[reg->regs++] = 1;
+#else
+    if (reg->floats < MAX_FLOAT_REGISTERS) {
+        | movq qword [rsp + 32 + 8*(MAX_INT_REGISTERS + reg->floats)], xmm0
+        reg->floats++;
+#endif
+
+    } else if (is_float) {
+        | movd dword [rsp + reg->off], xmm0
+        reg->off += 8; /* stack slots are 8 bytes wide on 64-bit targets */
+    } else {
+        | movq qword [rsp + reg->off], xmm0
+        reg->off += 8;
+    }
+    |.endif
+#endif
+}
+/* A pointer argument is stored like an integer of the native pointer width. */
+#if defined _WIN64 || defined __amd64__
+#define add_pointer add_int64
+#else
+#define add_pointer add_int32
+#endif
+/* Generate a native wrapper for calling the C function func from Lua and
+ * push it on the Lua stack as a C closure.
+ *
+ * The emitted code checks the Lua argument count against the declared
+ * parameter list, converts each argument with the matching to_ helper,
+ * stages the values on the machine stack (and, on 64-bit targets, loads the
+ * argument registers from the staging slots), passes the saved last_errno
+ * to SetLastError, calls func, and then jumps to or inlines the epilogue
+ * for the return type.
+ *
+ * jit - DynASM state the wrapper is assembled into
+ * L - Lua state; the finished closure is pushed on its stack
+ * func - address of the C function to call
+ * ct_usr - stack index of the ctype list: entry 0 is the return type,
+ * entries 1..n are the parameter types
+ * ct - ctype of the function itself (calling convention, has_var_arg)
+ *
+ * Raises a Lua error for calling conventions other than C_CALL / STD_CALL,
+ * for varargs on 64-bit targets, and for unsupported argument or return
+ * types.
+ *
+ * NOTE(review): DOUBLE_TYPE and FLOAT_TYPE returns share return_double; on
+ * x64 that path copies 64 bits of xmm0 unchanged - confirm float returns.
+ */
+void push_function(jit_t* jit, lua_State* L, function_t func, int ct_usr, const ctype_t* ct)
+{
+ size_t i, nargs;
+ int num_upvals;
+ const ctype_t* mbr_ct;
+ jit_t* Dst = jit;
+ struct reg_alloc regs;
+
+ memset(&regs, 0, sizeof(regs));
+ regs.off = 32 + REGISTER_STACK_SPACE; /* stack-passed args start above the 32 scratch bytes and the register staging area */
+
+ dasm_setup(Dst, build_actionlist);
+
+ lua_pushvalue(L, ct_usr);
+ ct_usr = lua_gettop(L);
+ lua_pushvalue(L, CDATA_MT_UPVAL); /* so that CDATA_MT_UPVAL works within the closure */
+ num_upvals = 2;
+
+ nargs = lua_rawlen(L, ct_usr);
+
+ if (ct->calling_convention != C_CALL && ct->calling_convention != STD_CALL) {
+ luaL_error(L, "NYI: calling convention");
+ }
+
+#if defined _WIN64 || defined __amd64__
+ if (ct->has_var_arg) {
+ luaL_error(L, "NYI: calling convention");
+ }
+#endif
+
+ | push rbp
+ | mov rbp, rsp
+ | push L_ARG
+ | // stack is 4 or 8 (mod 16) (L_ARG, rbp, rip)
+ |
+ |.if X64WIN
+ | mov L_ARG, rcx
+ | sub rsp, 40 // 8 bytes to realign, 32 bytes shadow space for lua_gettop
+ | // leave rcx as is for call to lua_gettop
+ |.elif X64
+ | mov L_ARG, rdi
+ | sub rsp, 8 // to realign (r)
+ | // leave rdi as is for call to lua_gettop
+ |.else
+ | mov L_ARG, [rbp + 8]
+ | push L_ARG // also realigns stack
+ |.endif
+ |
+ | call &lua_gettop
+ | cmp eax, nargs
+ | jl ->too_few_arguments
+
+ if (!ct->has_var_arg) {
+ | jg ->too_many_arguments
+ }
+
+ /* note movzxd rax, eax should be used here except it doesn't exist since
+ * x86-64 guarantees that the upper 32 bits will always be zeroed when setting
+ * eax */
+ | shl rax, 4 // reserve 16 bytes per argument - this maintains the alignment mod 16
+ | sub rsp, rax
+ | sub rsp, 32 + REGISTER_STACK_SPACE // reserve an extra 32 to call local functions
+
+ for (i = 1; i <= nargs; i++) {
+ lua_rawgeti(L, ct_usr, i);
+ mbr_ct = (const ctype_t*) lua_touserdata(L, -1);
+
+ if (mbr_ct->pointers) {
+ lua_getuservalue(L, -1);
+ num_upvals += 2; /* the ctype and its uservalue both stay on the Lua stack as upvalues */
+ | call_piip, &to_typed_pointer, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct
+ add_pointer(Dst, &regs);
+ } else {
+ switch (mbr_ct->type) {
+ case FUNCTION_TYPE:
+ lua_getuservalue(L, -1);
+ num_upvals += 2;
+ | call_piip, &to_typed_pointer, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct
+ add_pointer(Dst, &regs);
+ break;
+
+ case ENUM_TYPE:
+ lua_getuservalue(L, -1);
+ num_upvals += 2;
+ | call_piip, &to_enum, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct
+ add_int32(Dst, &regs);
+ break;
+
+ case INT8_TYPE:
+ | call_pi, &to_int32, L_ARG, i
+ | movsx eax, al
+ add_int32(Dst, &regs);
+ lua_pop(L, 1);
+ break;
+
+ case UINT8_TYPE:
+ | call_pi, &to_uint32, L_ARG, i
+ | movzx eax, al
+ add_int32(Dst, &regs);
+ lua_pop(L, 1);
+ break;
+
+ case INT16_TYPE:
+ | call_pi, &to_int32, L_ARG, i
+ | movsx eax, ax
+ add_int32(Dst, &regs);
+ lua_pop(L, 1);
+ break;
+
+ case UINT16_TYPE:
+ | call_pi, &to_uint32, L_ARG, i
+ | movzx eax, ax
+ add_int32(Dst, &regs);
+ lua_pop(L, 1);
+ break;
+
+ case INT32_TYPE:
+ | call_pi, &to_int32, L_ARG, i
+ add_int32(Dst, &regs);
+ lua_pop(L, 1);
+ break;
+
+ case UINT32_TYPE:
+ | call_pi, &to_uint32, L_ARG, i
+ add_int32(Dst, &regs);
+ lua_pop(L, 1);
+ break;
+
+ case UINTPTR_TYPE:
+ | call_pi, &to_uintptr, L_ARG, i
+ add_pointer(Dst, &regs);
+ lua_pop(L, 1);
+ break;
+
+ case INT64_TYPE:
+ | call_pi, &to_int64, L_ARG, i
+ add_int64(Dst, &regs);
+ lua_pop(L, 1);
+ break;
+
+ case UINT64_TYPE:
+ | call_pi, &to_uint64, L_ARG, i
+ add_int64(Dst, &regs);
+ lua_pop(L, 1);
+ break;
+
+ case DOUBLE_TYPE:
+ | call_pi, &to_double, L_ARG, i
+ add_double(Dst, &regs, 0);
+ lua_pop(L, 1);
+ break;
+
+ case FLOAT_TYPE:
+ | call_pi, &to_double, L_ARG, i
+ add_double(Dst, &regs, 1);
+ lua_pop(L, 1);
+ break;
+
+ default:
+ luaL_error(L, "NYI: call arg type");
+ }
+ }
+ }
+
+ if (ct->has_var_arg) {
+ | mov rax, rsp
+ | add rax, regs.off
+ | call_pip, &unpack_varargs, L_ARG, nargs+1, rax
+#ifdef _WIN64
+ for (i = nargs; i < 4; i++) {
+ regs.is_int[i] = regs.is_float[i] = 1;
+ }
+ regs.regs = 4;
+#elif defined __amd64__
+ regs.floats = 8;
+ regs.ints = 6;
+#endif
+ }
+
+ | mov eax, [&jit->last_errno]
+ | call_i, &SetLastError, eax
+
+ /* remove the stack space to call local functions */
+ |.if X32WIN
+ | add rsp, 28 // SetLastError will have already popped 4
+ |.else
+ | add rsp, 32
+ |.endif
+
+#ifdef _WIN64
+ |.if X64WIN
+ switch (regs.regs) {
+ case 4:
+ if (regs.is_float[3]) {
+ | movq xmm3, qword [rsp + 8*3]
+ }
+ if (regs.is_int[3]) {
+ | mov r9, [rsp + 8*3]
+ }
+ case 3:
+ if (regs.is_float[2]) {
+ | movq xmm2, qword [rsp + 8*2]
+ }
+ if (regs.is_int[2]) {
+ | mov r8, [rsp + 8*2]
+ }
+ case 2:
+ if (regs.is_float[1]) {
+ | movq xmm1, qword [rsp + 8*1]
+ }
+ if (regs.is_int[1]) {
+ | mov rdx, [rsp + 8*1]
+ }
+ case 1:
+ if (regs.is_float[0]) {
+ | movq xmm0, qword [rsp]
+ }
+ if (regs.is_int[0]) {
+ | mov rcx, [rsp]
+ }
+ case 0:
+ break;
+ }
+
+ /* don't remove the space for the registers as we need 32 bytes of register overflow space */
+ assert(REGISTER_STACK_SPACE == 32);
+
+#elif defined __amd64__
+ |.elif X64
+ switch (regs.floats) {
+ case 8:
+ | movq xmm7, qword [rsp + 8*(MAX_INT_REGISTERS+7)]
+ case 7:
+ | movq xmm6, qword [rsp + 8*(MAX_INT_REGISTERS+6)]
+ case 6:
+ | movq xmm5, qword [rsp + 8*(MAX_INT_REGISTERS+5)]
+ case 5:
+ | movq xmm4, qword [rsp + 8*(MAX_INT_REGISTERS+4)]
+ case 4:
+ | movq xmm3, qword [rsp + 8*(MAX_INT_REGISTERS+3)]
+ case 3:
+ | movq xmm2, qword [rsp + 8*(MAX_INT_REGISTERS+2)]
+ case 2:
+ | movq xmm1, qword [rsp + 8*(MAX_INT_REGISTERS+1)]
+ case 1:
+ | movq xmm0, qword [rsp + 8*(MAX_INT_REGISTERS)]
+ case 0:
+ break;
+ }
+
+ switch (regs.ints) {
+ case 6:
+ | mov r9, [rsp + 8*5]
+ case 5:
+ | mov r8, [rsp + 8*4]
+ case 4:
+ | mov rcx, [rsp + 8*3]
+ case 3:
+ | mov rdx, [rsp + 8*2]
+ case 2:
+ | mov rsi, [rsp + 8*1]
+ case 1:
+ | mov rdi, [rsp]
+ case 0:
+ break;
+ }
+
+ | add rsp, REGISTER_STACK_SPACE
+ |.endif
+#endif
+
+#ifdef __amd64__
+ if (ct->has_var_arg) {
+ /* al stores an upper limit on the number of float registers, note that
+ * it's allowed to be more than the actual number of float registers used as
+ * long as it's 0-8 */
+ |.if X64 and not X64WIN
+ | mov al, 8
+ |.endif
+ }
+#endif
+
+ | call &func
+ | sub rsp, 48 // 32 to be able to call local functions, 16 so we can store some local variables
+
+ /* note on windows X86 the stack may be only aligned to 4 (stdcall will
+ * have popped a multiple of 4 bytes), but we don't need 16 byte alignment on
+ * that platform
+ */
+
+ lua_rawgeti(L, ct_usr, 0);
+ mbr_ct = (const ctype_t*) lua_touserdata(L, -1);
+
+ if (mbr_ct->pointers || mbr_ct->type == UINTPTR_TYPE) {
+ lua_getuservalue(L, -1);
+ num_upvals += 2;
+ | mov [rsp+32], rax // save the pointer
+ | get_errno
+ | call_pip, &push_cdata, L_ARG, lua_upvalueindex(num_upvals), mbr_ct
+ | mov rcx, [rsp+32]
+ | mov [rax], rcx // *(void**) cdata = val
+ | mov eax, 1
+ | return
+
+ } else {
+ switch (mbr_ct->type) {
+ case INT64_TYPE:
+ case UINT64_TYPE:
+ num_upvals++; /* the return ctype stays on the Lua stack as an upvalue */
+ | // save the return value
+ |.if X64
+ | mov [rsp+32], rax
+ |.else
+ | mov [rsp+36], edx // high
+ | mov [rsp+32], eax // low
+ |.endif
+ |
+ | get_errno
+ | call_pip, &push_cdata, L_ARG, lua_upvalueindex(num_upvals), mbr_ct
+ |
+ | // *(int64_t*) cdata = val
+ |.if X64
+ | mov rcx, [rsp+32]
+ | mov [rax], rcx
+ |.else
+ | mov rcx, [rsp+36]
+ | mov rdx, [rsp+32]
+ | mov [rax+4], rcx
+ | mov [rax], rdx
+ |.endif
+ |
+ | mov eax, 1
+ | return
+ break;
+
+ case VOID_TYPE:
+ | jmp ->return_void
+ lua_pop(L, 1);
+ break;
+
+ case BOOL_TYPE:
+ | jmp ->return_bool
+ lua_pop(L, 1);
+ break;
+
+ case INT8_TYPE:
+ | movsx eax, al
+ | jmp ->return_int
+ lua_pop(L, 1);
+ break;
+
+ case INT16_TYPE:
+ | movsx eax, ax
+ | jmp ->return_int
+ lua_pop(L, 1);
+ break;
+
+ case UINT8_TYPE:
+ | movzx eax, al
+ | jmp ->return_int
+ lua_pop(L, 1);
+ break;
+
+ case UINT16_TYPE:
+ | movzx eax, ax
+ | jmp ->return_int
+ lua_pop(L, 1);
+ break;
+
+ case INT32_TYPE:
+ case ENUM_TYPE:
+ | jmp ->return_int
+ lua_pop(L, 1);
+ break;
+
+ case UINT32_TYPE:
+ | jmp ->return_uint
+ lua_pop(L, 1);
+ break;
+
+ case DOUBLE_TYPE:
+ case FLOAT_TYPE:
+ | jmp ->return_double
+ lua_pop(L, 1);
+ break;
+
+ default:
+ luaL_error(L, "NYI: call return type");
+ }
+ }
+
+ assert(lua_gettop(L) == ct_usr + num_upvals - 1);
+ lua_pushcclosure(L, (lua_CFunction) compile(Dst, L), num_upvals);
+}
+