diff options
author | James R. McKaskill <jmckaskill@gmail.com> | 2011-07-15 04:34:02 +0400 |
---|---|---|
committer | James R. McKaskill <jmckaskill@gmail.com> | 2011-07-15 04:34:02 +0400 |
commit | b8f7239faa544b7fc936747a952d4912385a1835 (patch) | |
tree | 3a2569838bbfd0d8f42f76f58afa7c9082ad1830 /call_x86.dasc | |
parent | 24b26c8e59fd7b8f0556b06d7748df70ba503af9 (diff) |
Normalising line endings
Diffstat (limited to 'call_x86.dasc')
-rw-r--r-- | call_x86.dasc | 1532 |
1 files changed, 766 insertions, 766 deletions
diff --git a/call_x86.dasc b/call_x86.dasc index cd6ab15..224f02a 100644 --- a/call_x86.dasc +++ b/call_x86.dasc @@ -1,766 +1,766 @@ -|.if X64
-|.arch x64
-|.else
-|.arch x86
-|.endif
-
-|.actionlist build_actionlist
-|.globalnames globnames
-
-|.if not X64
-|.define RET_H, edx // for int64_t returns
-|.define RET_L, eax
-|.endif
-
-void compile_globals(jit_t* jit, lua_State* L)
-{ /* Emit the code shared by every generated call stub: runs dasm_setup()
- * with build_actionlist, defines the call_*, return and get_errno assembler
- * macros for the three targets (X64WIN, other X64, 32-bit x86), emits the
- * global labels return_void, return_int, return_double, return_bool,
- * return_uint, too_few_arguments and too_many_arguments, and finally passes
- * the buffer to compile(Dst, L), whose result is not used here.
- */
- jit_t* Dst = jit;
- dasm_setup(Dst, build_actionlist);
-
- /* Note: since the return code uses EBP to reset the stack pointer, we
- * don't have to track the amount of stack space used. It also means we
- * can handle stdcall and cdecl with the same code.
- */
-
- /* Note the various call_* functions want 32 bytes of 16 byte aligned
- * stack
- *
- * The suffix letters of each call_* macro appear to describe its
- * arguments in order: p = pointer-sized, i = 32-bit integer.
- */
-
- |.if X64WIN // arguments are loaded into rcx, rdx, r8, r9
- |.macro call_piip, func, arg0, arg1, arg2, arg3
- | mov r9, arg3
- | mov r8, arg2
- | mov edx, arg1
- | mov rcx, arg0
- | call func
- |.endmacro
- |.macro call_pip, func, arg0, arg1, arg2
- | mov r8, arg2
- | mov edx, arg1
- | mov rcx, arg0
- | call func
- |.endmacro
- |.macro call_pp, func, arg0, arg1
- | mov rdx, arg1
- | mov rcx, arg0
- | call func
- |.endmacro
- |.macro call_pi, func, arg0, arg1
- | mov edx, arg1
- | mov rcx, arg0
- | call func
- |.endmacro
- |.macro call_i, func, arg0
- | mov ecx, arg0
- | call func
- |.endmacro
- |
- |.elif X64 // other x64 targets: arguments are loaded into rdi, rsi, rdx, rcx
- |.macro call_piip, func, arg0, arg1, arg2, arg3
- | mov rcx, arg3
- | mov edx, arg2
- | mov esi, arg1
- | mov rdi, arg0
- | call func
- |.endmacro
- |.macro call_pip, func, arg0, arg1, arg2
- | mov rdx, arg2
- | mov esi, arg1
- | mov rdi, arg0
- | call func
- |.endmacro
- |.macro call_pp, func, arg0, arg1
- | mov rsi, arg1
- | mov rdi, arg0
- | call func
- |.endmacro
- |.macro call_pi, func, arg0, arg1
- | mov esi, arg1
- | mov rdi, arg0
- | call func
- |.endmacro
- |.macro call_i, func, arg0
- | mov edi, arg0
- | call func
- |.endmacro
- |
- |.else // 32-bit x86: arguments are written to the stack at [rsp], [rsp+4], ...
- | // define the 64bit registers to the 32 bit counterparts, so the common
- | // code can use r*x for all pointers
- |.define rax, eax
- |.define rcx, ecx
- |.define rdx, edx
- |.define rsp, esp
- |.define rbp, ebp
- |.define rdi, edi
- |.define rsi, esi
- |.macro call_piip, func, arg0, arg1, arg2, arg3
- | mov dword [rsp+12], arg3
- | mov dword [rsp+8], arg2
- | mov dword [rsp+4], arg1
- | mov dword [rsp], arg0
- | call func
- |.endmacro
- |.macro call_pip, func, arg0, arg1, arg2
- | mov dword [rsp+8], arg2
- | mov dword [rsp+4], arg1
- | mov dword [rsp], arg0
- | call func
- |.endmacro
- |.macro call_pp, func, arg0, arg1
- | mov dword [rsp+4], arg1
- | mov dword [rsp], arg0
- | call func
- |.endmacro
- |.macro call_pi, func, arg0, arg1 // both arguments are dword stores here, so reuse call_pp
- | call_pp, func, arg0, arg1
- |.endmacro
- |.macro call_i, func, arg0
- | mov dword [rsp], arg0
- | call func
- |.endmacro
- |
- |.endif
-
- |.if X64 // L_ARG: register passed as the first argument of the Lua API calls below
- |.define L_ARG, r12
- |.else
- |.define L_ARG, rdi
- |.endif
-
- |.macro return // reload L_ARG from its frame slot, unwind the rbp frame, ret
- |.if X64
- | mov L_ARG, [rbp-8]
- |.else
- | mov L_ARG, [rbp-4]
- |.endif
- | mov rsp, rbp
- | pop rbp
- | ret
- |.endmacro
-
- |.macro get_errno // note trashes registers
- | call &GetLastError
- | mov dword [&jit->last_errno], eax
- |.endmacro
-
- /* the general idea for the return functions is:
- * 1) Save return value on stack
- * 2) Call get_errno (this trashes the registers hence #1)
- * 3) Unpack return value from stack
- * 4) Call lua push function
- * 5) Set eax to number of returned args (0 or 1)
- * 6) Call return which pops our stack frame
- */
-
- |->return_void: // no value is pushed; eax = 0
- | get_errno
- | mov eax, 0
- | return
-
- |->return_int: // eax is converted to a double and passed to lua_pushnumber
- | mov [rsp+32], eax
- | get_errno
- |
- |.if X64WIN
- | cvtsi2sd xmm1, dword [rsp+32]
- | mov rcx, L_ARG
- |.elif X64
- | cvtsi2sd xmm0, dword [rsp+32]
- | mov rdi, L_ARG
- |.else
- | fild dword [rsp+32] // load the saved int onto the x87 stack
- | fstp qword [rsp+4] // and store it as a double in the second argument slot
- | mov [rsp], L_ARG
- |.endif
- |
- | call &lua_pushnumber
- | mov eax, 1
- | return
-
- |->return_double: // xmm0 (x64) or st0 (x86) is passed to lua_pushnumber
- |.if X64
- | movq qword [rsp+32], xmm0
- |.else
- | fstp qword [rsp+4] // note get_errno doesn't require any stack on x86
- |.endif
- |
- | get_errno
- |
- |.if X64WIN
- | movq xmm1, qword [rsp+32]
- | mov rcx, L_ARG
- |.elif X64
- | movq xmm0, qword [rsp+32]
- | mov rdi, L_ARG
- |.else
- | mov [rsp], L_ARG
- |.endif
- | call &lua_pushnumber
- | mov eax, 1
- | return
-
- |->return_bool: // eax is passed to lua_pushboolean
- | mov [rsp+32], eax
- | get_errno
- | mov eax, [rsp+32]
- | call_pi, &lua_pushboolean, L_ARG, eax
- | mov eax, 1
- | return
-
- |->return_uint: // eax is passed to push_uint
- | mov [rsp+32], eax
- | get_errno
- | mov eax, [rsp+32]
- | call_pi, &push_uint, L_ARG, eax
- | mov eax, 1
- | return
-
- |->too_few_arguments: // no return sequence follows the call; presumably luaL_error never returns
- | call_pp, &luaL_error, L_ARG, &"too few arguments"
-
- |->too_many_arguments:
- | call_pp, &luaL_error, L_ARG, &"too many arguments"
-
- compile(Dst, L);
-}
-
-/* Add up the stack bytes taken by the argument ctypes stored at indices
- * 1..lua_rawlen() of the value at stack index usr: sizeof(void*) for anything
- * with a pointer level, 8 for double/int64/uint64, 4 for every other type.
- * Struct and union arguments are reported through luaL_error(). The Lua stack
- * is left as it was found.
- */
-int x86_stack_required(lua_State* L, int usr)
-{
-    const size_t count = lua_rawlen(L, usr);
-    int total = 0;
-    size_t idx;
-
-    for (idx = 1; idx <= count; ++idx) {
-        const ctype_t* arg;
-
-        lua_rawgeti(L, usr, idx);
-        arg = (const ctype_t*) lua_touserdata(L, -1);
-
-        if (arg->pointers) {
-            total += sizeof(void*);
-        } else {
-            switch (arg->type) {
-            case DOUBLE_TYPE:
-            case UINT64_TYPE:
-            case INT64_TYPE:
-                total += 8;
-                break;
-
-            case STRUCT_TYPE:
-            case UNION_TYPE:
-                luaL_error(L, "NYI - structs as arguments");
-                break;
-
-            default:
-                /* every remaining numeric type is 4 bytes or narrower */
-                total += 4;
-                break;
-            }
-        }
-
-        lua_pop(L, 1);
-    }
-
-    return total;
-}
-
-#ifdef _WIN64 /* one shared pool of argument registers */
-#define MAX_REGISTERS 4 /* rcx, rdx, r8, r9 */
-#endif
-
-#ifdef __amd64__ /* separate integer and floating point register pools */
-#define MAX_INT_REGISTERS 6 /* rdi, rsi, rdx, rcx, r8, r9 */
-#define MAX_FLOAT_REGISTERS 8 /* xmm0-7 */
-#endif
-
-struct reg_alloc { /* argument placement state shared by the add_* helpers and push_function */
-#ifdef _WIN64
- int regs; /* count of register slots filled so far */
- int is_float[4]; /* is_float[n] != 0: slot n is loaded into xmm<n> before the call */
- int is_int[4]; /* is_int[n] != 0: slot n is loaded into rcx/rdx/r8/r9 before the call */
-#elif defined __amd64__
- int floats; /* count of float register slots filled so far */
- int ints; /* count of integer register slots filled so far */
-#endif
- size_t off; /* byte offset from rsp at which the next stack argument is stored */
-};
-
-#ifdef _WIN64 /* bytes of stack set aside for staging register arguments */
-#define REGISTER_STACK_SPACE (4*8) /* MAX_REGISTERS slots of 8 bytes */
-#elif defined __amd64__
-#define REGISTER_STACK_SPACE (14*8) /* MAX_INT_REGISTERS + MAX_FLOAT_REGISTERS slots of 8 bytes */
-#else
-#define REGISTER_STACK_SPACE 0
-#endif
-
-/* Argument staging helpers.
- *
- * Each helper emits code that stores the value just produced by a to_*()
- * conversion call into the outgoing argument area and updates *reg.
- *
- * Stack layout while arguments are being staged (see push_function):
- *   [rsp, rsp+32)                          scratch for calling local helpers
- *   [rsp+32, rsp+32+REGISTER_STACK_SPACE)  8 byte slots for register arguments
- *   [rsp+reg->off, ...)                    arguments passed on the stack
- *
- * push_function pops the 32 byte scratch area and then loads the argument
- * registers from [rsp + 8*n], so register slots must be written at
- * [rsp + 32 + 8*n] here. A register slot is only used while the relevant
- * counter is still below its MAX_* limit; after that the value goes to the
- * stack area.
- */
-
-/* Stage the 32 bit integer held in eax. */
-void add_int32(Dst_DECL, struct reg_alloc* reg)
-{
-#ifdef _WIN64
-    if (reg->regs < MAX_REGISTERS) {
-        | mov [rsp + 32 + 8*(reg->regs)], eax
-        reg->is_int[reg->regs++] = 1;
-    } else
-#elif defined __amd64__
-    if (reg->ints < MAX_INT_REGISTERS) {
-        | mov [rsp + 32 + 8*reg->ints], eax
-        reg->ints++;
-    } else
-#endif
-
-    {
-        /* NOTE(review): on the 64 bit targets stack arguments normally take
-         * 8 byte slots - confirm whether 4 is intended there */
-        | mov [rsp+reg->off], eax
-        reg->off += 4;
-    }
-}
-
-/* Stage the 64 bit integer held in rax (x64) or in RET_H:RET_L (x86). */
-void add_int64(Dst_DECL, struct reg_alloc* reg)
-{
-#if !defined _WIN64 && !defined __amd64__
-    |.if not X64
-    | mov [rsp + reg->off], RET_L
-    | mov [rsp + reg->off + 4], RET_H
-    |.endif
-    reg->off += 8;
-#else
-
-#ifdef _WIN64
-    if (reg->regs < MAX_REGISTERS) {
-        | mov [rsp + 32 + 8*reg->regs], rax
-        reg->is_int[reg->regs++] = 1;
-#else
-    if (reg->ints < MAX_INT_REGISTERS) {
-        | mov [rsp + 32 + 8*reg->ints], rax
-        reg->ints++;
-#endif
-
-    } else {
-        | mov [rsp + reg->off], rax
-        reg->off += 8;
-    }
-#endif
-}
-
-/* Stage the floating point value held in st0 (x86) or xmm0 (x64).
- * is_float selects a 4 byte store instead of an 8 byte store for values that
- * go to the stack area.
- */
-void add_double(Dst_DECL, struct reg_alloc* reg, int is_float)
-{
-#if !defined _WIN64 && !defined __amd64__
-    |.if not X64
-    if (is_float) {
-        | fstp dword [rsp + reg->off]
-        reg->off += 4;
-    } else {
-        | fstp qword [rsp + reg->off]
-        reg->off += 8;
-    }
-#else
-    |.else
-
-    /* NOTE(review): xmm0 presumably holds a double here (the caller uses
-     * to_double for FLOAT_TYPE too); no cvtsd2ss is emitted before the value
-     * is stored for a float parameter - confirm */
-#ifdef _WIN64
-    if (reg->regs < MAX_REGISTERS) {
-        | movq qword [rsp + 32 + 8*(reg->regs)], xmm0
-        reg->is_float[reg->regs++] = 1;
-#else
-    if (reg->floats < MAX_FLOAT_REGISTERS) {
-        /* float slots follow the MAX_INT_REGISTERS integer slots */
-        | movq qword [rsp + 32 + 8*(MAX_INT_REGISTERS + reg->floats)], xmm0
-        reg->floats++;
-#endif
-
-    } else if (is_float) {
-        | movd dword [rsp + reg->off], xmm0
-        reg->off += 4;
-    } else {
-        | movq qword [rsp + reg->off], xmm0
-        reg->off += 8;
-    }
-    |.endif
-#endif
-}
-
-/* Pointers are staged like the integer type of matching width. */
-#if defined _WIN64 || defined __amd64__
-#define add_pointer add_int64
-#else
-#define add_pointer add_int32
-#endif
-
-/* Generate a Lua-callable stub for the C function func and push it onto L's
- * stack as a C closure.
- *
- * jit    - DynASM state the stub is assembled into; jit->last_errno is read
- *          before and written after the call to func.
- * L      - Lua state used while building the stub.
- * func   - target of the emitted call instruction.
- * ct_usr - stack index of a table holding ctype userdata: index 0 is the
- *          return type, indices 1..lua_rawlen() are the argument types.
- * ct     - ctype of the function; calling_convention and has_var_arg are read.
- *
- * Upvalues: a copy of the ct_usr value and CDATA_MT_UPVAL come first. Every
- * pointer, function or enum argument keeps its ctype and that ctype's
- * uservalue on the stack (two upvalues); all other argument types are popped.
- * A pointer/uintptr return type adds two upvalues, a 64 bit integer return
- * type adds one.
- *
- * luaL_error() is raised for calling conventions other than C_CALL and
- * STD_CALL, for varargs functions on the 64 bit targets and for argument or
- * return types that have no case below.
- */
-void push_function(jit_t* jit, lua_State* L, function_t func, int ct_usr, const ctype_t* ct)
-{
-    size_t i, nargs;
-    int num_upvals;
-    const ctype_t* mbr_ct;
-    jit_t* Dst = jit;
-    struct reg_alloc regs;
-
-    memset(&regs, 0, sizeof(regs));
-    /* stack arguments start after the helper scratch area and the register slots */
-    regs.off = 32 + REGISTER_STACK_SPACE;
-
-    dasm_setup(Dst, build_actionlist);
-
-    lua_pushvalue(L, ct_usr);
-    ct_usr = lua_gettop(L);
-    lua_pushvalue(L, CDATA_MT_UPVAL); /* so that CDATA_MT_UPVAL works within the closure */
-    num_upvals = 2;
-
-    nargs = lua_rawlen(L, ct_usr);
-
-    if (ct->calling_convention != C_CALL && ct->calling_convention != STD_CALL) {
-        luaL_error(L, "NYI: calling convention");
-    }
-
-#if defined _WIN64 || defined __amd64__
-    if (ct->has_var_arg) {
-        luaL_error(L, "NYI: calling convention");
-    }
-#endif
-
-    /* prologue: set up the rbp frame and save L_ARG at [rbp - sizeof(void*)] */
-    | push rbp
-    | mov rbp, rsp
-    | push L_ARG
-    | // stack is 4 or 8 (mod 16) (L_ARG, rbp, rip)
-    |
-    |.if X64WIN
-    | mov L_ARG, rcx
-    | sub rsp, 40 // 8 bytes to realign, 32 bytes shadow space for lua_gettop
-    | // leave rcx as is for call to lua_gettop
-    |.elif X64
-    | mov L_ARG, rdi
-    | sub rsp, 8 // to realign (r)
-    | // leave rdi as is for call to lua_gettop
-    |.else
-    | mov L_ARG, [rbp + 8]
-    | push L_ARG // also realigns stack
-    |.endif
-    |
-    /* check the number of Lua arguments against the declared count */
-    | call &lua_gettop
-    | cmp eax, nargs
-    | jl ->too_few_arguments
-
-    if (!ct->has_var_arg) {
-        | jg ->too_many_arguments
-    }
-
-    /* note movzxd rax, eax should be used here except it doesn't exist since
-     * x86-64 guarantees that the upper 32 bits will always be zeroed when setting
-     * eax */
-    | shl rax, 4 // reserve 16 bytes per argument - this maintains the alignment mod 16
-    | sub rsp, rax
-    | sub rsp, 32 + REGISTER_STACK_SPACE // reserve an extra 32 to call local functions
-
-    /* convert each Lua argument and stage it in the outgoing argument area */
-    for (i = 1; i <= nargs; i++) {
-        lua_rawgeti(L, ct_usr, i);
-        mbr_ct = (const ctype_t*) lua_touserdata(L, -1);
-
-        if (mbr_ct->pointers) {
-            /* the ctype and its uservalue stay on the stack as upvalues */
-            lua_getuservalue(L, -1);
-            num_upvals += 2;
-            | call_piip, &to_typed_pointer, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct
-            add_pointer(Dst, &regs);
-        } else {
-            switch (mbr_ct->type) {
-            case FUNCTION_TYPE:
-                lua_getuservalue(L, -1);
-                num_upvals += 2;
-                | call_piip, &to_typed_pointer, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct
-                add_pointer(Dst, &regs);
-                break;
-
-            case ENUM_TYPE:
-                lua_getuservalue(L, -1);
-                num_upvals += 2;
-                | call_piip, &to_enum, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct
-                add_int32(Dst, &regs);
-                break;
-
-            case INT8_TYPE:
-                | call_pi, &to_int32, L_ARG, i
-                | movsx eax, al
-                add_int32(Dst, &regs);
-                lua_pop(L, 1);
-                break;
-
-            case UINT8_TYPE:
-                | call_pi, &to_uint32, L_ARG, i
-                | movzx eax, al
-                add_int32(Dst, &regs);
-                lua_pop(L, 1);
-                break;
-
-            case INT16_TYPE:
-                | call_pi, &to_int32, L_ARG, i
-                | movsx eax, ax
-                add_int32(Dst, &regs);
-                lua_pop(L, 1);
-                break;
-
-            case UINT16_TYPE:
-                | call_pi, &to_uint32, L_ARG, i
-                | movzx eax, ax
-                add_int32(Dst, &regs);
-                lua_pop(L, 1);
-                break;
-
-            case INT32_TYPE:
-                | call_pi, &to_int32, L_ARG, i
-                add_int32(Dst, &regs);
-                lua_pop(L, 1);
-                break;
-
-            case UINT32_TYPE:
-                | call_pi, &to_uint32, L_ARG, i
-                add_int32(Dst, &regs);
-                lua_pop(L, 1);
-                break;
-
-            case UINTPTR_TYPE:
-                | call_pi, &to_uintptr, L_ARG, i
-                add_pointer(Dst, &regs);
-                lua_pop(L, 1);
-                break;
-
-            case INT64_TYPE:
-                | call_pi, &to_int64, L_ARG, i
-                add_int64(Dst, &regs);
-                lua_pop(L, 1);
-                break;
-
-            case UINT64_TYPE:
-                | call_pi, &to_uint64, L_ARG, i
-                add_int64(Dst, &regs);
-                lua_pop(L, 1);
-                break;
-
-            case DOUBLE_TYPE:
-                | call_pi, &to_double, L_ARG, i
-                add_double(Dst, &regs, 0);
-                lua_pop(L, 1);
-                break;
-
-            case FLOAT_TYPE:
-                | call_pi, &to_double, L_ARG, i
-                add_double(Dst, &regs, 1);
-                lua_pop(L, 1);
-                break;
-
-            default:
-                luaL_error(L, "NYI: call arg type");
-            }
-        }
-    }
-
-    if (ct->has_var_arg) {
-        /* the extra Lua arguments are unpacked after the fixed stack arguments */
-        | mov rax, rsp
-        | add rax, regs.off
-        | call_pip, &unpack_varargs, L_ARG, nargs+1, rax
-#ifdef _WIN64
-        for (i = nargs; i < 4; i++) {
-            regs.is_int[i] = regs.is_float[i] = 1;
-        }
-        regs.regs = 4;
-#elif defined __amd64__
-        regs.floats = 8;
-        regs.ints = 6;
-#endif
-    }
-
-    /* restore the error value saved by the previous stub call */
-    | mov eax, [&jit->last_errno]
-    | call_i, &SetLastError, eax
-
-    /* remove the stack space to call local functions */
-    |.if X32WIN
-    | add rsp, 28 // SetLastError will have already popped 4
-    |.else
-    | add rsp, 32
-    |.endif
-
-    /* load the staged register arguments; the cases fall through on purpose */
-#ifdef _WIN64
-    |.if X64WIN
-    switch (regs.regs) {
-    case 4:
-        if (regs.is_float[3]) {
-            | movq xmm3, qword [rsp + 8*3]
-        }
-        if (regs.is_int[3]) {
-            | mov r9, [rsp + 8*3]
-        }
-    case 3:
-        if (regs.is_float[2]) {
-            | movq xmm2, qword [rsp + 8*2]
-        }
-        if (regs.is_int[2]) {
-            | mov r8, [rsp + 8*2]
-        }
-    case 2:
-        if (regs.is_float[1]) {
-            | movq xmm1, qword [rsp + 8*1]
-        }
-        if (regs.is_int[1]) {
-            | mov rdx, [rsp + 8*1]
-        }
-    case 1:
-        if (regs.is_float[0]) {
-            | movq xmm0, qword [rsp]
-        }
-        if (regs.is_int[0]) {
-            | mov rcx, [rsp]
-        }
-    case 0:
-        break;
-    }
-
-    /* don't remove the space for the registers as we need 32 bytes of register overflow space */
-    assert(REGISTER_STACK_SPACE == 32);
-
-#elif defined __amd64__
-    |.elif X64
-    switch (regs.floats) {
-    case 8:
-        | movq xmm7, qword [rsp + 8*(MAX_INT_REGISTERS+7)]
-    case 7:
-        | movq xmm6, qword [rsp + 8*(MAX_INT_REGISTERS+6)]
-    case 6:
-        | movq xmm5, qword [rsp + 8*(MAX_INT_REGISTERS+5)]
-    case 5:
-        | movq xmm4, qword [rsp + 8*(MAX_INT_REGISTERS+4)]
-    case 4:
-        | movq xmm3, qword [rsp + 8*(MAX_INT_REGISTERS+3)]
-    case 3:
-        | movq xmm2, qword [rsp + 8*(MAX_INT_REGISTERS+2)]
-    case 2:
-        | movq xmm1, qword [rsp + 8*(MAX_INT_REGISTERS+1)]
-    case 1:
-        | movq xmm0, qword [rsp + 8*(MAX_INT_REGISTERS)]
-    case 0:
-        break;
-    }
-
-    switch (regs.ints) {
-    case 6:
-        | mov r9, [rsp + 8*5]
-    case 5:
-        | mov r8, [rsp + 8*4]
-    case 4:
-        | mov rcx, [rsp + 8*3]
-    case 3:
-        | mov rdx, [rsp + 8*2]
-    case 2:
-        | mov rsi, [rsp + 8*1]
-    case 1:
-        | mov rdi, [rsp]
-    case 0:
-        break;
-    }
-
-    | add rsp, REGISTER_STACK_SPACE
-    |.endif
-#endif
-
-#ifdef __amd64__
-    if (ct->has_var_arg) {
-        /* al stores an upper limit on the number of float register, note that
-         * its allowed to be more than the actual number of float registers used as
-         * long as its 0-8 */
-        |.if X64 and not X64WIN
-        | mov al, 8
-        |.endif
-    }
-#endif
-
-    | call &func
-    | sub rsp, 48 // 32 to be able to call local functions, 16 so we can store some local variables
-
-    /* note on windows X86 the stack may be only aligned to 4 (stdcall will
-     * have popped a multiple of 4 bytes), but we don't need 16 byte alignment on
-     * that platform
-     */
-
-    /* index 0 of the type table is the return type */
-    lua_rawgeti(L, ct_usr, 0);
-    mbr_ct = (const ctype_t*) lua_touserdata(L, -1);
-
-    if (mbr_ct->pointers || mbr_ct->type == UINTPTR_TYPE) {
-        lua_getuservalue(L, -1);
-        num_upvals += 2;
-        | mov [rsp+32], rax // save the pointer
-        | get_errno
-        | call_pip, &push_cdata, L_ARG, lua_upvalueindex(num_upvals), mbr_ct
-        | mov rcx, [rsp+32]
-        | mov [rax], rcx // *(void**) cdata = val
-        | mov eax, 1
-        | return
-
-    } else {
-        switch (mbr_ct->type) {
-        case INT64_TYPE:
-        case UINT64_TYPE:
-            /* the return ctype itself stays on the stack as an upvalue */
-            num_upvals++;
-            | // save the return value
-            |.if X64
-            | mov [rsp+32], rax
-            |.else
-            | mov [rsp+36], edx // high
-            | mov [rsp+32], eax // low
-            |.endif
-            |
-            | get_errno
-            | call_pip, &push_cdata, L_ARG, lua_upvalueindex(num_upvals), mbr_ct
-            |
-            | // *(int64_t*) cdata = val
-            |.if X64
-            | mov rcx, [rsp+32]
-            | mov [rax], rcx
-            |.else
-            | mov rcx, [rsp+36]
-            | mov rdx, [rsp+32]
-            | mov [rax+4], rcx
-            | mov [rax], rdx
-            |.endif
-            |
-            | mov eax, 1
-            | return
-            break;
-
-        case VOID_TYPE:
-            | jmp ->return_void
-            lua_pop(L, 1);
-            break;
-
-        case BOOL_TYPE:
-            | jmp ->return_bool
-            lua_pop(L, 1);
-            break;
-
-        case INT8_TYPE:
-            | movsx eax, al
-            | jmp ->return_int
-            lua_pop(L, 1);
-            break;
-
-        case INT16_TYPE:
-            | movsx eax, ax
-            | jmp ->return_int
-            lua_pop(L, 1);
-            break;
-
-        case UINT8_TYPE:
-            | movzx eax, al
-            | jmp ->return_int
-            lua_pop(L, 1);
-            break;
-
-        case UINT16_TYPE:
-            | movzx eax, ax
-            | jmp ->return_int
-            lua_pop(L, 1);
-            break;
-
-        case INT32_TYPE:
-        case ENUM_TYPE:
-            | jmp ->return_int
-            lua_pop(L, 1);
-            break;
-
-        case UINT32_TYPE:
-            | jmp ->return_uint
-            lua_pop(L, 1);
-            break;
-
-        case DOUBLE_TYPE:
-        case FLOAT_TYPE:
-            | jmp ->return_double
-            lua_pop(L, 1);
-            break;
-
-        default:
-            luaL_error(L, "NYI: call return type");
-        }
-    }
-
-    /* everything above ct_usr on the Lua stack becomes an upvalue of the closure */
-    assert(lua_gettop(L) == ct_usr + num_upvals - 1);
-    lua_pushcclosure(L, (lua_CFunction) compile(Dst, L), num_upvals);
-}
-
+|.if X64 +|.arch x64 +|.else +|.arch x86 +|.endif + +|.actionlist build_actionlist +|.globalnames globnames + +|.if not X64 +|.define RET_H, edx // for int64_t returns +|.define RET_L, eax +|.endif + +void compile_globals(jit_t* jit, lua_State* L) +{ + jit_t* Dst = jit; + dasm_setup(Dst, build_actionlist); + + /* Note: since the return code uses EBP to reset the stack pointer, we + * don't have to track the amount of stack space used. It also means we + * can handle stdcall and cdecl with the same code. + */ + + /* Note the various call_* functions want 32 bytes of 16 byte aligned + * stack + */ + + |.if X64WIN + |.macro call_piip, func, arg0, arg1, arg2, arg3 + | mov r9, arg3 + | mov r8, arg2 + | mov edx, arg1 + | mov rcx, arg0 + | call func + |.endmacro + |.macro call_pip, func, arg0, arg1, arg2 + | mov r8, arg2 + | mov edx, arg1 + | mov rcx, arg0 + | call func + |.endmacro + |.macro call_pp, func, arg0, arg1 + | mov rdx, arg1 + | mov rcx, arg0 + | call func + |.endmacro + |.macro call_pi, func, arg0, arg1 + | mov edx, arg1 + | mov rcx, arg0 + | call func + |.endmacro + |.macro call_i, func, arg0 + | mov ecx, arg0 + | call func + |.endmacro + | + |.elif X64 + |.macro call_piip, func, arg0, arg1, arg2, arg3 + | mov rcx, arg3 + | mov edx, arg2 + | mov esi, arg1 + | mov rdi, arg0 + | call func + |.endmacro + |.macro call_pip, func, arg0, arg1, arg2 + | mov rdx, arg2 + | mov esi, arg1 + | mov rdi, arg0 + | call func + |.endmacro + |.macro call_pp, func, arg0, arg1 + | mov rsi, arg1 + | mov rdi, arg0 + | call func + |.endmacro + |.macro call_pi, func, arg0, arg1 + | mov esi, arg1 + | mov rdi, arg0 + | call func + |.endmacro + |.macro call_i, func, arg0 + | mov edi, arg0 + | call func + |.endmacro + | + |.else + | // define the 64bit registers to the 32 bit counterparts, so the common + | // code can use r*x for all pointers + |.define rax, eax + |.define rcx, ecx + |.define rdx, edx + |.define rsp, esp + |.define rbp, ebp + |.define rdi, edi + |.define rsi, esi + 
|.macro call_piip, func, arg0, arg1, arg2, arg3 + | mov dword [rsp+12], arg3 + | mov dword [rsp+8], arg2 + | mov dword [rsp+4], arg1 + | mov dword [rsp], arg0 + | call func + |.endmacro + |.macro call_pip, func, arg0, arg1, arg2 + | mov dword [rsp+8], arg2 + | mov dword [rsp+4], arg1 + | mov dword [rsp], arg0 + | call func + |.endmacro + |.macro call_pp, func, arg0, arg1 + | mov dword [rsp+4], arg1 + | mov dword [rsp], arg0 + | call func + |.endmacro + |.macro call_pi, func, arg0, arg1 + | call_pp, func, arg0, arg1 + |.endmacro + |.macro call_i, func, arg0 + | mov dword [rsp], arg0 + | call func + |.endmacro + | + |.endif + + |.if X64 + |.define L_ARG, r12 + |.else + |.define L_ARG, rdi + |.endif + + |.macro return + |.if X64 + | mov L_ARG, [rbp-8] + |.else + | mov L_ARG, [rbp-4] + |.endif + | mov rsp, rbp + | pop rbp + | ret + |.endmacro + + |.macro get_errno // note trashes registers + | call &GetLastError + | mov dword [&jit->last_errno], eax + |.endmacro + + /* the general idea for the return functions is: + * 1) Save return value on stack + * 2) Call get_errno (this trashes the registers hence #1) + * 3) Unpack return value from stack + * 4) Call lua push function + * 5) Set eax to number of returned args (0 or 1) + * 6) Call return which pops our stack frame + */ + + |->return_void: + | get_errno + | mov eax, 0 + | return + + |->return_int: + | mov [rsp+32], eax + | get_errno + | + |.if X64WIN + | cvtsi2sd xmm1, dword [rsp+32] + | mov rcx, L_ARG + |.elif X64 + | cvtsi2sd xmm0, dword [rsp+32] + | mov rdi, L_ARG + |.else + | fild dword [rsp+32] + | fstp qword [rsp+4] + | mov [rsp], L_ARG + |.endif + | + | call &lua_pushnumber + | mov eax, 1 + | return + + |->return_double: + |.if X64 + | movq qword [rsp+32], xmm0 + |.else + | fstp qword [rsp+4] // note get_errno doesn't require any stack on x86 + |.endif + | + | get_errno + | + |.if X64WIN + | movq xmm1, qword [rsp+32] + | mov rcx, L_ARG + |.elif X64 + | movq xmm0, qword [rsp+32] + | mov rdi, L_ARG + |.else + | 
mov [rsp], L_ARG + |.endif + | call &lua_pushnumber + | mov eax, 1 + | return + + |->return_bool: + | mov [rsp+32], eax + | get_errno + | mov eax, [rsp+32] + | call_pi, &lua_pushboolean, L_ARG, eax + | mov eax, 1 + | return + + |->return_uint: + | mov [rsp+32], eax + | get_errno + | mov eax, [rsp+32] + | call_pi, &push_uint, L_ARG, eax + | mov eax, 1 + | return + + |->too_few_arguments: + | call_pp, &luaL_error, L_ARG, &"too few arguments" + + |->too_many_arguments: + | call_pp, &luaL_error, L_ARG, &"too many arguments" + + compile(Dst, L); +} + +int x86_stack_required(lua_State* L, int usr) +{ + size_t i; + size_t argn = lua_rawlen(L, usr); + int ret = 0; + for (i = 1; i <= argn; i++) { + const ctype_t* ct; + lua_rawgeti(L, usr, i); + ct = (const ctype_t*) lua_touserdata(L, -1); + + if (ct->pointers) { + ret += sizeof(void*); + } else if (ct->type == DOUBLE_TYPE || ct->type == UINT64_TYPE || ct->type == INT64_TYPE) { + ret += 8; + } else if (ct->type == STRUCT_TYPE || ct->type == UNION_TYPE) { + luaL_error(L, "NYI - structs as arguments"); + } else { + /* other numeric types 4 bytes or less */ + ret += 4; + } + + lua_pop(L, 1); + } + + return ret; +} + +#ifdef _WIN64 +#define MAX_REGISTERS 4 /* rcx, rdx, r8, r9 */ +#endif + +#ifdef __amd64__ +#define MAX_INT_REGISTERS 6 /* rdi, rsi, rdx, rcx, r8, r9 */ +#define MAX_FLOAT_REGISTERS 8 /* xmm0-7 */ +#endif + +struct reg_alloc { +#ifdef _WIN64 + int regs; + int is_float[4]; + int is_int[4]; +#elif defined __amd64__ + int floats; + int ints; +#endif + size_t off; +}; + +#ifdef _WIN64 +#define REGISTER_STACK_SPACE (4*8) +#elif defined __amd64__ +#define REGISTER_STACK_SPACE (14*8) +#else +#define REGISTER_STACK_SPACE 0 +#endif + +void add_int32(Dst_DECL, struct reg_alloc* reg) +{ +#ifdef _WIN64 + if (reg->regs >= MAX_REGISTERS) { + | mov [rsp + 8*(reg->regs)], eax + reg->is_int[reg->regs++] = 1; + } else +#elif defined __amd64__ + if (reg->ints >= MAX_INT_REGISTERS) { + | mov [rsp + 8*reg->ints], eax + reg->ints++; + } 
else +#endif + + { + | mov [rsp+reg->off], eax + reg->off += 4; + } +} + +void add_int64(Dst_DECL, struct reg_alloc* reg) +{ +#if !defined _WIN64 && !defined __amd64__ + |.if not X64 + | mov [rsp + reg->off], RET_L + | mov [rsp + reg->off + 4], RET_H + |.endif + reg->off += 8; +#else + +#ifdef _WIN64 + if (reg->regs >= MAX_REGISTERS) { + | mov [rsp + 8*reg->regs], rax + reg->is_int[reg->regs++] = 1; +#else + if (reg->ints >= MAX_INT_REGISTERS) { + | mov [rsp + 8*reg->ints], rax + reg->ints++; +#endif + + } else { + | mov [rsp + reg->off], rax + reg->off += 8; + } +#endif +} + +void add_double(Dst_DECL, struct reg_alloc* reg, int is_float) +{ +#if !defined _WIN64 && !defined __amd64__ + |.if not X64 + if (is_float) { + | fstp dword [rsp + reg->off] + reg->off += 4; + } else { + | fstp qword [rsp + reg->off] + reg->off += 8; + } +#else + |.else + +#ifdef _WIN64 + if (reg->regs >= MAX_REGISTERS) { + | movq qword [rsp + 8*(reg->regs)], xmm0 + reg->is_float[reg->regs++] = 1; +#else + if (reg->floats >= MAX_FLOAT_REGISTERS) { + | movq qword [rsp + 8*(MAX_INT_REGISTERS + reg->floats)], xmm0 + reg->floats++; +#endif + + } else if (is_float) { + | movd dword [rsp + reg->off], xmm0 + reg->off += 4; + } else { + | movq qword [rsp + reg->off], xmm0 + reg->off += 8; + } + |.endif +#endif +} + +#if defined _WIN64 || defined __amd64__ +#define add_pointer add_int64 +#else +#define add_pointer add_int32 +#endif + +void push_function(jit_t* jit, lua_State* L, function_t func, int ct_usr, const ctype_t* ct) +{ + size_t i, nargs; + int num_upvals; + const ctype_t* mbr_ct; + jit_t* Dst = jit; + struct reg_alloc regs; + + memset(®s, 0, sizeof(regs)); + regs.off = 32 + REGISTER_STACK_SPACE; + + dasm_setup(Dst, build_actionlist); + + lua_pushvalue(L, ct_usr); + ct_usr = lua_gettop(L); + lua_pushvalue(L, CDATA_MT_UPVAL); /* so that CDATA_MT_UPVAL works within the closure */ + num_upvals = 2; + + nargs = lua_rawlen(L, ct_usr); + + if (ct->calling_convention != C_CALL && 
ct->calling_convention != STD_CALL) { + luaL_error(L, "NYI: calling convention"); + } + +#if defined _WIN64 || defined __amd64__ + if (ct->has_var_arg) { + luaL_error(L, "NYI: calling convention"); + } +#endif + + | push rbp + | mov rbp, rsp + | push L_ARG + | // stack is 4 or 8 (mod 16) (L_ARG, rbp, rip) + | + |.if X64WIN + | mov L_ARG, rcx + | sub rsp, 40 // 8 bytes to realign, 32 bytes shadow space for lua_gettop + | // leave rcx as is for call to lua_gettop + |.elif X64 + | mov L_ARG, rdi + | sub rsp, 8 // to realign (r) + | // leave rdi as is for call to lua_gettop + |.else + | mov L_ARG, [rbp + 8] + | push L_ARG // also realigns stack + |.endif + | + | call &lua_gettop + | cmp eax, nargs + | jl ->too_few_arguments + + if (!ct->has_var_arg) { + | jg ->too_many_arguments + } + + /* note movzxd rax, eax should be used here except it doesn't exist since + * x86-64 guarentees that the upper 32 bits will always be zeroed when setting + * eax */ + | shl rax, 4 // reserve 16 bytes per argument - this maintains the alignment mod 16 + | sub rsp, rax + | sub rsp, 32 + REGISTER_STACK_SPACE // reserve an extra 32 to call local functions + + for (i = 1; i <= nargs; i++) { + lua_rawgeti(L, ct_usr, i); + mbr_ct = (const ctype_t*) lua_touserdata(L, -1); + + if (mbr_ct->pointers) { + lua_getuservalue(L, -1); + num_upvals += 2; + | call_piip, &to_typed_pointer, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct + add_pointer(Dst, ®s); + } else { + switch (mbr_ct->type) { + case FUNCTION_TYPE: + lua_getuservalue(L, -1); + num_upvals += 2; + | call_piip, &to_typed_pointer, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct + add_pointer(Dst, ®s); + break; + + case ENUM_TYPE: + lua_getuservalue(L, -1); + num_upvals += 2; + | call_piip, &to_enum, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct + add_int32(Dst, ®s); + break; + + case INT8_TYPE: + | call_pi, &to_int32, L_ARG, i + | movsx eax, al + add_int32(Dst, ®s); + lua_pop(L, 1); + break; + + case UINT8_TYPE: + | call_pi, &to_uint32, 
L_ARG, i + | movzx eax, al + add_int32(Dst, ®s); + lua_pop(L, 1); + break; + + case INT16_TYPE: + | call_pi, &to_int32, L_ARG, i + | movsx eax, ax + add_int32(Dst, ®s); + lua_pop(L, 1); + break; + + case UINT16_TYPE: + | call_pi, &to_uint32, L_ARG, i + | movzx eax, ax + add_int32(Dst, ®s); + lua_pop(L, 1); + break; + + case INT32_TYPE: + | call_pi, &to_int32, L_ARG, i + add_int32(Dst, ®s); + lua_pop(L, 1); + break; + + case UINT32_TYPE: + | call_pi, &to_uint32, L_ARG, i + add_int32(Dst, ®s); + lua_pop(L, 1); + break; + + case UINTPTR_TYPE: + | call_pi, &to_uintptr, L_ARG, i + add_pointer(Dst, ®s); + lua_pop(L, 1); + break; + + case INT64_TYPE: + | call_pi, &to_int64, L_ARG, i + add_int64(Dst, ®s); + lua_pop(L, 1); + break; + + case UINT64_TYPE: + | call_pi, &to_uint64, L_ARG, i + add_int64(Dst, ®s); + lua_pop(L, 1); + break; + + case DOUBLE_TYPE: + | call_pi, &to_double, L_ARG, i + add_double(Dst, ®s, 0); + lua_pop(L, 1); + break; + + case FLOAT_TYPE: + | call_pi, &to_double, L_ARG, i + add_double(Dst, ®s, 1); + lua_pop(L, 1); + break; + + default: + luaL_error(L, "NYI: call arg type"); + } + } + } + + if (ct->has_var_arg) { + | mov rax, rsp + | add rax, regs.off + | call_pip, &unpack_varargs, L_ARG, nargs+1, rax +#ifdef _WIN64 + for (i = nargs; i < 4; i++) { + regs.is_int[i] = regs.is_float[i] = 1; + } + regs.regs = 4; +#elif defined __amd64__ + regs.floats = 8; + regs.ints = 6; +#endif + } + + | mov eax, [&jit->last_errno] + | call_i, &SetLastError, eax + + /* remove the stack space to call local functions */ + |.if X32WIN + | add rsp, 28 // SetLastError will have already popped 4 + |.else + | add rsp, 32 + |.endif + +#ifdef _WIN64 + |.if X64WIN + switch (regs.regs) { + case 4: + if (regs.is_float[3]) { + | movq xmm3, qword [rsp + 8*3] + } + if (regs.is_int[3]) { + | mov r9, [rsp + 8*3] + } + case 3: + if (regs.is_float[2]) { + | movq xmm2, qword [rsp + 8*2] + } + if (regs.is_int[2]) { + | mov r8, [rsp + 8*2] + } + case 2: + if (regs.is_float[1]) { + | movq xmm1, 
qword [rsp + 8*1] + } + if (regs.is_int[1]) { + | mov rdx, [rsp + 8*1] + } + case 1: + if (regs.is_float[0]) { + | movq xmm0, qword [rsp] + } + if (regs.is_int[0]) { + | mov rcx, [rsp] + } + case 0: + break; + } + + /* don't remove the space for the registers as we need 32 bytes of register overflow space */ + assert(REGISTER_STACK_SPACE == 32); + +#elif defined __amd64__ + |.elif X64 + switch (regs.floats) { + case 8: + | movq xmm7, qword [rsp + 8*(MAX_INT_REGISTERS+7)] + case 7: + | movq xmm6, qword [rsp + 8*(MAX_INT_REGISTERS+6)] + case 6: + | movq xmm5, qword [rsp + 8*(MAX_INT_REGISTERS+5)] + case 5: + | movq xmm4, qword [rsp + 8*(MAX_INT_REGISTERS+4)] + case 4: + | movq xmm3, qword [rsp + 8*(MAX_INT_REGISTERS+3)] + case 3: + | movq xmm2, qword [rsp + 8*(MAX_INT_REGISTERS+2)] + case 2: + | movq xmm1, qword [rsp + 8*(MAX_INT_REGISTERS+1)] + case 1: + | movq xmm0, qword [rsp + 8*(MAX_INT_REGISTERS)] + case 0: + break; + } + + switch (regs.ints) { + case 6: + | mov r9, [rsp + 8*5] + case 5: + | mov r8, [rsp + 8*4] + case 4: + | mov rcx, [rsp + 8*3] + case 3: + | mov rdx, [rsp + 8*2] + case 2: + | mov rsi, [rsp + 8*1] + case 1: + | mov rdi, [rsp] + case 0: + break; + } + + | add rsp, REGISTER_STACK_SPACE + |.endif +#endif + +#ifdef __amd64__ + if (ct->has_var_arg) { + /* al stores an upper limit on the number of float register, note that + * its allowed to be more than the actual number of float registers used as + * long as its 0-8 */ + |.if X64 and not X64WIN + | mov al, 8 + |.endif + } +#endif + + | call &func + | sub rsp, 48 // 32 to be able to call local functions, 16 so we can store some local variables + + /* note on windows X86 the stack may be only aligned to 4 (stdcall will + * have popped a multiple of 4 bytes), but we don't need 16 byte alignment on + * that platform + */ + + lua_rawgeti(L, ct_usr, 0); + mbr_ct = (const ctype_t*) lua_touserdata(L, -1); + + if (mbr_ct->pointers || mbr_ct->type == UINTPTR_TYPE) { + lua_getuservalue(L, -1); + num_upvals += 
2; + | mov [rsp+32], rax // save the pointer + | get_errno + | call_pip, &push_cdata, L_ARG, lua_upvalueindex(num_upvals), mbr_ct + | mov rcx, [rsp+32] + | mov [rax], rcx // *(void**) cdata = val + | mov eax, 1 + | return + + } else { + switch (mbr_ct->type) { + case INT64_TYPE: + case UINT64_TYPE: + num_upvals++; + | // save the return value + |.if X64 + | mov [rsp+32], rax + |.else + | mov [rsp+36], edx // high + | mov [rsp+32], eax // low + |.endif + | + | get_errno + | call_pip, &push_cdata, L_ARG, lua_upvalueindex(num_upvals), mbr_ct + | + | // *(int64_t*) cdata = val + |.if X64 + | mov rcx, [rsp+32] + | mov [rax], rcx + |.else + | mov rcx, [rsp+36] + | mov rdx, [rsp+32] + | mov [rax+4], rcx + | mov [rax], rdx + |.endif + | + | mov eax, 1 + | return + break; + + case VOID_TYPE: + | jmp ->return_void + lua_pop(L, 1); + break; + + case BOOL_TYPE: + | jmp ->return_bool + lua_pop(L, 1); + break; + + case INT8_TYPE: + | movsx eax, al + | jmp ->return_int + lua_pop(L, 1); + break; + + case INT16_TYPE: + | movsx eax, ax + | jmp ->return_int + lua_pop(L, 1); + break; + + case UINT8_TYPE: + | movzx eax, al + | jmp ->return_int + lua_pop(L, 1); + break; + + case UINT16_TYPE: + | movzx eax, ax + | jmp ->return_int + lua_pop(L, 1); + break; + + case INT32_TYPE: + case ENUM_TYPE: + | jmp ->return_int + lua_pop(L, 1); + break; + + case UINT32_TYPE: + | jmp ->return_uint + lua_pop(L, 1); + break; + + case DOUBLE_TYPE: + case FLOAT_TYPE: + | jmp ->return_double + lua_pop(L, 1); + break; + + default: + luaL_error(L, "NYI: call return type"); + } + } + + assert(lua_gettop(L) == ct_usr + num_upvals - 1); + lua_pushcclosure(L, (lua_CFunction) compile(Dst, L), num_upvals); +} + |