Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torch/luajit-rocks.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ronan Collobert <ronan@collobert.com> 2015-03-10 20:13:43 +0300
committer: Ronan Collobert <ronan@collobert.com> 2015-03-10 20:13:43 +0300
commit: 2f41c04ac3fe7863308b05f3aeaad9bed43fe0a6 (patch)
tree: 06b31d2ed5972c8d60e378d80a86b89992a4d2e0
parent: 0f5edb29dbfe0468f699f5e8ff7595aee0701fcb (diff)
Squashed 'luajit-2.1/' changes from 42ed4e5..361827c
361827c PPC64: Add build infrastructure. c821e0a Merge branch 'master' into v2.1 8b6af89 Fix Lua/C API typecheck error for special indexes. 2c0748f Merge branch 'master' into v2.1 1f578bd FFI: Fix FOLD rule for TOBIT + CONV num.u32. e1a8f3b Merge branch 'master' into v2.1 5c0e33c ARM: Handle more arch defines. 7ff4768 Properly fail unsupported cross-compile to MIPS64. 906008d Merge branch 'master' into v2.1 e6b0875 Fix string to number conversion. 576ddf1 Merge branch 'master' into v2.1 4fa51af x86/x64: Fix code generation for fused test/arith ops. 063f3d5 Merge branch 'master' into v2.1 704280f FFI: Prevent DSE across ffi.string(). 04dc64b LJ_FR2: Fix bytecode generation for method lookups. ca5bbdf Merge branch 'master' into v2.1 dfa173b Fix lexer error for chunks without tokens. 43cb818 Merge branch 'master' into v2.1 7f01300 Don't compile IR_RETF after CALLT to ff with-side effects. b876d6d OpenBSD/x86: Better executable memory allocation for W^X mode. 3e17038 DynASM/PPC: Add missing PPC64 instructions and various extensions. 3f19b34 iOS/ARM64: Fix target OS detection. 962d2aa LJ_GC64: Return true for ffi.abi("gc64"). 33f0c24 ARM64: Add FFI support. ce1a5ee iOS/ARM64: The frame pointer is required. 9647aab LJ_FR2: Fix lua_settable() and lua_setfield(). 1b1bd08 DynASM/ARM64: Fix checks for scaled immediates. 0a5045c Merge branch 'master' into v2.1 86913b9 Bump copyright date to 2015. c71cdf4 Merge branch 'master' into v2.1 999f57d Fix BC_UCLO/BC_JMP join optimization in Lua parser. 71ecc05 Fix MSVC build. fcae87e Merge branch 'master' into v2.1 4d9e8e2 ARM: Minor interpreter optimization. 31e6683 x86: Minor interpreter optimization. 7bdadeb ARM64: Update docs. e1261a0 ARM64: Compatibility fixes for Clang. dfe84fd PPC/e500: Drop support for this architecture. 968725c Merge branch 'master' into v2.1 9caaf65 Fix docs. f307d0a ARM64: Add build infrastructure and initial port of interpreter. cb481dd Add LJ_GC64 mode: 64 bit GC object references. 
054e6ab Add LJ_FR2 mode: Two-slot frame info. a13dfd6 DynASM/ARM64: Various fixes. 7a76d43 Merge branch 'master' into v2.1 db7cb5a DynASM/ARM: Fix rollback for variant templates. f45d11e MIPS: Fix excess stack growth in interpreter. 6319e03 PPC: Fix excess stack growth in interpreter. 16f910b ARM: Fix write barrier check in BC_USETS. 5482656 ARM: Fix excess stack growth in interpreter. 1b774d9 Fix corner case in string to number conversion. ecaa4ea Merge branch 'master' into v2.1 5cb6e2e Cleanup of TValue setters. No functional changes. 10caad0 x86: Fix argument checks for ipairs() iterator. 6e9145a Cleanup of memory vs. GC sizes. No functional changes. 82e6e5f x86: Fix stack slot reservation for FP math functions. 9679a94 Disable debug info for generated *.S to avoid confusing gdb. c6130e6 Avoid dependency on frame layout for LJ_POST_FIXCOMP. 881f48f Cleanup of frame handling. No functional changes. b93b624 Merge branch 'master' into v2.1 c6e6924 Gracefully handle lua_error() for a suspended coroutine. cca81a3 Merge branch 'master' into v2.1 6fddb9b Avoid error messages when building with Clang. ad03eba x86/x64: Drop internal x87 math functions. Use libm functions. e03df1e x86/x64: Call external symbols directly from interpreter code. f49c61a DynASM/ARM64: Initial commit of ARM64 module. 1fc9cd0 Merge branch 'master' into v2.1 3f2e4ec DynASM/ARM: Fix description shown for multi-element templates. 8cc8933 Merge branch 'master' into v2.1 c75c62b Fix snapshot #0 handling for traces with a stack check on entry. git-subtree-dir: luajit-2.1 git-subtree-split: 361827c8f986a73cdccfc98ad16fe9f33ed6fb25
-rw-r--r--COPYRIGHT2
-rw-r--r--Makefile2
-rw-r--r--README2
-rw-r--r--doc/bluequad-print.css2
-rw-r--r--doc/bluequad.css2
-rw-r--r--doc/changes.html4
-rw-r--r--doc/contact.html6
-rw-r--r--doc/ext_c_api.html4
-rw-r--r--doc/ext_ffi.html4
-rw-r--r--doc/ext_ffi_api.html6
-rw-r--r--doc/ext_ffi_semantics.html4
-rw-r--r--doc/ext_ffi_tutorial.html4
-rw-r--r--doc/ext_jit.html6
-rw-r--r--doc/ext_profiler.html4
-rw-r--r--doc/extensions.html4
-rw-r--r--doc/faq.html4
-rw-r--r--doc/install.html23
-rw-r--r--doc/luajit.html8
-rw-r--r--doc/running.html4
-rw-r--r--doc/status.html4
-rw-r--r--dynasm/dasm_arm.h2
-rw-r--r--dynasm/dasm_arm.lua9
-rw-r--r--dynasm/dasm_arm64.h518
-rw-r--r--dynasm/dasm_arm64.lua1166
-rw-r--r--dynasm/dasm_mips.h2
-rw-r--r--dynasm/dasm_mips.lua2
-rw-r--r--dynasm/dasm_ppc.h4
-rw-r--r--dynasm/dasm_ppc.lua598
-rw-r--r--dynasm/dasm_proto.h2
-rw-r--r--dynasm/dasm_x64.lua2
-rw-r--r--dynasm/dasm_x86.h2
-rw-r--r--dynasm/dasm_x86.lua2
-rw-r--r--dynasm/dynasm.lua4
-rw-r--r--etc/luajit.12
-rw-r--r--src/.gitignore2
-rw-r--r--src/Makefile44
-rw-r--r--src/Makefile.dep2
-rw-r--r--src/host/buildvm.c11
-rw-r--r--src/host/buildvm.h3
-rw-r--r--src/host/buildvm_asm.c52
-rw-r--r--src/host/buildvm_fold.c2
-rw-r--r--src/host/buildvm_lib.c2
-rw-r--r--src/host/buildvm_libbc.h15
-rw-r--r--src/host/buildvm_peobj.c2
-rw-r--r--src/host/genlibbc.lua2
-rw-r--r--src/host/genminilua.lua2
-rw-r--r--src/jit/bc.lua2
-rw-r--r--src/jit/bcsave.lua8
-rw-r--r--src/jit/dis_arm.lua2
-rw-r--r--src/jit/dis_mips.lua2
-rw-r--r--src/jit/dis_mipsel.lua2
-rw-r--r--src/jit/dis_ppc.lua2
-rw-r--r--src/jit/dis_x64.lua2
-rw-r--r--src/jit/dis_x86.lua2
-rw-r--r--src/jit/dump.lua2
-rw-r--r--src/jit/p.lua2
-rw-r--r--src/jit/v.lua2
-rw-r--r--src/jit/zone.lua2
-rw-r--r--src/lib_aux.c2
-rw-r--r--src/lib_base.c31
-rw-r--r--src/lib_bit.c6
-rw-r--r--src/lib_debug.c2
-rw-r--r--src/lib_ffi.c7
-rw-r--r--src/lib_init.c2
-rw-r--r--src/lib_io.c2
-rw-r--r--src/lib_jit.c6
-rw-r--r--src/lib_math.c4
-rw-r--r--src/lib_os.c2
-rw-r--r--src/lib_package.c6
-rw-r--r--src/lib_string.c6
-rw-r--r--src/lib_table.c2
-rw-r--r--src/lj_alloc.c12
-rw-r--r--src/lj_api.c105
-rw-r--r--src/lj_arch.h142
-rw-r--r--src/lj_asm.c63
-rw-r--r--src/lj_asm.h2
-rw-r--r--src/lj_asm_arm.h4
-rw-r--r--src/lj_asm_mips.h4
-rw-r--r--src/lj_asm_ppc.h10
-rw-r--r--src/lj_asm_x86.h94
-rw-r--r--src/lj_bc.c2
-rw-r--r--src/lj_bc.h2
-rw-r--r--src/lj_bcdump.h5
-rw-r--r--src/lj_bcread.c7
-rw-r--r--src/lj_bcwrite.c7
-rw-r--r--src/lj_buf.c8
-rw-r--r--src/lj_buf.h2
-rw-r--r--src/lj_carith.c2
-rw-r--r--src/lj_carith.h2
-rw-r--r--src/lj_ccall.c159
-rw-r--r--src/lj_ccall.h29
-rw-r--r--src/lj_ccallback.c101
-rw-r--r--src/lj_ccallback.h2
-rw-r--r--src/lj_cconv.c2
-rw-r--r--src/lj_cconv.h2
-rw-r--r--src/lj_cdata.c2
-rw-r--r--src/lj_cdata.h2
-rw-r--r--src/lj_clib.c2
-rw-r--r--src/lj_clib.h2
-rw-r--r--src/lj_cparse.c2
-rw-r--r--src/lj_cparse.h2
-rw-r--r--src/lj_crecord.c7
-rw-r--r--src/lj_crecord.h2
-rw-r--r--src/lj_ctype.c2
-rw-r--r--src/lj_ctype.h4
-rw-r--r--src/lj_debug.c18
-rw-r--r--src/lj_debug.h2
-rw-r--r--src/lj_def.h24
-rw-r--r--src/lj_dispatch.c4
-rw-r--r--src/lj_dispatch.h2
-rw-r--r--src/lj_emit_arm.h2
-rw-r--r--src/lj_emit_mips.h2
-rw-r--r--src/lj_emit_ppc.h2
-rw-r--r--src/lj_emit_x86.h2
-rw-r--r--src/lj_err.c58
-rw-r--r--src/lj_err.h2
-rw-r--r--src/lj_errmsg.h2
-rw-r--r--src/lj_ff.h2
-rw-r--r--src/lj_ffrecord.c8
-rw-r--r--src/lj_ffrecord.h2
-rw-r--r--src/lj_frame.h102
-rw-r--r--src/lj_func.c2
-rw-r--r--src/lj_func.h2
-rw-r--r--src/lj_gc.c43
-rw-r--r--src/lj_gc.h12
-rw-r--r--src/lj_gdbjit.c2
-rw-r--r--src/lj_gdbjit.h2
-rw-r--r--src/lj_ir.c7
-rw-r--r--src/lj_ir.h13
-rw-r--r--src/lj_ircall.h26
-rw-r--r--src/lj_iropt.h2
-rw-r--r--src/lj_jit.h5
-rw-r--r--src/lj_lex.c3
-rw-r--r--src/lj_lex.h2
-rw-r--r--src/lj_lib.c2
-rw-r--r--src/lj_lib.h10
-rw-r--r--src/lj_load.c2
-rw-r--r--src/lj_mcode.c19
-rw-r--r--src/lj_mcode.h2
-rw-r--r--src/lj_meta.c54
-rw-r--r--src/lj_meta.h2
-rw-r--r--src/lj_obj.c2
-rw-r--r--src/lj_obj.h157
-rw-r--r--src/lj_opt_dce.c2
-rw-r--r--src/lj_opt_fold.c13
-rw-r--r--src/lj_opt_loop.c2
-rw-r--r--src/lj_opt_mem.c3
-rw-r--r--src/lj_opt_narrow.c2
-rw-r--r--src/lj_opt_sink.c2
-rw-r--r--src/lj_opt_split.c2
-rw-r--r--src/lj_parse.c31
-rw-r--r--src/lj_parse.h2
-rw-r--r--src/lj_profile.c2
-rw-r--r--src/lj_profile.h2
-rw-r--r--src/lj_record.c36
-rw-r--r--src/lj_record.h2
-rw-r--r--src/lj_snap.c21
-rw-r--r--src/lj_snap.h2
-rw-r--r--src/lj_state.c14
-rw-r--r--src/lj_state.h2
-rw-r--r--src/lj_str.c2
-rw-r--r--src/lj_str.h2
-rw-r--r--src/lj_strfmt.c2
-rw-r--r--src/lj_strfmt.h2
-rw-r--r--src/lj_strscan.c11
-rw-r--r--src/lj_strscan.h2
-rw-r--r--src/lj_tab.c34
-rw-r--r--src/lj_tab.h2
-rw-r--r--src/lj_target.h4
-rw-r--r--src/lj_target_arm.h2
-rw-r--r--src/lj_target_arm64.h97
-rw-r--r--src/lj_target_mips.h2
-rw-r--r--src/lj_target_ppc.h4
-rw-r--r--src/lj_target_x86.h2
-rw-r--r--src/lj_trace.c4
-rw-r--r--src/lj_trace.h2
-rw-r--r--src/lj_traceerr.h2
-rw-r--r--src/lj_udata.c2
-rw-r--r--src/lj_udata.h2
-rw-r--r--src/lj_vm.h20
-rw-r--r--src/lj_vmevent.c3
-rw-r--r--src/lj_vmevent.h2
-rw-r--r--src/lj_vmmath.c34
-rw-r--r--src/ljamalg.c2
-rw-r--r--src/luaconf.h2
-rw-r--r--src/luajit.c2
-rw-r--r--src/luajit.h4
-rw-r--r--src/lualib.h2
-rw-r--r--src/msvcbuild.bat2
-rw-r--r--src/vm_arm.dasc17
-rw-r--r--src/vm_arm64.dasc3763
-rw-r--r--src/vm_mips.dasc4
-rw-r--r--src/vm_ppc.dasc8
-rw-r--r--src/vm_ppcspe.dasc3685
-rw-r--r--src/vm_x86.dasc448
195 files changed, 7543 insertions, 4836 deletions
diff --git a/COPYRIGHT b/COPYRIGHT
index 08936df..1ef7df6 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -1,7 +1,7 @@
===============================================================================
LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
-Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+Copyright (C) 2005-2015 Mike Pall. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/Makefile b/Makefile
index 8ec3635..dce52e5 100644
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@
# For MSVC, please follow the instructions given in src/msvcbuild.bat.
# For MinGW and Cygwin, cd to src and run make with the Makefile there.
#
-# Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+# Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
##############################################################################
MAJVER= 2
diff --git a/README b/README
index 5007be5..6dcd9b9 100644
--- a/README
+++ b/README
@@ -5,7 +5,7 @@ LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
Project Homepage: http://luajit.org/
-LuaJIT is Copyright (C) 2005-2014 Mike Pall.
+LuaJIT is Copyright (C) 2005-2015 Mike Pall.
LuaJIT is free software, released under the MIT license.
See full Copyright Notice in the COPYRIGHT file or in luajit.h.
diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css
index 21da868..07f5c84 100644
--- a/doc/bluequad-print.css
+++ b/doc/bluequad-print.css
@@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2014 Mike Pall.
+/* Copyright (C) 2004-2015 Mike Pall.
*
* You are welcome to use the general ideas of this design for your own sites.
* But please do not steal the stylesheet, the layout or the color scheme.
diff --git a/doc/bluequad.css b/doc/bluequad.css
index 05b092e..ae53143 100644
--- a/doc/bluequad.css
+++ b/doc/bluequad.css
@@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2014 Mike Pall.
+/* Copyright (C) 2004-2015 Mike Pall.
*
* You are welcome to use the general ideas of this design for your own sites.
* But please do not steal the stylesheet, the layout or the color scheme.
diff --git a/doc/changes.html b/doc/changes.html
index 6dbf26b..0d3e5ae 100644
--- a/doc/changes.html
+++ b/doc/changes.html
@@ -4,7 +4,7 @@
<title>LuaJIT Change History</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2014, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -920,7 +920,7 @@ This is the initial non-public release of LuaJIT.
</div>
<div id="foot">
<hr class="hide">
-Copyright &copy; 2005-2014 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
diff --git a/doc/contact.html b/doc/contact.html
index 8d19e7f..d92c3e3 100644
--- a/doc/contact.html
+++ b/doc/contact.html
@@ -4,7 +4,7 @@
<title>Contact</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2014, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -86,7 +86,7 @@ xD("fyZKB8xv\"FJytmz8.KAB0u52D")
<h2>Copyright</h2>
<p>
All documentation is
-Copyright &copy; 2005-2014 Mike Pall.
+Copyright &copy; 2005-2015 Mike Pall.
</p>
@@ -94,7 +94,7 @@ Copyright &copy; 2005-2014 Mike Pall.
</div>
<div id="foot">
<hr class="hide">
-Copyright &copy; 2005-2014 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html
index a768e9d..91dd9ef 100644
--- a/doc/ext_c_api.html
+++ b/doc/ext_c_api.html
@@ -4,7 +4,7 @@
<title>Lua/C API Extensions</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2014, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -179,7 +179,7 @@ Also note that this mechanism is not without overhead.
</div>
<div id="foot">
<hr class="hide">
-Copyright &copy; 2005-2014 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html
index c044d28..1ff2236 100644
--- a/doc/ext_ffi.html
+++ b/doc/ext_ffi.html
@@ -4,7 +4,7 @@
<title>FFI Library</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2014, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -322,7 +322,7 @@ without undue conversion penalties.
</div>
<div id="foot">
<hr class="hide">
-Copyright &copy; 2005-2014 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html
index 6e9b4a3..8cf48dc 100644
--- a/doc/ext_ffi_api.html
+++ b/doc/ext_ffi_api.html
@@ -4,7 +4,7 @@
<title>ffi.* API Functions</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2014, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -468,6 +468,8 @@ otherwise. The following parameters are currently defined:
<td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr>
<tr class="odd">
<td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
+<tr class="even">
+<td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr>
</table>
<h3 id="ffi_os"><tt>ffi.os</tt></h3>
@@ -558,7 +560,7 @@ named <tt>i</tt>.
</div>
<div id="foot">
<hr class="hide">
-Copyright &copy; 2005-2014 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html
index d7e8c86..889d44d 100644
--- a/doc/ext_ffi_semantics.html
+++ b/doc/ext_ffi_semantics.html
@@ -4,7 +4,7 @@
<title>FFI Semantics</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2014, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -1251,7 +1251,7 @@ compiled.</li>
</div>
<div id="foot">
<hr class="hide">
-Copyright &copy; 2005-2014 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html
index bb7ab55..e3f0146 100644
--- a/doc/ext_ffi_tutorial.html
+++ b/doc/ext_ffi_tutorial.html
@@ -4,7 +4,7 @@
<title>FFI Tutorial</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2014, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -593,7 +593,7 @@ it to a local variable in the function scope is unnecessary.
</div>
<div id="foot">
<hr class="hide">
-Copyright &copy; 2005-2014 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
diff --git a/doc/ext_jit.html b/doc/ext_jit.html
index fb4d434..a569dd5 100644
--- a/doc/ext_jit.html
+++ b/doc/ext_jit.html
@@ -4,7 +4,7 @@
<title>jit.* Library</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2014, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -153,7 +153,7 @@ Contains the target OS name:
<h3 id="jit_arch"><tt>jit.arch</tt></h3>
<p>
Contains the target architecture name:
-"x86", "x64" or "ppcspe".
+"x86", "x64", "arm", "ppc", or "mips".
</p>
<h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
@@ -191,7 +191,7 @@ if you want to know more.
</div>
<div id="foot">
<hr class="hide">
-Copyright &copy; 2005-2014 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
diff --git a/doc/ext_profiler.html b/doc/ext_profiler.html
index 3b00fc5..be63662 100644
--- a/doc/ext_profiler.html
+++ b/doc/ext_profiler.html
@@ -4,7 +4,7 @@
<title>Profiler</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2014, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -355,7 +355,7 @@ use.
</div>
<div id="foot">
<hr class="hide">
-Copyright &copy; 2005-2014 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
diff --git a/doc/extensions.html b/doc/extensions.html
index 172334a..d2f8d7b 100644
--- a/doc/extensions.html
+++ b/doc/extensions.html
@@ -4,7 +4,7 @@
<title>Extensions</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2014, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -433,7 +433,7 @@ lead to the termination of the process.</li>
</div>
<div id="foot">
<hr class="hide">
-Copyright &copy; 2005-2014 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
diff --git a/doc/faq.html b/doc/faq.html
index a95d23d..aebaef5 100644
--- a/doc/faq.html
+++ b/doc/faq.html
@@ -4,7 +4,7 @@
<title>Frequently Asked Questions (FAQ)</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2014, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -176,7 +176,7 @@ the development of certain features, if they are important to you.
</div>
<div id="foot">
<hr class="hide">
-Copyright &copy; 2005-2014 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
diff --git a/doc/install.html b/doc/install.html
index 2ad60c3..9fe0c08 100644
--- a/doc/install.html
+++ b/doc/install.html
@@ -4,7 +4,7 @@
<title>Installation</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2014, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -134,18 +134,18 @@ operating systems, CPUs and compilers:
<td class="compatos compatno">&nbsp;</td>
</tr>
<tr class="even">
-<td class="compatcpu"><a href="#cross2">PPC</a></td>
-<td class="compatos">GCC 4.3+</td>
-<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td>
+<td class="compatcpu"><a href="#cross2">ARM64</a></td>
+<td class="compatos">GCC 4.8+</td>
+<td class="compatos compatno">&nbsp;</td>
+<td class="compatos">Clang 3.5+</td>
<td class="compatos compatno">&nbsp;</td>
-<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
</tr>
<tr class="odd">
-<td class="compatcpu"><a href="#cross2">PPC/e500v2</a></td>
-<td class="compatos">GCC 4.3+</td>
+<td class="compatcpu"><a href="#cross2">PPC</a></td>
<td class="compatos">GCC 4.3+</td>
+<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td>
<td class="compatos compatno">&nbsp;</td>
-<td class="compatos compatno">&nbsp;</td>
+<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
</tr>
<tr class="even">
<td class="compatcpu"><a href="#cross2">MIPS</a></td>
@@ -385,10 +385,11 @@ make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
# ARM hard-float ABI with VFP (armhf, requires recent toolchain)
make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
+# ARM64 (requires x64 host)
+make CROSS=aarch64-linux-
+
# PPC
make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
-# PPC/e500v2 (fast interpreter only)
-make HOST_CC="gcc -m32" CROSS=powerpc-e500v2-linux-gnuspe-
# MIPS big-endian
make HOST_CC="gcc -m32" CROSS=mips-linux-
@@ -638,7 +639,7 @@ to me (the upstream) and not you (the package maintainer), anyway.
</div>
<div id="foot">
<hr class="hide">
-Copyright &copy; 2005-2014 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
diff --git a/doc/luajit.html b/doc/luajit.html
index 1a85f03..721f636 100644
--- a/doc/luajit.html
+++ b/doc/luajit.html
@@ -4,7 +4,7 @@
<title>LuaJIT</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2014, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -152,7 +152,7 @@ Lua is a powerful, dynamic and light-weight programming language.
It may be embedded or used as a general-purpose, stand-alone language.
</p>
<p>
-LuaJIT is Copyright &copy; 2005-2014 Mike Pall, released under the
+LuaJIT is Copyright &copy; 2005-2015 Mike Pall, released under the
<a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT open source license</a>.
</p>
<p>
@@ -172,7 +172,7 @@ LuaJIT is Copyright &copy; 2005-2014 Mike Pall, released under the
<tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr>
</table>
<table class="feature cpu">
-<tr><td>x86</td><td>x64</td><td>ARM</td><td>PPC</td><td>e500</td><td>MIPS</td></tr>
+<tr><td>x86</td><td>x64</td><td>ARM</td><td>ARM64</td><td>PPC</td><td>MIPS</td></tr>
</table>
<table class="feature fcompat">
<tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr>
@@ -226,7 +226,7 @@ Please select a sub-topic in the navigation bar to learn more about LuaJIT.
</div>
<div id="foot">
<hr class="hide">
-Copyright &copy; 2005-2014 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
diff --git a/doc/running.html b/doc/running.html
index 08a8f07..2b764dc 100644
--- a/doc/running.html
+++ b/doc/running.html
@@ -4,7 +4,7 @@
<title>Running LuaJIT</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2014, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -299,7 +299,7 @@ Here are the parameters and their default settings:
</div>
<div id="foot">
<hr class="hide">
-Copyright &copy; 2005-2014 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
diff --git a/doc/status.html b/doc/status.html
index 50d217a..cf137ee 100644
--- a/doc/status.html
+++ b/doc/status.html
@@ -4,7 +4,7 @@
<title>Status &amp; Roadmap</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2014, Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -117,7 +117,7 @@ Please refer to the
</div>
<div id="foot">
<hr class="hide">
-Copyright &copy; 2005-2014 Mike Pall
+Copyright &copy; 2005-2015 Mike Pall
<span class="noprint">
&middot;
<a href="contact.html">Contact</a>
diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h
index 435bbf5..57e0116 100644
--- a/dynasm/dasm_arm.h
+++ b/dynasm/dasm_arm.h
@@ -1,6 +1,6 @@
/*
** DynASM ARM encoding engine.
-** Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/
diff --git a/dynasm/dasm_arm.lua b/dynasm/dasm_arm.lua
index b3a8827..90a259c 100644
--- a/dynasm/dasm_arm.lua
+++ b/dynasm/dasm_arm.lua
@@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM ARM module.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
@@ -923,19 +923,22 @@ local function parse_template(params, template, nparams, pos)
end
map_op[".template__"] = function(params, template, nparams)
- if not params then return sub(template, 9) end
+ if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
-- Limit number of section buffer positions used by a single dasm_put().
-- A single opcode needs a maximum of 3 positions.
if secpos+3 > maxsecpos then wflush() end
local pos = wpos()
- local apos, spos = #actargs, secpos
+ local lpos, apos, spos = #actlist, #actargs, secpos
local ok, err
for t in gmatch(template, "[^|]+") do
ok, err = pcall(parse_template, params, t, nparams, pos)
if ok then return end
secpos = spos
+ actlist[lpos+1] = nil
+ actlist[lpos+2] = nil
+ actlist[lpos+3] = nil
actargs[apos+1] = nil
actargs[apos+2] = nil
actargs[apos+3] = nil
diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h
new file mode 100644
index 0000000..d912e61
--- /dev/null
+++ b/dynasm/dasm_arm64.h
@@ -0,0 +1,518 @@
+/*
+** DynASM ARM64 encoding engine.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH "arm64"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d) 0
+#endif
+
+/* Action definitions. */
+enum {
+ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+ /* The following actions need a buffer position. */
+ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+ /* The following actions also have an argument. */
+ DASM_REL_PC, DASM_LABEL_PC,
+ DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
+ DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_REL 0x15000000
+#define DASM_S_UNDEF_LG 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
+#define DASM_SEC2POS(sec) ((sec)<<24)
+#define DASM_POS2SEC(pos) ((pos)>>24)
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+ int *rbuf; /* Biased buffer pointer (negative section bias). */
+ int *buf; /* True buffer pointer. */
+ size_t bsize; /* Buffer size in bytes. */
+ int pos; /* Biased buffer position. */
+ int epos; /* End of biased buffer position - max single put. */
+ int ofs; /* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+ size_t psize; /* Allocated size of this structure. */
+ dasm_ActList actionlist; /* Current actionlist pointer. */
+ int *lglabels; /* Local/global chain/pos ptrs. */
+ size_t lgsize;
+ int *pclabels; /* PC label chains/pos ptrs. */
+ size_t pcsize;
+ void **globals; /* Array of globals (bias -10). */
+ dasm_Section *section; /* Pointer to active section. */
+ size_t codesize; /* Total size of all code sections. */
+ int maxsection; /* 0 <= sectionidx < maxsection. */
+ int status; /* Status code. */
+ dasm_Section sections[1]; /* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state.
+** Allocates the core structure (sized by DASM_PSZ(maxsection)) through
+** DASM_M_GROW, publishes it via Dst_REF and clears the label tables, the
+** globals pointer and every per-section buffer descriptor.
+** Note: D->status, D->section and the section pos/ofs fields are not set
+** here; dasm_setup() assigns them.
+*/
+void dasm_init(Dst_DECL, int maxsection)
+{
+ dasm_State *D;
+ size_t psz = 0;
+ int i;
+ Dst_REF = NULL;
+ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+ D = Dst_REF;
+ D->psize = psz;
+ D->lglabels = NULL;
+ D->lgsize = 0;
+ D->pclabels = NULL;
+ D->pcsize = 0;
+ D->globals = NULL;
+ D->maxsection = maxsection;
+ for (i = 0; i < maxsection; i++) {
+ D->sections[i].buf = NULL; /* Need this for pass3. */
+ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+ D->sections[i].bsize = 0;
+ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
+ }
+}
+
+/* Free DynASM state.
+** Releases every allocated section buffer, then the PC and local/global
+** label tables, and finally the state structure itself.
+*/
+void dasm_free(Dst_DECL)
+{
+  dasm_State *D = Dst_REF;
+  int secnum = 0;
+  while (secnum < D->maxsection) {
+    dasm_Section *sec = &D->sections[secnum++];
+    if (sec->buf != NULL)
+      DASM_M_FREE(Dst, sec->buf, sec->bsize);
+  }
+  if (D->pclabels != NULL) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+  if (D->lglabels != NULL) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+  DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup().
+** `gl` is the caller-owned array receiving global label addresses and
+** `maxgl` the number of global labels; the local/global label table is
+** grown to hold 10+maxgl ints.
+*/
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+ dasm_State *D = Dst_REF;
+ D->globals = gl - 10; /* Negative bias to compensate for locals. */
+ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too.
+** The table is grown to hold `maxpc` ints and the bytes beyond the
+** previous size are zeroed.
+*/
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+  dasm_State *D = Dst_REF;
+  const size_t oldsize = D->pcsize;
+  unsigned char *fresh;
+  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+  fresh = ((unsigned char *)D->pclabels) + oldsize;
+  memset((void *)fresh, 0, D->pcsize - oldsize);
+}
+
+/* Setup encoder.
+** Binds the action list, resets the status, selects section 0, clears all
+** label tables and rewinds every section to its biased start position.
+*/
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+  dasm_State *D = Dst_REF;
+  dasm_Section *sec;
+  int secnum;
+  D->actionlist = (dasm_ActList)actionlist;
+  D->status = DASM_S_OK;
+  D->section = &D->sections[0];
+  memset((void *)D->lglabels, 0, D->lgsize);
+  if (D->pclabels != NULL) memset((void *)D->pclabels, 0, D->pcsize);
+  for (secnum = 0, sec = D->sections; secnum < D->maxsection; secnum++, sec++) {
+    sec->pos = DASM_SEC2POS(secnum);
+    sec->ofs = 0;
+  }
+}
+
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) { \
+ D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+ D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st) ((void)0)
+#define CKPL(kind, st) ((void)0)
+#endif
+
+/* Encode a 12 bit immediate, optionally shifted left by 12.
+** Returns n itself if it fits in 12 bits, (n >> 12) | 0x1000 if only bits
+** 12..23 are set, and -1 otherwise.
+*/
+static int dasm_imm12(unsigned int n)
+{
+  if ((n >> 12) != 0) {
+    if ((n & 0xff000fff) != 0)
+      return -1;
+    return (n >> 12) | 0x1000;
+  }
+  return n;
+}
+
+/* Return the bit index of the highest set bit of x, or -1 if x is zero.
+** Callers in this file pass single-bit values, for which this is also the
+** index of the lowest set bit.
+*/
+static int dasm_ffs(unsigned long long x)
+{
+  int idx;
+  for (idx = -1; x != 0; x >>= 1)
+    idx++;
+  return idx;
+}
+
+static int dasm_imm13(int lo, int hi)
+{ /* Encode the 64 bit value hi:lo as a 13 bit logical (bitmask) immediate.
+** Returns the encoded field (callers shift it left by 10 into the
+** instruction word) or -1 if the value is not a replicated run of ones.
+** NOTE(review): the result looks like N:immr:imms packed as
+** (rotation << 6) | size/length -- confirm against the A64 encoding.
+*/
+ int inv = 0, w = 64, s = 0xfff, xa, xb;
+ unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo;
+ unsigned long long m = 1ULL, a, b, c;
+ if (n & 1) { n = ~n; inv = 1; } /* Normalize so that bit 0 is clear. */
+ a = n & -n; b = (n+a)&-(n+a); c = (n+a-b)&-(n+a-b); /* a: start of 1st run, b: bit above it, c: start of 2nd run. */
+ xa = dasm_ffs(a); xb = dasm_ffs(b);
+ if (c) { /* A second run exists: derive the element width w. */
+ w = dasm_ffs(c) - xa;
+ if (w == 32) m = 0x0000000100000001UL;
+ else if (w == 16) m = 0x0001000100010001UL;
+ else if (w == 8) m = 0x0101010101010101UL;
+ else if (w == 4) m = 0x1111111111111111UL;
+ else if (w == 2) m = 0x5555555555555555UL;
+ else return -1;
+ s = (-2*w & 0x3f) - 1;
+ } else if (!a) { /* n == 0 after normalization: all-zeros or all-ones input. */
+ return -1;
+ } else if (xb == -1) { /* The run extends to bit 63, so n+a wrapped to 0. */
+ xb = 64;
+ }
+ if ((b-a) * m != n) return -1; /* Must equal the first run replicated every w bits. */
+ if (inv) {
+ return ((w - xb) << 6) | (s+w+xa-xb);
+ } else {
+ return ((w - xa) << 6) | (s+xb-xa);
+ }
+ return -1; /* Not reached: both branches above return. */
+}
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets.
+** `start` is the index into the action list; the variadic ints are the
+** arguments consumed by actions >= DASM_REL_PC (DASM_IMM13X takes two).
+** The start index and one slot per position-carrying action are appended to
+** the active section buffer. With DASM_CHECKS, a failed check stores an
+** error in D->status and returns early.
+*/
+void dasm_put(Dst_DECL, int start, ...)
+{
+ va_list ap;
+ dasm_State *D = Dst_REF;
+ dasm_ActList p = D->actionlist + start;
+ dasm_Section *sec = D->section;
+ int pos = sec->pos, ofs = sec->ofs;
+ int *b;
+
+ if (pos >= sec->epos) { /* Grow the section buffer before it can overflow. */
+ DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+ }
+
+ b = sec->rbuf;
+ b[pos++] = start; /* First slot: action list offset of this put. */
+
+ va_start(ap, start);
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ if (action >= DASM__MAX) { /* Plain instruction word: 4 bytes of code. */
+ ofs += 4;
+ } else {
+ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+ switch (action) {
+ case DASM_STOP: goto stop;
+ case DASM_SECTION:
+ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+ D->section = &D->sections[n]; goto stop;
+ case DASM_ESC: p++; ofs += 4; break;
+ case DASM_REL_EXT: break;
+ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+ case DASM_REL_LG:
+ n = (ins & 2047) - 10; pl = D->lglabels + n;
+ /* Bkwd rel or global. */
+ if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+ pl += 10; n = *pl;
+ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
+ goto linkrel;
+ case DASM_REL_PC:
+ pl = D->pclabels + n; CKPL(pc, PC);
+ putrel:
+ n = *pl;
+ if (n < 0) { /* Label exists. Get label pos and store it. */
+ b[pos] = -n;
+ } else {
+ linkrel:
+ b[pos] = n; /* Else link to rel chain, anchored at label. */
+ *pl = pos;
+ }
+ pos++;
+ break;
+ case DASM_LABEL_LG:
+ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+ case DASM_LABEL_PC:
+ pl = D->pclabels + n; CKPL(pc, PC);
+ putlabel:
+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
+ }
+ *pl = -pos; /* Label exists now. */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_IMM:
+ CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
+ n >>= ((ins>>10)&31);
+#ifdef DASM_CHECKS
+ if ((ins & 0x8000))
+ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+ else
+ CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+#endif
+ b[pos++] = n;
+ break;
+ case DASM_IMM6:
+ CK((n >> 6) == 0, RANGE_I);
+ b[pos++] = n;
+ break;
+ case DASM_IMM12:
+ CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
+ b[pos++] = n;
+ break;
+ case DASM_IMM13W:
+ CK(dasm_imm13(n, n) != -1, RANGE_I);
+ b[pos++] = n;
+ break;
+ case DASM_IMM13X: {
+ int m = va_arg(ap, int); /* Second vararg: upper 32 bits. */
+ CK(dasm_imm13(n, m) != -1, RANGE_I);
+ b[pos++] = n;
+ b[pos++] = m;
+ break;
+ }
+ case DASM_IMML: {
+#ifdef DASM_CHECKS
+ int scale = (p[-2] >> 30); /* Top two bits of the preceding word. */
+ CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
+ (unsigned int)(n+256) < 512, RANGE_I);
+#endif
+ b[pos++] = n;
+ break;
+ }
+ }
+ }
+ }
+stop:
+ va_end(ap);
+ sec->pos = pos;
+ sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets.
+** Walks the recorded buffer of every section in order, subtracts the unused
+** part of each alignment estimate from the running offset and adds that
+** offset to each stored label offset. The total code size is stored in
+** D->codesize and *szp. Returns DASM_S_OK; with DASM_CHECKS it may instead
+** return a pending D->status or DASM_S_UNDEF_PC for an unresolved PC label.
+*/
+int dasm_link(Dst_DECL, size_t *szp)
+{
+ dasm_State *D = Dst_REF;
+ int secnum;
+ int ofs = 0;
+
+#ifdef DASM_CHECKS
+ *szp = 0;
+ if (D->status != DASM_S_OK) return D->status;
+ {
+ int pc;
+ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+ }
+#endif
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+ for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+ }
+ }
+
+ /* Combine all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->rbuf;
+ int pos = DASM_SEC2POS(secnum);
+ int lastpos = sec->pos;
+
+ while (pos != lastpos) {
+ dasm_ActList p = D->actionlist + b[pos++];
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ switch (action) {
+ case DASM_STOP: case DASM_SECTION: goto stop;
+ case DASM_ESC: p++; break;
+ case DASM_REL_EXT: break;
+ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+ case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+ case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
+ case DASM_IMML: pos++; break;
+ case DASM_IMM13X: pos += 2; break; /* Two slots: low and high word. */
+ }
+ }
+ stop: (void)0;
+ }
+ ofs += sec->ofs; /* Next section starts right after current section. */
+ }
+
+ D->codesize = ofs; /* Total size of all code sections */
+ *szp = ofs;
+ return DASM_S_OK;
+}
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st) ((void)0)
+#endif
+
+/* Pass 3: Encode sections.
+** Replays the recorded buffer of every section and writes the final
+** instruction words to `buffer`. Relocations and immediates are or'ed into
+** the most recently emitted word (cp[-1]).
+** Returns DASM_S_OK, or DASM_S_PHASE if the number of bytes written differs
+** from D->codesize as computed by dasm_link(). With DASM_CHECKS it may also
+** return an UNDEF_* or RANGE_REL status or'ed with the action list offset.
+*/
+int dasm_encode(Dst_DECL, void *buffer)
+{
+  dasm_State *D = Dst_REF;
+  char *base = (char *)buffer;
+  unsigned int *cp = (unsigned int *)buffer;
+  int secnum;
+
+  /* Encode all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->buf;
+    int *endb = sec->rbuf + sec->pos;
+
+    while (b != endb) {
+      dasm_ActList p = D->actionlist + *b++;
+      while (1) {
+        unsigned int ins = *p++;
+        unsigned int action = (ins >> 16);
+        /* Actions from DASM_ALIGN upwards own (at least) one buffer slot. */
+        int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+        switch (action) {
+        case DASM_STOP: case DASM_SECTION: goto stop;
+        case DASM_ESC: *cp++ = *p++; break;
+        case DASM_REL_EXT:
+          n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
+          goto patchrel;
+        case DASM_ALIGN:
+          /* Pad with the A64 NOP. 0xe1a00000 is the ARM32 "mov r0, r0" and
+          ** must not be used as filler in an A64 instruction stream.
+          */
+          ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xd503201f;
+          break;
+        case DASM_REL_LG:
+          CK(n >= 0, UNDEF_LG);
+          /* fallthrough */
+        case DASM_REL_PC:
+          CK(n >= 0, UNDEF_PC);
+          /* cp already points past the instruction, hence the +4. */
+          n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
+        patchrel:
+          if (!(ins & 0xf800)) {  /* B, BL */
+            CK((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, RANGE_REL);
+            cp[-1] |= ((n >> 2) & 0x03ffffff);
+          } else if ((ins & 0x800)) {  /* B.cond, CBZ, CBNZ, LDR* literal */
+            CK((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL);
+            cp[-1] |= ((n << 3) & 0x00ffffe0);
+          } else if ((ins & 0x3000) == 0x2000) {  /* ADR */
+            CK(((n+0x00100000) >> 21) == 0, RANGE_REL);
+            cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29);
+          } else if ((ins & 0x3000) == 0x3000) {  /* ADRP */
+            cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29);
+          } else if ((ins & 0x1000)) {  /* TBZ, TBNZ */
+            CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL);
+            cp[-1] |= ((n << 3) & 0x0007ffe0);
+          }
+          break;
+        case DASM_LABEL_LG:
+          /* Only global labels (>= 20) are exported to the globals array. */
+          ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+          break;
+        case DASM_LABEL_PC: break;
+        case DASM_IMM:
+          cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
+          break;
+        case DASM_IMM6:
+          cp[-1] |= ((n&31) << 19) | ((n&32) << 26);
+          break;
+        case DASM_IMM12:
+          cp[-1] |= (dasm_imm12((unsigned int)n) << 10);
+          break;
+        case DASM_IMM13W:
+          cp[-1] |= (dasm_imm13(n, n) << 10);
+          break;
+        case DASM_IMM13X:
+          /* The second buffer slot holds the upper 32 bits. */
+          cp[-1] |= (dasm_imm13(n, *b++) << 10);
+          break;
+        case DASM_IMML: {
+          int scale = (p[-2] >> 30);
+          cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
+            ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
+          break;
+          }
+        default: *cp++ = ins; break;
+        }
+      }
+      stop: (void)0;
+    }
+  }
+
+  if (base + D->codesize != (char *)cp)  /* Check for phase errors. */
+    return DASM_S_PHASE;
+  return DASM_S_OK;
+}
+#undef CK
+
+/* Get PC label offset.
+** Returns the stored offset for a defined label, -1 for a label that is
+** referenced but not yet defined, and -2 if it is unused or out of range.
+*/
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+  dasm_State *D = Dst_REF;
+  int pos;
+  if (pc*sizeof(int) >= D->pcsize)
+    return -2;  /* Out of range. */
+  pos = D->pclabels[pc];
+  if (pos == 0)
+    return -2;  /* Unused. */
+  return pos < 0 ? *DASM_POS2PTR(D, -pos) : -1;  /* Defined : undefined. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps.
+** Flags a still-open chain on local labels 1..9 as DASM_S_UNDEF_LG, resets
+** those slots otherwise, and reports DASM_S_MATCH_SEC if `secmatch` is
+** non-negative and differs from the active section. Returns D->status.
+*/
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+  dasm_State *D = Dst_REF;
+  if (D->status == DASM_S_OK) {
+    int lbl = 1;
+    while (lbl <= 9) {
+      if (D->lglabels[lbl] > 0) { D->status = DASM_S_UNDEF_LG|lbl; break; }
+      D->lglabels[lbl++] = 0;
+    }
+  }
+  if (D->status == DASM_S_OK && secmatch >= 0) {
+    dasm_Section *want = &D->sections[secmatch];
+    if (D->section != want)
+      D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
+  }
+  return D->status;
+}
+#endif
+
diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua
new file mode 100644
index 0000000..9766e47
--- /dev/null
+++ b/dynasm/dasm_arm64.lua
@@ -0,0 +1,1166 @@
+------------------------------------------------------------------------------
+-- DynASM ARM64 module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information (arch matches DASM_ARCH "arm64" of the C encoding engine):
+local _info = {
+  arch = "arm64",
+  description = "DynASM ARM64 module",
+  version = "1.3.0",
+  vernum = 10300,
+  release = "2014-12-03",
+  author = "Mike Pall",
+  license = "MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable, rawget = assert, setmetatable, rawget
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
+local concat, sort, insert = table.concat, table.sort, table.insert
+local bit = bit or require("bit")
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local ror, tohex = bit.ror, bit.tohex
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+ "STOP", "SECTION", "ESC", "REL_EXT",
+ "ALIGN", "REL_LG", "LABEL_LG",
+ "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML",
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number.
+local map_action = {}
+for n,name in ipairs(action_names) do
+ map_action[name] = n-1
+end
+
+-- Action list buffer.
+local actlist = {}
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Dump action names and numbers (hex and decimal) to the given output.
+local function dumpactions(out)
+  out:write("DynASM encoding engine action codes:\n")
+  for _, actname in ipairs(action_names) do
+    local code = map_action[actname]
+    local line = format(" %-10s %02X %d\n", actname, code, code)
+    out:write(line)
+  end
+  out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array named `name`.
+local function writeactions(out, name)
+  local nn = #actlist
+  -- An empty list is emitted as a single STOP word. The list is 1-based, so
+  -- the placeholder must go to index 1 (index 0 is never written out).
+  if nn == 0 then nn = 1; actlist[1] = map_action.STOP end
+  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+  for i = 1,nn-1 do
+    assert(out:write("0x", tohex(actlist[i]), ",\n"))
+  end
+  assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
+end
+
+------------------------------------------------------------------------------
+
+-- Add word to action list.
+local function wputxw(n)
+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+ actlist[#actlist+1] = n
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too.
+local function waction(action, val, a, num)
+ local w = assert(map_action[action], "bad action name `"..action.."'")
+ wputxw(w * 0x10000 + (val or 0))
+ if a then actargs[#actargs+1] = a end
+ if a or num then secpos = secpos + (num or 1) end
+end
+
+-- Flush action list (intervening C code or buffer pos overflow).
+local function wflush(term)
+ if #actlist == actargs[1] then return end -- Nothing to flush.
+ if not term then waction("STOP") end -- Terminate action list.
+ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
+ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
+ secpos = 1 -- The actionlist offset occupies a buffer position, too.
+end
+
+-- Put escaped word.
+local function wputw(n)
+ if n <= 0x000fffff then waction("ESC") end
+ wputxw(n)
+end
+
+-- Reserve position for word.
+local function wpos()
+ local pos = #actlist+1
+ actlist[pos] = ""
+ return pos
+end
+
+-- Store word to reserved position.
+local function wputpos(pos, n)
+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+ if n <= 0x000fffff then
+ insert(actlist, pos+1, n)
+ n = map_action.ESC * 0x10000
+ end
+ actlist[pos] = n
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use.
+local next_global = 20
+local map_global = setmetatable({}, { __index = function(t, name)
+ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+ local n = next_global
+ if n > 2047 then werror("too many global labels") end
+ next_global = n + 1
+ t[name] = n
+ return n
+end})
+
+-- Dump global labels.
+local function dumpglobals(out, lvl)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("Global labels:\n")
+ for i=20,next_global-1 do
+ out:write(format(" %s\n", t[i]))
+ end
+ out:write("\n")
+end
+
+-- Write global label enum.
+local function writeglobals(out, prefix)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("enum {\n")
+ for i=20,next_global-1 do
+ out:write(" ", prefix, t[i], ",\n")
+ end
+ out:write(" ", prefix, "_MAX\n};\n")
+end
+
+-- Write global label names.
+local function writeglobalnames(out, name)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("static const char *const ", name, "[] = {\n")
+ for i=20,next_global-1 do
+ out:write(" \"", t[i], "\",\n")
+ end
+ out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+local next_extern = 0
+local map_extern_ = {}
+local map_extern = setmetatable({}, { __index = function(t, name)
+ -- No restrictions on the name for now.
+ local n = next_extern
+ if n > 2047 then werror("too many extern labels") end
+ next_extern = n + 1
+ t[name] = n
+ map_extern_[n] = name
+ return n
+end})
+
+-- Dump extern labels.
+local function dumpexterns(out, lvl)
+ out:write("Extern labels:\n")
+ for i=0,next_extern-1 do
+ out:write(format(" %s\n", map_extern_[i]))
+ end
+ out:write("\n")
+end
+
+-- Write extern label names.
+local function writeexternnames(out, name)
+ out:write("static const char *const ", name, "[] = {\n")
+ for i=0,next_extern-1 do
+ out:write(" \"", map_extern_[i], "\",\n")
+ end
+ out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+
+-- Ext. register name -> int. name.
+local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }
+
+-- Int. register name -> ext. name.
+local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", }
+
+local map_type = {} -- Type name -> { ctype, reg }
+local ctypenum = 0 -- Type number (for Dt... macros).
+
+-- Reverse defines for registers.
+function _M.revdef(s)
+ return map_reg_rev[s] or s
+end
+
+local map_shift = { lsl = 0, lsr = 1, asr = 2, }
+
+local map_extend = {
+ uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
+ sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
+}
+
+local map_cond = {
+ eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
+ hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
+ hs = 2, lo = 3,
+}
+
+------------------------------------------------------------------------------
+
+local parse_reg_type
+
+local function parse_reg(expr) -- Parse a register name or type override; returns the register number and the type entry (if any).
+  if not expr then werror("expected register name") end
+  local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
+  local tp = map_type[tname or expr]
+  if tp then
+    local reg = ovreg or tp.reg
+    if not reg then
+      werror("type `"..(tname or expr).."' needs a register override")
+    end
+    expr = reg
+  end
+  local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    -- Registers 0..30 are always valid. Register 31 is valid only with the
+    -- "@" prefix or for non-integer (q/d/s/h/b) registers. The pattern also
+    -- matches 32..39, which must be rejected for every register class.
+    if r <= 30 or (r == 31 and (ok31 ~= "" or (rt ~= "w" and rt ~= "x"))) then
+      if not parse_reg_type then
+        parse_reg_type = rt
+      elseif parse_reg_type ~= rt then
+        werror("register size mismatch")
+      end
+      return r, tp
+    end
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_reg_base(expr)
+ if expr == "sp" then return 0x3e0 end
+ local base, tp = parse_reg(expr)
+ if parse_reg_type ~= "x" then werror("bad register type") end
+ parse_reg_type = false
+ return shl(base, 5), tp
+end
+
+local parse_ctx = {}
+
+local loadenv = setfenv and function(s)
+ local code = loadstring(s, "")
+ if code then setfenv(code, parse_ctx) end
+ return code
+end or function(s)
+ return load(s, "", nil, parse_ctx)
+end
+
+-- Try to parse simple arithmetic, too, since some basic ops are aliases.
+-- Returns the number (or evaluated expression result), or nil on failure.
+local function parse_number(n)
+  local direct = tonumber(n)
+  if direct then return direct end
+  local chunk = loadenv("return "..n)
+  if not chunk then return nil end
+  local ok, value = pcall(chunk)
+  if not ok then return nil end
+  return value
+end
+
+local function parse_imm(imm, bits, shift, scale, signed)
+ imm = match(imm, "^#(.*)$")
+ if not imm then werror("expected immediate operand") end
+ local n = parse_number(imm)
+ if n then
+ local m = sar(n, scale)
+ if shl(m, scale) == n then
+ if signed then
+ local s = sar(m, bits-1)
+ if s == 0 then return shl(m, shift)
+ elseif s == -1 then return shl(m + shl(1, bits), shift) end
+ else
+ if sar(m, bits) == 0 then return shl(m, shift) end
+ end
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+ return 0
+ end
+end
+
+local function parse_imm12(imm)
+ imm = match(imm, "^#(.*)$")
+ if not imm then werror("expected immediate operand") end
+ local n = parse_number(imm)
+ if n then
+ if shr(n, 12) == 0 then
+ return shl(n, 10)
+ elseif band(n, 0xff000fff) == 0 then
+ return shr(n, 2) + 0x00400000
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ waction("IMM12", 0, imm)
+ return 0
+ end
+end
+
+local function parse_imm13(imm)
+ imm = match(imm, "^#(.*)$")
+ if not imm then werror("expected immediate operand") end
+ local n = parse_number(imm)
+ local r64 = parse_reg_type == "x"
+ if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
+ local inv = false
+ if band(n, 1) == 1 then n = bit.bnot(n); inv = true end
+ local t = {}
+ for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end
+ local b = table.concat(t)
+ b = b..(r64 and (inv and "1" or "0"):rep(32) or b)
+ local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
+ if p0 then
+ local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a
+ if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then
+ local s = band(-2*w, 0x3f) - 1
+ if w == 64 then s = s + 0x1000 end
+ if inv then
+ return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
+ else
+ return shl(w-#p0, 16) + shl(s+#p1, 10)
+ end
+ end
+ end
+ werror("out of range immediate `"..imm.."'")
+ elseif r64 then
+ waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
+ actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm)
+ return 0
+ else
+ waction("IMM13W", 0, imm)
+ return 0
+ end
+end
+
+local function parse_imm6(imm)
+ imm = match(imm, "^#(.*)$")
+ if not imm then werror("expected immediate operand") end
+ local n = parse_number(imm)
+ if n then
+ if n >= 0 and n <= 63 then
+ return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0)
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ waction("IMM6", 0, imm)
+ return 0
+ end
+end
+
+local function parse_imm_load(imm, scale)
+ local n = parse_number(imm)
+ if n then
+ local m = sar(n, scale)
+ if shl(m, scale) == n and m >= 0 and m < 0x1000 then
+ return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
+ elseif n >= -256 and n < 256 then
+ return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ waction("IMML", 0, imm)
+ return 0
+ end
+end
+
+local function parse_fpimm(imm)
+ imm = match(imm, "^#(.*)$")
+ if not imm then werror("expected immediate operand") end
+ local n = parse_number(imm)
+ if n then
+ local m, e = math.frexp(n)
+ local s, e2 = 0, band(e-2, 7)
+ if m < 0 then m = -m; s = 0x00100000 end
+ m = m*32-16
+ if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then
+ return s + shl(e2, 17) + shl(m, 13)
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ werror("NYI fpimm action")
+ end
+end
+
+local function parse_shift(expr)
+ local s, s2 = match(expr, "^(%S+)%s*(.*)$")
+ s = map_shift[s]
+ if not s then werror("expected shift operand") end
+ return parse_imm(s2, 6, 10, 0, false) + shl(s, 22)
+end
+
+local function parse_lslx16(expr)
+ local n = match(expr, "^lsl%s*#(%d+)$")
+ n = tonumber(n)
+ if not n then werror("expected shift operand") end
+ if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then
+ werror("bad shift amount")
+ end
+ return shl(n, 17)
+end
+
+local function parse_extend(expr)
+ local s, s2 = match(expr, "^(%S+)%s*(.*)$")
+ if s == "lsl" then
+ s = parse_reg_type == "x" and 3 or 2
+ else
+ s = map_extend[s]
+ end
+ if not s then werror("expected extend operand") end
+ return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13)
+end
+
+-- Map a condition name to its code, xor it with `inv` and shift it to bit 12.
+local function parse_cond(expr, inv)
+  local code = map_cond[expr]
+  if not code then werror("expected condition operand") end
+  local flipped = bit.bxor(code, inv)
+  return shl(flipped, 12)
+end
+
+local function parse_load(params, nparams, n, op)
+ if params[n+2] then werror("too many operands") end
+ local pn, p2 = params[n], params[n+1]
+ local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+ if not p1 then
+ if not p2 then
+ local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+ if reg and tailr ~= "" then
+ local base, tp = parse_reg_base(reg)
+ if tp then
+ waction("IMML", 0, format(tp.ctypefmt, tailr))
+ return op + base
+ end
+ end
+ end
+ werror("expected address operand")
+ end
+ local scale = shr(op, 30)
+ if p2 then
+ if wb == "!" then werror("bad use of '!'") end
+ op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
+ elseif wb == "!" then
+ local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
+ if not p1a then werror("bad use of '!'") end
+ op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
+ else
+ local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
+ op = op + parse_reg_base(p1a)
+ if p2a ~= "" then
+ local imm = match(p2a, "^,%s*#(.*)$")
+ if imm then
+ op = op + parse_imm_load(imm, scale)
+ else
+ local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
+ op = op + shl(parse_reg(p2b), 16) + 0x00200800
+ if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
+ werror("bad index register type")
+ end
+ if p3b == "" then
+ if parse_reg_type ~= "x" then werror("bad index register type") end
+ op = op + 0x6000
+ else
+ if p3s == "" or p3s == "#0" then
+ elseif p3s == "#"..scale then
+ op = op + 0x1000
+ else
+ werror("bad scale")
+ end
+ if parse_reg_type == "x" then
+ if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
+ elseif p3b == "sxtx" then op = op + 0xe000
+ else
+ werror("bad extend/shift specifier")
+ end
+ else
+ if p3b == "uxtw" then op = op + 0x4000
+ elseif p3b == "sxtw" then op = op + 0xc000
+ else
+ werror("bad extend/shift specifier")
+ end
+ end
+ end
+ end
+ else
+ if wb == "!" then werror("bad use of '!'") end
+ op = op + 0x01000000
+ end
+ end
+ return op
+end
+
+local function parse_load_pair(params, nparams, n, op)
+ if params[n+2] then werror("too many operands") end
+ local pn, p2 = params[n], params[n+1]
+ local scale = shr(op, 30) == 0 and 2 or 3
+ local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+ if not p1 then
+ if not p2 then
+ local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+ if reg and tailr ~= "" then
+ local base, tp = parse_reg_base(reg)
+ if tp then
+ waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
+ return op + base + 0x01000000
+ end
+ end
+ end
+ werror("expected address operand")
+ end
+ if p2 then
+ if wb == "!" then werror("bad use of '!'") end
+ op = op + 0x00800000
+ else
+ local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
+ if p1a then p1, p2 = p1a, p2a else p2 = "#0" end
+ op = op + (wb == "!" and 0x01800000 or 0x01000000)
+ end
+ return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
+end
+
+-- Translate a label operand into an action kind plus its arguments.
+-- label: label text; def: true when the label is being defined, false when
+-- it is referenced.
+-- Returns "PC", 0, expr for =>expr; "LG", index for ->global and for
+-- numeric local labels; "EXT", index for "extern name" references.
+-- Anything else is reported via werror().
+local function parse_label(label, def)
+  local lead = sub(label, 1, 2)
+  if lead == "=>" then
+    -- PC label: the expression after the prefix is passed through.
+    return "PC", 0, sub(label, 3)
+  elseif lead == "->" then
+    -- Global label: looked up by name in map_global.
+    return "LG", map_global[sub(label, 3)]
+  end
+  if not def then
+    -- Local label reference. Fwd (>N): 1-9, Bkwd (<N): 11-19.
+    local dir, digit = match(label, "^([<>])([1-9])$")
+    if dir == ">" then
+      return "LG", digit + 0
+    elseif dir then
+      return "LG", digit + 10
+    end
+    -- Extern label reference: extern name.
+    local ext = match(label, "^extern%s+(%S+)$")
+    if ext then
+      return "EXT", map_extern[ext]
+    end
+  elseif match(label, "^[1-9]$") then
+    -- Local label definition: N maps to 10+N.
+    return "LG", 10+tonumber(label)
+  end
+  werror("bad label `"..label.."'")
+end
+
+-- Classify a PC-relative opcode. The result is the value parse_template()
+-- adds to the argument of the REL_* action: 0, 0x800, 0x1000, 0x2000 or
+-- 0x3000. Asserts when the opcode matches none of the known classes.
+local function branch_type(op)
+  -- B, BL
+  if band(op, 0x7c000000) == 0x14000000 then return 0 end
+  -- B.cond, CBZ, CBNZ, LDR* literal
+  if shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or
+     band(op, 0x3b000000) == 0x18000000 then
+    return 0x800
+  end
+  -- TBZ, TBNZ
+  if band(op, 0x7e000000) == 0x36000000 then return 0x1000 end
+  local adr = band(op, 0x9f000000)
+  -- ADR
+  if adr == 0x10000000 then return 0x2000 end
+  -- ADRP. NOTE(review): the one-argument band() presumably normalizes the
+  -- constant to the same numeric range band() returns -- confirm.
+  if adr == band(0x90000000) then return 0x3000 end
+  assert(false, "unknown branch type")
+end
+
+------------------------------------------------------------------------------
+
+local map_op, op_template
+
+-- Build an opcode handler that rewrites its operands with f(params, nparams)
+-- and then assembles them with the template stored under map_op[opname].
+-- Called without params, the handler returns "-> " followed by opname
+-- minus its last two characters.
+local function op_alias(opname, f)
+  local usage = "-> "..opname:sub(1, -3)
+  return function(params, nparams)
+    if params then
+      f(params, nparams)
+      -- The target template is looked up at call time, not at alias creation.
+      op_template(params, map_op[opname], nparams)
+    else
+      return usage
+    end
+  end
+end
+
+-- Operand rewrite shared by the sbfx/bfxil/ubfx aliases: replaces the fourth
+-- operand by the expression "#(op3)+(op4)-1", where op3 and op4 are the
+-- third and fourth operands without their first character.
+local function alias_bfx(p)
+  local lsb, width = p[3]:sub(2), p[4]:sub(2)
+  p[4] = "#("..lsb..")+("..width..")-1"
+end
+
+-- Operand rewrite shared by the sbfiz/bfi/ubfiz aliases. The third operand
+-- becomes "#-(op3)%32" when the first operand parses as a "w" register and
+-- "#-(op3)%64" otherwise; the fourth operand becomes "#(op4)-1".
+local function alias_bfiz(p)
+  parse_reg(p[1])
+  local modulus = "64"
+  if parse_reg_type == "w" then modulus = "32" end
+  p[3] = "#-("..p[3]:sub(2)..")%"..modulus
+  p[4] = "#("..p[4]:sub(2)..")-1"
+end
+
+-- Handler for lsl with an immediate shift, assembled through ubfm_4.
+-- For a "w" first operand the operands become "#-(sh)%32" and "#31-(sh)",
+-- otherwise "#-(sh)%64" and "#63-(sh)", where sh is the third operand
+-- without its first character.
+local alias_lslimm = op_alias("ubfm_4", function(p)
+  parse_reg(p[1])
+  local sh = p[3]:sub(2)
+  local modulus, top = "64", "63"
+  if parse_reg_type == "w" then
+    modulus, top = "32", "31"
+  end
+  p[3] = "#-("..sh..")%"..modulus
+  p[4] = "#"..top.."-("..sh..")"
+end)
+
+-- Template strings for ARM64 instructions.
+--
+-- Keys have the form name_N or name_*. A value is either a handler function
+-- (params, nparams) or a template string: one or more alternatives
+-- separated by "|", each made of 8 hex digits (the base opcode) followed by
+-- operand letters. op_template() tries the alternatives in order and
+-- parse_template() interprets the letters:
+--   D N M A  register operand, added at bit 0 / 5 / 16 / 10
+--   m        previous operand's register again at bit 16 (consumes nothing)
+--   p        rewrite a following "sp" operand to "@x31" (consumes nothing)
+--   g        register type check: "x" adds 0x80000000, "w" adds nothing
+--   f        register type check: "d" adds 0x00400000, "s" adds nothing
+--   x w d s  the current register type must be exactly this letter
+--   L P      address operand via parse_load() / parse_load_pair()
+--   B        label operand, emits a REL_* action
+--   I i T    immediate via parse_imm12() / parse_imm13() / parse_imm6()
+--   W 1 2 5 V  immediate via parse_imm(): 16 bits at bit 5, 6 at 16,
+--            6 at 10, 5 at 16, 4 at 0
+--   F Z      FP immediate via parse_fpimm() / literal #0 or #0.0
+--   S X R    parse_shift() / parse_extend() / parse_lslx16()
+--   C c      condition via parse_cond(q, 0) / parse_cond(q, 1)
+map_op = {
+ -- Basic data processing instructions.
+ add_3 = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx",
+ add_4 = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX",
+ adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx",
+ adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX",
+ cmn_2 = "2b00001fNMg|3100001fpNIg|ab20601fpNMx",
+ cmn_3 = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX",
+
+ sub_3 = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx",
+ sub_4 = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX",
+ subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx",
+ subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX",
+ cmp_2 = "6b00001fNMg|7100001fpNIg|eb20601fpNMx",
+ cmp_3 = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX",
+
+ neg_2 = "4b0003e0DMg",
+ neg_3 = "4b0003e0DMSg",
+ negs_2 = "6b0003e0DMg",
+ negs_3 = "6b0003e0DMSg",
+
+ adc_3 = "1a000000DNMg",
+ adcs_3 = "3a000000DNMg",
+ sbc_3 = "5a000000DNMg",
+ sbcs_3 = "7a000000DNMg",
+ ngc_2 = "5a0003e0DMg",
+ ngcs_2 = "7a0003e0DMg",
+
+ and_3 = "0a000000DNMg|12000000pDNig",
+ and_4 = "0a000000DNMSg",
+ orr_3 = "2a000000DNMg|32000000pDNig",
+ orr_4 = "2a000000DNMSg",
+ eor_3 = "4a000000DNMg|52000000pDNig",
+ eor_4 = "4a000000DNMSg",
+ ands_3 = "6a000000DNMg|72000000DNig",
+ ands_4 = "6a000000DNMSg",
+ tst_2 = "6a00001fNMg|7200001fNig",
+ tst_3 = "6a00001fNMSg",
+
+ bic_3 = "0a200000DNMg",
+ bic_4 = "0a200000DNMSg",
+ orn_3 = "2a200000DNMg",
+ orn_4 = "2a200000DNMSg",
+ eon_3 = "4a200000DNMg",
+ eon_4 = "4a200000DNMSg",
+ bics_3 = "6a200000DNMg",
+ bics_4 = "6a200000DNMSg",
+
+ movn_2 = "12800000DWg",
+ movn_3 = "12800000DWRg",
+ movz_2 = "52800000DWg",
+ movz_3 = "52800000DWRg",
+ movk_2 = "72800000DWg",
+ movk_3 = "72800000DWRg",
+
+ -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
+ mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
+ mov_3 = "2a0003e0DMSg",
+ mvn_2 = "2a2003e0DMg",
+ mvn_3 = "2a2003e0DMSg",
+
+ adr_2 = "10000000DBx",
+ adrp_2 = "90000000DBx",
+
+ csel_4 = "1a800000DNMCg",
+ csinc_4 = "1a800400DNMCg",
+ csinv_4 = "5a800000DNMCg",
+ csneg_4 = "5a800400DNMCg",
+ cset_2 = "1a9f07e0Dcg",
+ csetm_2 = "5a9f03e0Dcg",
+ cinc_3 = "1a800400DNmcg",
+ cinv_3 = "5a800000DNmcg",
+ cneg_3 = "5a800400DNmcg",
+
+ ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
+ ccmp_4 = "7a400000NMVCg|7a400800N5VCg",
+
+ madd_4 = "1b000000DNMAg",
+ msub_4 = "1b008000DNMAg",
+ mul_3 = "1b007c00DNMg",
+ mneg_3 = "1b00fc00DNMg",
+
+ smaddl_4 = "9b200000DxNMwAx",
+ smsubl_4 = "9b208000DxNMwAx",
+ smull_3 = "9b207c00DxNMw",
+ smnegl_3 = "9b20fc00DxNMw",
+ smulh_3 = "9b407c00DNMx",
+ umaddl_4 = "9ba00000DxNMwAx",
+ umsubl_4 = "9ba08000DxNMwAx",
+ umull_3 = "9ba07c00DxNMw",
+ umnegl_3 = "9ba0fc00DxNMw",
+ umulh_3 = "9bc07c00DNMx",
+
+ udiv_3 = "1ac00800DNMg",
+ sdiv_3 = "1ac00c00DNMg",
+
+ -- Bit operations.
+ sbfm_4 = "13000000DN12w|93400000DN12x",
+ bfm_4 = "33000000DN12w|b3400000DN12x",
+ ubfm_4 = "53000000DN12w|d3400000DN12x",
+ extr_4 = "13800000DNM2w|93c00000DNM2x",
+
+ sxtb_2 = "13001c00DNw|93401c00DNx",
+ sxth_2 = "13003c00DNw|93403c00DNx",
+ sxtw_2 = "93407c00DxNw",
+ uxtb_2 = "53001c00DNw",
+ uxth_2 = "53003c00DNw",
+
+ -- Aliases that rewrite their operands and assemble via sbfm/bfm/ubfm.
+ sbfx_4 = op_alias("sbfm_4", alias_bfx),
+ bfxil_4 = op_alias("bfm_4", alias_bfx),
+ ubfx_4 = op_alias("ubfm_4", alias_bfx),
+ sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
+ bfi_4 = op_alias("bfm_4", alias_bfiz),
+ ubfiz_4 = op_alias("ubfm_4", alias_bfiz),
+
+ -- lsl: a third operand starting with '#' (byte 35) selects the immediate
+ -- alias; anything else (including the no-params query) uses the
+ -- register template.
+ lsl_3 = function(params, nparams)
+ if params and params[3]:byte() == 35 then
+ return alias_lslimm(params, nparams)
+ else
+ return op_template(params, "1ac02000DNMg", nparams)
+ end
+ end,
+ lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
+ asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
+ ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",
+
+ clz_2 = "5ac01000DNg",
+ cls_2 = "5ac01400DNg",
+ rbit_2 = "5ac00000DNg",
+ rev_2 = "5ac00800DNw|dac00c00DNx",
+ rev16_2 = "5ac00400DNg",
+ rev32_2 = "dac00800DNx",
+
+ -- Loads and stores.
+ ["strb_*"] = "38000000DwL",
+ ["ldrb_*"] = "38400000DwL",
+ ["ldrsb_*"] = "38c00000DwL|38800000DxL",
+ ["strh_*"] = "78000000DwL",
+ ["ldrh_*"] = "78400000DwL",
+ ["ldrsh_*"] = "78c00000DwL|78800000DxL",
+ ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
+ ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
+ ["ldrsw_*"] = "98000000DxB|b8800000DxL",
+ -- NOTE: ldur etc. are handled by ldr et al.
+
+ ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
+ ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
+ ["ldpsw_*"] = "68400000DAxP",
+
+ -- Branches.
+ b_1 = "14000000B",
+ bl_1 = "94000000B",
+ blr_1 = "d63f0000Nx",
+ br_1 = "d61f0000Nx",
+ ret_0 = "d65f03c0",
+ ret_1 = "d65f0000Nx",
+ -- b.cond is added below.
+ cbz_2 = "34000000DBg",
+ cbnz_2 = "35000000DBg",
+ tbz_3 = "36000000DTBw|36000000DTBx",
+ tbnz_3 = "37000000DTBw|37000000DTBx",
+
+ -- Miscellaneous instructions.
+ -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
+ -- TODO: sys, sysl, ic, dc, at, tlbi
+ -- TODO: hint, yield, wfe, wfi, sev, sevl
+ -- TODO: clrex, dsb, dmb, isb
+ nop_0 = "d503201f",
+ brk_0 = "d4200000",
+ brk_1 = "d4200000W",
+
+ -- Floating point instructions.
+ fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
+ fabs_2 = "1e20c000DNf",
+ fneg_2 = "1e214000DNf",
+ fsqrt_2 = "1e21c000DNf",
+
+ fcvt_2 = "1e22c000DdNs|1e624000DsNd",
+
+ -- TODO: half-precision and fixed-point conversions.
+ fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
+ fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
+ fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
+ fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
+ fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
+ fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
+ fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
+ fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
+ fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
+ fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",
+
+ scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
+ ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",
+
+ frintn_2 = "1e244000DNf",
+ frintp_2 = "1e24c000DNf",
+ frintm_2 = "1e254000DNf",
+ frintz_2 = "1e25c000DNf",
+ frinta_2 = "1e264000DNf",
+ frintx_2 = "1e274000DNf",
+ frinti_2 = "1e27c000DNf",
+
+ fadd_3 = "1e202800DNMf",
+ fsub_3 = "1e203800DNMf",
+ fmul_3 = "1e200800DNMf",
+ fnmul_3 = "1e208800DNMf",
+ fdiv_3 = "1e201800DNMf",
+
+ fmadd_4 = "1f000000DNMAf",
+ fmsub_4 = "1f008000DNMAf",
+ fnmadd_4 = "1f200000DNMAf",
+ fnmsub_4 = "1f208000DNMAf",
+
+ fmax_3 = "1e204800DNMf",
+ fmaxnm_3 = "1e206800DNMf",
+ fmin_3 = "1e205800DNMf",
+ fminnm_3 = "1e207800DNMf",
+
+ fcmp_2 = "1e202000NMf|1e202008NZf",
+ fcmpe_2 = "1e202010NMf|1e202018NZf",
+
+ fccmp_4 = "1e200400NMVCf",
+ fccmpe_4 = "1e200410NMVCf",
+
+ fcsel_4 = "1e200c00DNMCf",
+
+ -- TODO: crc32*, aes*, sha*, pmull
+ -- TODO: SIMD instructions.
+}
+
+-- Add the conditional branches: one b<cond>_1 template per map_cond entry,
+-- with the entry's value added to the 0x54000000 base opcode and a single
+-- label operand ("B").
+for cond,c in pairs(map_cond) do
+ map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B"
+end
+
+------------------------------------------------------------------------------
+
+-- Handle opcodes defined with template strings.
+-- params: operand strings; template: a single alternative (8 hex digits
+-- followed by operand letters); nparams: operand count, passed on to the
+-- load parsers; pos: position handed to wputpos() for the final opcode.
+-- Any operand that does not fit raises an error via werror(), which
+-- op_template() catches to try the next alternative.
+local function parse_template(params, template, nparams, pos)
+ -- The first 8 characters are the base opcode in hex.
+ local op = tonumber(sub(template, 1, 8), 16)
+ -- Index of the next operand to consume.
+ local n = 1
+ local rtt = {}
+
+ -- Start without a pending register type.
+ parse_reg_type = false
+
+ -- Process each character.
+ for p in gmatch(sub(template, 9), ".") do
+ local q = params[n]
+ -- Register operands at bit 0 (D), 5 (N), 16 (M) and 10 (A).
+ if p == "D" then
+ op = op + parse_reg(q); n = n + 1
+ elseif p == "N" then
+ op = op + shl(parse_reg(q), 5); n = n + 1
+ elseif p == "M" then
+ op = op + shl(parse_reg(q), 16); n = n + 1
+ elseif p == "A" then
+ op = op + shl(parse_reg(q), 10); n = n + 1
+ elseif p == "m" then
+ -- Re-use the previous operand at bit 16; no operand is consumed.
+ op = op + shl(parse_reg(params[n-1]), 16)
+
+ elseif p == "p" then
+ -- Rewrite "sp" in place so the following register letter can parse it.
+ if q == "sp" then params[n] = "@x31" end
+ elseif p == "g" then
+ -- General register size: "x" adds 0x80000000, "w" adds nothing.
+ if parse_reg_type == "x" then
+ op = op + 0x80000000
+ elseif parse_reg_type ~= "w" then
+ werror("bad register type")
+ end
+ parse_reg_type = false
+ elseif p == "f" then
+ -- FP register size: "d" adds 0x00400000, "s" adds nothing.
+ if parse_reg_type == "d" then
+ op = op + 0x00400000
+ elseif parse_reg_type ~= "s" then
+ werror("bad register type")
+ end
+ parse_reg_type = false
+ elseif p == "x" or p == "w" or p == "d" or p == "s" then
+ -- The registers parsed so far must have exactly this type.
+ if parse_reg_type ~= p then
+ werror("register size mismatch")
+ end
+ parse_reg_type = false
+
+ -- Address operands; the parsers receive the whole operand list.
+ elseif p == "L" then
+ op = parse_load(params, nparams, n, op)
+ elseif p == "P" then
+ op = parse_load_pair(params, nparams, n, op)
+
+ elseif p == "B" then
+ -- Label reference: emit a REL_* action offset by the branch class.
+ local mode, v, s = parse_label(q, false); n = n + 1
+ local m = branch_type(op)
+ waction("REL_"..mode, v+m, s, 1)
+
+ -- Immediate operands.
+ elseif p == "I" then
+ op = op + parse_imm12(q); n = n + 1
+ elseif p == "i" then
+ op = op + parse_imm13(q); n = n + 1
+ elseif p == "W" then
+ op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
+ elseif p == "T" then
+ op = op + parse_imm6(q); n = n + 1
+ elseif p == "1" then
+ op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
+ elseif p == "2" then
+ op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
+ elseif p == "5" then
+ op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
+ elseif p == "V" then
+ op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
+ elseif p == "F" then
+ op = op + parse_fpimm(q); n = n + 1
+ elseif p == "Z" then
+ -- Only a literal zero is accepted; it adds nothing to the opcode.
+ if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end
+ n = n + 1
+
+ -- Shift, extend and condition operands.
+ elseif p == "S" then
+ op = op + parse_shift(q); n = n + 1
+ elseif p == "X" then
+ op = op + parse_extend(q); n = n + 1
+ elseif p == "R" then
+ op = op + parse_lslx16(q); n = n + 1
+ elseif p == "C" then
+ op = op + parse_cond(q, 0); n = n + 1
+ elseif p == "c" then
+ op = op + parse_cond(q, 1); n = n + 1
+
+ else
+ -- Unknown template letter: a bug in the template table.
+ assert(false)
+ end
+ end
+ wputpos(pos, op)
+end
+
+-- Assemble one instruction from a template string with "|"-separated
+-- alternatives. Called without params, it returns the result of stripping
+-- every run of 8 hex digits from the template.
+-- Each alternative is tried in order through pcall(parse_template, ...);
+-- the first one that succeeds ends the call. If all of them fail, the
+-- error of the last attempt is rethrown.
+function op_template(params, template, nparams)
+ if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
+
+ -- Limit number of section buffer positions used by a single dasm_put().
+ -- A single opcode needs a maximum of 3 positions.
+ if secpos+3 > maxsecpos then wflush() end
+ local pos = wpos()
+ -- Remember the current sizes so a failed attempt can be rolled back.
+ local lpos, apos, spos = #actlist, #actargs, secpos
+
+ local ok, err
+ for t in gmatch(template, "[^|]+") do
+ ok, err = pcall(parse_template, params, t, nparams, pos)
+ if ok then return end
+ -- Undo whatever the failed alternative emitted (at most 3 entries).
+ secpos = spos
+ actlist[lpos+1] = nil
+ actlist[lpos+2] = nil
+ actlist[lpos+3] = nil
+ actargs[apos+1] = nil
+ actargs[apos+2] = nil
+ actargs[apos+3] = nil
+ end
+ -- Level 0: rethrow without adding position information.
+ error(err, 0)
+end
+
+map_op[".template__"] = op_template
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode marking where the action list is written to the output.
+-- The operand is used as-is (no syntax check) as the name passed to
+-- writeactions(). Without params the operand description is returned.
+map_op[".actionlist_1"] = function(params)
+  if params then
+    local cvar = params[1]
+    wline(function(out) writeactions(out, cvar) end)
+  else
+    return "cvar"
+  end
+end
+
+-- Pseudo-opcode marking where the global enum is written to the output.
+-- The operand is used as-is (no syntax check) as the prefix passed to
+-- writeglobals(). Without params the operand description is returned.
+map_op[".globals_1"] = function(params)
+  if params then
+    local enumprefix = params[1]
+    wline(function(out) writeglobals(out, enumprefix) end)
+  else
+    return "prefix"
+  end
+end
+
+-- Pseudo-opcode marking where the global names are written to the output.
+-- The operand is used as-is (no syntax check) as the name passed to
+-- writeglobalnames(). Without params the operand description is returned.
+map_op[".globalnames_1"] = function(params)
+  if params then
+    local cvar = params[1]
+    wline(function(out) writeglobalnames(out, cvar) end)
+  else
+    return "cvar"
+  end
+end
+
+-- Pseudo-opcode marking where the extern names are written to the output.
+-- The operand is used as-is (no syntax check) as the name passed to
+-- writeexternnames(). Without params the operand description is returned.
+map_op[".externnames_1"] = function(params)
+  if params then
+    local cvar = params[1]
+    wline(function(out) writeexternnames(out, cvar) end)
+  else
+    return "cvar"
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Label pseudo-opcode (converted from trailing colon form).
+-- Parses the operand as a label definition and emits the matching LABEL_*
+-- action. An "EXT" result from parse_label() is rejected.
+map_op[".label_1"] = function(params)
+  if not params then
+    return "[1-9] | ->global | =>pcexpr"
+  end
+  -- Make room for one more section position before emitting.
+  if secpos+1 > maxsecpos then wflush() end
+  local kind, num, expr = parse_label(params[1], true)
+  if kind == "EXT" then
+    werror("bad label definition")
+  end
+  waction("LABEL_"..kind, num, expr, 1)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for data storage: every operand must convert with
+-- tonumber() and is written with wputw().
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _, text in ipairs(params) do
+    local word = tonumber(text)
+    if not word then
+      werror("bad immediate `"..text.."'")
+    end
+    -- Negative values are wrapped by adding 2^32.
+    if word < 0 then
+      word = word + 2^32
+    end
+    wputw(word)
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+
+-- Alignment pseudo-opcode.
+-- The operand must be a power of 2 in the range 2 ... 256; the emitted
+-- ALIGN action carries the operand minus one. Anything else is reported
+-- as "bad alignment".
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1])
+  if align then
+    -- Compare against 2, 4, ..., 256.
+    local pow2 = 2
+    for _ = 1, 8 do
+      if align == pow2 then
+        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
+        return
+      end
+      pow2 = pow2 * 2
+    end
+  end
+  werror("bad alignment")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+-- Operands: name, ctype and an optional reg. Registers the type in
+-- map_type, adds a "#name" entry expanding to sizeof(ctype) and emits a
+-- DtN() shortcut define through wline(). Also installed as .type_2.
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    if nparams == 2 then return "name, ctype" end
+    return "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  if map_type[name] then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Add #type to defines. A bit unclean to put it in map_archdef.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- The new type takes the next number; ctypenum is only advanced after
+  -- the shortcut define has been emitted.
+  local id = ctypenum + 1
+  map_type[name] = {
+    reg = reg,
+    ctypefmt = format("Dt%X(%%s)", id),
+    ctype = ctype,
+  }
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", id, ctype))
+  ctypenum = id
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump type definitions.
+-- Writes a heading, then one line per map_type entry (name, ctype and reg,
+-- sorted by name), then an empty line. lvl is accepted but not used.
+local function dumptypes(out, lvl)
+  local names = {}
+  for name in pairs(map_type) do
+    names[#names+1] = name
+  end
+  sort(names)
+  out:write("Type definitions:\n")
+  for i = 1, #names do
+    local name = names[i]
+    local tp = map_type[name]
+    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, tp.reg or ""))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+-- Emits a SECTION action carrying num, then calls wflush(true).
+function _M.section(num)
+ waction("SECTION", num)
+ wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+-- Writes a header line built from _info.arch, _info.version and
+-- _info.release to out, followed by the output of dumpactions().
+function _M.dumparch(out)
+ out:write(format("DynASM %s version %s, released %s\n\n",
+ _info.arch, _info.version, _info.release))
+ dumpactions(out)
+end
+
+-- Dump all user defined elements.
+-- Calls dumptypes(), dumpglobals() and dumpexterns() in that order, each
+-- with out and lvl.
+function _M.dumpdef(out, lvl)
+ dumptypes(out, lvl)
+ dumpglobals(out, lvl)
+ dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+-- Stores the four callbacks as wline, werror, wfatal and wwarn (in
+-- argument order) and returns this module's wflush.
+function _M.passcb(wl, we, wf, ww)
+ wline, werror, wfatal, wwarn = wl, we, wf, ww
+ return wflush
+end
+
+-- Setup the arch-specific module.
+-- Only records arch and opt in g_arch and g_opt.
+function _M.setup(arch, opt)
+ g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+-- Lookups that miss in map_op fall back to map_coreop, and lookups that
+-- miss in map_def fall back to map_archdef (via __index metatables).
+-- Returns map_op and map_def.
+function _M.mergemaps(map_coreop, map_def)
+ setmetatable(map_op, { __index = map_coreop })
+ setmetatable(map_def, { __index = map_archdef })
+ return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+
diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h
index 4359588..2f4c2d2 100644
--- a/dynasm/dasm_mips.h
+++ b/dynasm/dasm_mips.h
@@ -1,6 +1,6 @@
/*
** DynASM MIPS encoding engine.
-** Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/
diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua
index 82942cb..ae0dbd7 100644
--- a/dynasm/dasm_mips.lua
+++ b/dynasm/dasm_mips.lua
@@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM MIPS module.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h
index 5ccff25..2ded258 100644
--- a/dynasm/dasm_ppc.h
+++ b/dynasm/dasm_ppc.h
@@ -1,6 +1,6 @@
/*
-** DynASM PPC encoding engine.
-** Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+** DynASM PPC/PPC64 encoding engine.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/
diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
index 61da50e..3744707 100644
--- a/dynasm/dasm_ppc.lua
+++ b/dynasm/dasm_ppc.lua
@@ -1,8 +1,10 @@
------------------------------------------------------------------------------
--- DynASM PPC module.
+-- DynASM PPC/PPC64 module.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
+--
+-- Support for various extensions contributed by Caio Souza Oliveira.
------------------------------------------------------------------------------
-- Module information:
@@ -11,7 +13,7 @@ local _info = {
description = "DynASM PPC module",
version = "1.3.0",
vernum = 10300,
- release = "2011-05-05",
+ release = "2015-01-14",
author = "Mike Pall",
license = "MIT",
}
@@ -297,6 +299,223 @@ local map_op = {
std_2 = "f8000000RD",
stdu_2 = "f8000001RD",
+ -- Primary opcode 4:
+ mulhhwu_3 = "10000010RRR.",
+ machhwu_3 = "10000018RRR.",
+ mulhhw_3 = "10000050RRR.",
+ nmachhw_3 = "1000005cRRR.",
+ machhwsu_3 = "10000098RRR.",
+ machhws_3 = "100000d8RRR.",
+ nmachhws_3 = "100000dcRRR.",
+ mulchwu_3 = "10000110RRR.",
+ macchwu_3 = "10000118RRR.",
+ mulchw_3 = "10000150RRR.",
+ macchw_3 = "10000158RRR.",
+ nmacchw_3 = "1000015cRRR.",
+ macchwsu_3 = "10000198RRR.",
+ macchws_3 = "100001d8RRR.",
+ nmacchws_3 = "100001dcRRR.",
+ mullhw_3 = "10000350RRR.",
+ maclhw_3 = "10000358RRR.",
+ nmaclhw_3 = "1000035cRRR.",
+ maclhwsu_3 = "10000398RRR.",
+ maclhws_3 = "100003d8RRR.",
+ nmaclhws_3 = "100003dcRRR.",
+ machhwuo_3 = "10000418RRR.",
+ nmachhwo_3 = "1000045cRRR.",
+ machhwsuo_3 = "10000498RRR.",
+ machhwso_3 = "100004d8RRR.",
+ nmachhwso_3 = "100004dcRRR.",
+ macchwuo_3 = "10000518RRR.",
+ macchwo_3 = "10000558RRR.",
+ nmacchwo_3 = "1000055cRRR.",
+ macchwsuo_3 = "10000598RRR.",
+ macchwso_3 = "100005d8RRR.",
+ nmacchwso_3 = "100005dcRRR.",
+ maclhwo_3 = "10000758RRR.",
+ nmaclhwo_3 = "1000075cRRR.",
+ maclhwsuo_3 = "10000798RRR.",
+ maclhwso_3 = "100007d8RRR.",
+ nmaclhwso_3 = "100007dcRRR.",
+
+ vaddubm_3 = "10000000VVV",
+ vmaxub_3 = "10000002VVV",
+ vrlb_3 = "10000004VVV",
+ vcmpequb_3 = "10000006VVV",
+ vmuloub_3 = "10000008VVV",
+ vaddfp_3 = "1000000aVVV",
+ vmrghb_3 = "1000000cVVV",
+ vpkuhum_3 = "1000000eVVV",
+ vmhaddshs_4 = "10000020VVVV",
+ vmhraddshs_4 = "10000021VVVV",
+ vmladduhm_4 = "10000022VVVV",
+ vmsumubm_4 = "10000024VVVV",
+ vmsummbm_4 = "10000025VVVV",
+ vmsumuhm_4 = "10000026VVVV",
+ vmsumuhs_4 = "10000027VVVV",
+ vmsumshm_4 = "10000028VVVV",
+ vmsumshs_4 = "10000029VVVV",
+ vsel_4 = "1000002aVVVV",
+ vperm_4 = "1000002bVVVV",
+ vsldoi_4 = "1000002cVVVP",
+ vpermxor_4 = "1000002dVVVV",
+ vmaddfp_4 = "1000002eVVVV~",
+ vnmsubfp_4 = "1000002fVVVV~",
+ vaddeuqm_4 = "1000003cVVVV",
+ vaddecuq_4 = "1000003dVVVV",
+ vsubeuqm_4 = "1000003eVVVV",
+ vsubecuq_4 = "1000003fVVVV",
+ vadduhm_3 = "10000040VVV",
+ vmaxuh_3 = "10000042VVV",
+ vrlh_3 = "10000044VVV",
+ vcmpequh_3 = "10000046VVV",
+ vmulouh_3 = "10000048VVV",
+ vsubfp_3 = "1000004aVVV",
+ vmrghh_3 = "1000004cVVV",
+ vpkuwum_3 = "1000004eVVV",
+ vadduwm_3 = "10000080VVV",
+ vmaxuw_3 = "10000082VVV",
+ vrlw_3 = "10000084VVV",
+ vcmpequw_3 = "10000086VVV",
+ vmulouw_3 = "10000088VVV",
+ vmuluwm_3 = "10000089VVV",
+ vmrghw_3 = "1000008cVVV",
+ vpkuhus_3 = "1000008eVVV",
+ vaddudm_3 = "100000c0VVV",
+ vmaxud_3 = "100000c2VVV",
+ vrld_3 = "100000c4VVV",
+ vcmpeqfp_3 = "100000c6VVV",
+ vcmpequd_3 = "100000c7VVV",
+ vpkuwus_3 = "100000ceVVV",
+ vadduqm_3 = "10000100VVV",
+ vmaxsb_3 = "10000102VVV",
+ vslb_3 = "10000104VVV",
+ vmulosb_3 = "10000108VVV",
+ vrefp_2 = "1000010aV-V",
+ vmrglb_3 = "1000010cVVV",
+ vpkshus_3 = "1000010eVVV",
+ vaddcuq_3 = "10000140VVV",
+ vmaxsh_3 = "10000142VVV",
+ vslh_3 = "10000144VVV",
+ vmulosh_3 = "10000148VVV",
+ vrsqrtefp_2 = "1000014aV-V",
+ vmrglh_3 = "1000014cVVV",
+ vpkswus_3 = "1000014eVVV",
+ vaddcuw_3 = "10000180VVV",
+ vmaxsw_3 = "10000182VVV",
+ vslw_3 = "10000184VVV",
+ vmulosw_3 = "10000188VVV",
+ vexptefp_2 = "1000018aV-V",
+ vmrglw_3 = "1000018cVVV",
+ vpkshss_3 = "1000018eVVV",
+ vmaxsd_3 = "100001c2VVV",
+ vsl_3 = "100001c4VVV",
+ vcmpgefp_3 = "100001c6VVV",
+ vlogefp_2 = "100001caV-V",
+ vpkswss_3 = "100001ceVVV",
+ vadduhs_3 = "10000240VVV",
+ vminuh_3 = "10000242VVV",
+ vsrh_3 = "10000244VVV",
+ vcmpgtuh_3 = "10000246VVV",
+ vmuleuh_3 = "10000248VVV",
+ vrfiz_2 = "1000024aV-V",
+ vsplth_3 = "1000024cVV3",
+ vupkhsh_2 = "1000024eV-V",
+ vminuw_3 = "10000282VVV",
+ vminud_3 = "100002c2VVV",
+ vcmpgtud_3 = "100002c7VVV",
+ vrfim_2 = "100002caV-V",
+ vcmpgtsb_3 = "10000306VVV",
+ vcfux_3 = "1000030aVVA~",
+ vaddshs_3 = "10000340VVV",
+ vminsh_3 = "10000342VVV",
+ vsrah_3 = "10000344VVV",
+ vcmpgtsh_3 = "10000346VVV",
+ vmulesh_3 = "10000348VVV",
+ vcfsx_3 = "1000034aVVA~",
+ vspltish_2 = "1000034cVS",
+ vupkhpx_2 = "1000034eV-V",
+ vaddsws_3 = "10000380VVV",
+ vminsw_3 = "10000382VVV",
+ vsraw_3 = "10000384VVV",
+ vcmpgtsw_3 = "10000386VVV",
+ vmulesw_3 = "10000388VVV",
+ vctuxs_3 = "1000038aVVA~",
+ vspltisw_2 = "1000038cVS",
+ vminsd_3 = "100003c2VVV",
+ vsrad_3 = "100003c4VVV",
+ vcmpbfp_3 = "100003c6VVV",
+ vcmpgtsd_3 = "100003c7VVV",
+ vctsxs_3 = "100003caVVA~",
+ vupklpx_2 = "100003ceV-V",
+ vsububm_3 = "10000400VVV",
+ ["bcdadd._4"] = "10000401VVVy.",
+ vavgub_3 = "10000402VVV",
+ vand_3 = "10000404VVV",
+ ["vcmpequb._3"] = "10000406VVV",
+ vmaxfp_3 = "1000040aVVV",
+ vsubuhm_3 = "10000440VVV",
+ ["bcdsub._4"] = "10000441VVVy.",
+ vavguh_3 = "10000442VVV",
+ vandc_3 = "10000444VVV",
+ ["vcmpequh._3"] = "10000446VVV",
+ vminfp_3 = "1000044aVVV",
+ vpkudum_3 = "1000044eVVV",
+ vsubuwm_3 = "10000480VVV",
+ vavguw_3 = "10000482VVV",
+ vor_3 = "10000484VVV",
+ ["vcmpequw._3"] = "10000486VVV",
+ vpmsumw_3 = "10000488VVV",
+ ["vcmpeqfp._3"] = "100004c6VVV",
+ ["vcmpequd._3"] = "100004c7VVV",
+ vpkudus_3 = "100004ceVVV",
+ vavgsb_3 = "10000502VVV",
+ vavgsh_3 = "10000542VVV",
+ vorc_3 = "10000544VVV",
+ vbpermq_3 = "1000054cVVV",
+ vpksdus_3 = "1000054eVVV",
+ vavgsw_3 = "10000582VVV",
+ vsld_3 = "100005c4VVV",
+ ["vcmpgefp._3"] = "100005c6VVV",
+ vpksdss_3 = "100005ceVVV",
+ vsububs_3 = "10000600VVV",
+ mfvscr_1 = "10000604V--",
+ vsum4ubs_3 = "10000608VVV",
+ vsubuhs_3 = "10000640VVV",
+ mtvscr_1 = "10000644--V",
+ ["vcmpgtuh._3"] = "10000646VVV",
+ vsum4shs_3 = "10000648VVV",
+ vupkhsw_2 = "1000064eV-V",
+ vsubuws_3 = "10000680VVV",
+ vshasigmaw_4 = "10000682VVYp",
+ veqv_3 = "10000684VVV",
+ vsum2sws_3 = "10000688VVV",
+ vmrgow_3 = "1000068cVVV",
+ vshasigmad_4 = "100006c2VVYp",
+ vsrd_3 = "100006c4VVV",
+ ["vcmpgtud._3"] = "100006c7VVV",
+ vupklsw_2 = "100006ceV-V",
+ vupkslw_2 = "100006ceV-V",
+ vsubsbs_3 = "10000700VVV",
+ vclzb_2 = "10000702V-V",
+ vpopcntb_2 = "10000703V-V",
+ ["vcmpgtsb._3"] = "10000706VVV",
+ vsum4sbs_3 = "10000708VVV",
+ vsubshs_3 = "10000740VVV",
+ vclzh_2 = "10000742V-V",
+ vpopcnth_2 = "10000743V-V",
+ ["vcmpgtsh._3"] = "10000746VVV",
+ vsubsws_3 = "10000780VVV",
+ vclzw_2 = "10000782V-V",
+ vpopcntw_2 = "10000783V-V",
+ ["vcmpgtsw._3"] = "10000786VVV",
+ vsumsws_3 = "10000788VVV",
+ vmrgew_3 = "1000078cVVV",
+ vclzd_2 = "100007c2V-V",
+ vpopcntd_2 = "100007c3V-V",
+ ["vcmpbfp._3"] = "100007c6VVV",
+ ["vcmpgtsd._3"] = "100007c7VVV",
+
-- Primary opcode 19:
mcrf_2 = "4c000000XX",
isync_0 = "4c00012c",
@@ -316,6 +535,8 @@ local map_op = {
bclrl_2 = "4c000021AA",
bcctr_2 = "4c000420AA",
bcctrl_2 = "4c000421AA",
+ bctar_2 = "4c000460AA",
+ bctarl_2 = "4c000461AA",
blr_0 = "4e800020",
blrl_0 = "4e800021",
bctr_0 = "4e800420",
@@ -327,6 +548,7 @@ local map_op = {
cmpd_3 = "7c200000XRR",
cmpd_2 = "7c200000-RR",
tw_3 = "7c000008ARR",
+ lvsl_3 = "7c00000cVRR",
subfc_3 = "7c000010RRR.",
subc_3 = "7c000010RRR~.",
mulhdu_3 = "7c000012RRR.",
@@ -351,50 +573,68 @@ local map_op = {
cmplw_2 = "7c000040-RR",
cmpld_3 = "7c200040XRR",
cmpld_2 = "7c200040-RR",
+ lvsr_3 = "7c00004cVRR",
subf_3 = "7c000050RRR.",
sub_3 = "7c000050RRR~.",
+ lbarx_3 = "7c000068RR0R",
ldux_3 = "7c00006aRR0R",
dcbst_2 = "7c00006c-RR",
lwzux_3 = "7c00006eRR0R",
cntlzd_2 = "7c000074RR~",
andc_3 = "7c000078RR~R.",
td_3 = "7c000088ARR",
+ lvewx_3 = "7c00008eVRR",
mulhd_3 = "7c000092RRR.",
+ addg6s_3 = "7c000094RRR",
mulhw_3 = "7c000096RRR.",
+ dlmzb_3 = "7c00009cRR~R.",
ldarx_3 = "7c0000a8RR0R",
dcbf_2 = "7c0000ac-RR",
lbzx_3 = "7c0000aeRR0R",
+ lvx_3 = "7c0000ceVRR",
neg_2 = "7c0000d0RR.",
+ lharx_3 = "7c0000e8RR0R",
lbzux_3 = "7c0000eeRR0R",
popcntb_2 = "7c0000f4RR~",
not_2 = "7c0000f8RR~%.",
nor_3 = "7c0000f8RR~R.",
+ stvebx_3 = "7c00010eVRR",
subfe_3 = "7c000110RRR.",
sube_3 = "7c000110RRR~.",
adde_3 = "7c000114RRR.",
stdx_3 = "7c00012aRR0R",
- stwcx_3 = "7c00012cRR0R.",
+ ["stwcx._3"] = "7c00012dRR0R.",
stwx_3 = "7c00012eRR0R",
prtyw_2 = "7c000134RR~",
+ stvehx_3 = "7c00014eVRR",
stdux_3 = "7c00016aRR0R",
+ ["stqcx._3"] = "7c00016dR:R0R.",
stwux_3 = "7c00016eRR0R",
prtyd_2 = "7c000174RR~",
+ stvewx_3 = "7c00018eVRR",
subfze_2 = "7c000190RR.",
addze_2 = "7c000194RR.",
- stdcx_3 = "7c0001acRR0R.",
+ ["stdcx._3"] = "7c0001adRR0R.",
stbx_3 = "7c0001aeRR0R",
+ stvx_3 = "7c0001ceVRR",
subfme_2 = "7c0001d0RR.",
mulld_3 = "7c0001d2RRR.",
addme_2 = "7c0001d4RR.",
mullw_3 = "7c0001d6RRR.",
dcbtst_2 = "7c0001ec-RR",
stbux_3 = "7c0001eeRR0R",
+ bpermd_3 = "7c0001f8RR~R",
+ lvepxl_3 = "7c00020eVRR",
add_3 = "7c000214RRR.",
+ lqarx_3 = "7c000228R:R0R",
dcbt_2 = "7c00022c-RR",
lhzx_3 = "7c00022eRR0R",
+ cdtbcd_2 = "7c000234RR~",
eqv_3 = "7c000238RR~R.",
+ lvepx_3 = "7c00024eVRR",
eciwx_3 = "7c00026cRR0R",
lhzux_3 = "7c00026eRR0R",
+ cbcdtd_2 = "7c000274RR~",
xor_3 = "7c000278RR~R.",
mfspefscr_1 = "7c0082a6R",
mfxer_1 = "7c0102a6R",
@@ -404,8 +644,12 @@ local map_op = {
lhax_3 = "7c0002aeRR0R",
mftb_1 = "7c0c42e6R",
mftbu_1 = "7c0d42e6R",
+ lvxl_3 = "7c0002ceVRR",
lwaux_3 = "7c0002eaRR0R",
lhaux_3 = "7c0002eeRR0R",
+ popcntw_2 = "7c0002f4RR~",
+ divdeu_3 = "7c000312RRR.",
+ divweu_3 = "7c000316RRR.",
sthx_3 = "7c00032eRR0R",
orc_3 = "7c000338RR~R.",
ecowx_3 = "7c00036cRR0R",
@@ -420,10 +664,14 @@ local map_op = {
mtctr_1 = "7c0903a6R",
dcbi_2 = "7c0003ac-RR",
nand_3 = "7c0003b8RR~R.",
+ dsn_2 = "7c0003c6-RR",
+ stvxl_3 = "7c0003ceVRR",
divd_3 = "7c0003d2RRR.",
divw_3 = "7c0003d6RRR.",
+ popcntd_2 = "7c0003f4RR~",
cmpb_3 = "7c0003f8RR~R.",
mcrxr_1 = "7c000400X",
+ lbdx_3 = "7c000406RRR",
subfco_3 = "7c000410RRR.",
subco_3 = "7c000410RRR~.",
addco_3 = "7c000414RRR.",
@@ -433,16 +681,20 @@ local map_op = {
lfsx_3 = "7c00042eFR0R",
srw_3 = "7c000430RR~R.",
srd_3 = "7c000436RR~R.",
+ lhdx_3 = "7c000446RRR",
subfo_3 = "7c000450RRR.",
subo_3 = "7c000450RRR~.",
lfsux_3 = "7c00046eFR0R",
+ lwdx_3 = "7c000486RRR",
lswi_3 = "7c0004aaRR0A",
sync_0 = "7c0004ac",
lwsync_0 = "7c2004ac",
ptesync_0 = "7c4004ac",
lfdx_3 = "7c0004aeFR0R",
+ lddx_3 = "7c0004c6RRR",
nego_2 = "7c0004d0RR.",
lfdux_3 = "7c0004eeFR0R",
+ stbdx_3 = "7c000506RRR",
subfeo_3 = "7c000510RRR.",
subeo_3 = "7c000510RRR~.",
addeo_3 = "7c000514RRR.",
@@ -450,27 +702,42 @@ local map_op = {
stswx_3 = "7c00052aRR0R",
stwbrx_3 = "7c00052cRR0R",
stfsx_3 = "7c00052eFR0R",
+ sthdx_3 = "7c000546RRR",
+ ["stbcx._3"] = "7c00056dRRR",
stfsux_3 = "7c00056eFR0R",
+ stwdx_3 = "7c000586RRR",
subfzeo_2 = "7c000590RR.",
addzeo_2 = "7c000594RR.",
stswi_3 = "7c0005aaRR0A",
+ ["sthcx._3"] = "7c0005adRRR",
stfdx_3 = "7c0005aeFR0R",
+ stddx_3 = "7c0005c6RRR",
subfmeo_2 = "7c0005d0RR.",
mulldo_3 = "7c0005d2RRR.",
addmeo_2 = "7c0005d4RR.",
mullwo_3 = "7c0005d6RRR.",
dcba_2 = "7c0005ec-RR",
stfdux_3 = "7c0005eeFR0R",
+ stvepxl_3 = "7c00060eVRR",
addo_3 = "7c000614RRR.",
lhbrx_3 = "7c00062cRR0R",
+ lfdpx_3 = "7c00062eF:RR",
sraw_3 = "7c000630RR~R.",
srad_3 = "7c000634RR~R.",
+ lfddx_3 = "7c000646FRR",
+ stvepx_3 = "7c00064eVRR",
srawi_3 = "7c000670RR~A.",
sradi_3 = "7c000674RR~H.",
eieio_0 = "7c0006ac",
lfiwax_3 = "7c0006aeFR0R",
+ divdeuo_3 = "7c000712RRR.",
+ divweuo_3 = "7c000716RRR.",
sthbrx_3 = "7c00072cRR0R",
+ stfdpx_3 = "7c00072eF:RR",
extsh_2 = "7c000734RR~.",
+ stfddx_3 = "7c000746FRR",
+ divdeo_3 = "7c000752RRR.",
+ divweo_3 = "7c000756RRR.",
extsb_2 = "7c000774RR~.",
divduo_3 = "7c000792RRR.",
divwou_3 = "7c000796RRR.",
@@ -481,6 +748,40 @@ local map_op = {
divwo_3 = "7c0007d6RRR.",
dcbz_2 = "7c0007ec-RR",
+ ["tbegin._1"] = "7c00051d1",
+ ["tbegin._0"] = "7c00051d",
+ ["tend._1"] = "7c00055dY",
+ ["tend._0"] = "7c00055d",
+ ["tendall._0"] = "7e00055d",
+ tcheck_1 = "7c00059cX",
+ ["tsr._1"] = "7c0005dd1",
+ ["tsuspend._0"] = "7c0005dd",
+ ["tresume._0"] = "7c2005dd",
+ ["tabortwc._3"] = "7c00061dARR",
+ ["tabortdc._3"] = "7c00065dARR",
+ ["tabortwci._3"] = "7c00069dARS",
+ ["tabortdci._3"] = "7c0006ddARS",
+ ["tabort._1"] = "7c00071d-R-",
+ ["treclaim._1"] = "7c00075d-R",
+ ["trechkpt._0"] = "7c0007dd",
+
+ lxsiwzx_3 = "7c000018QRR",
+ lxsiwax_3 = "7c000098QRR",
+ mfvsrd_2 = "7c000066-Rq",
+ mfvsrwz_2 = "7c0000e6-Rq",
+ stxsiwx_3 = "7c000118QRR",
+ mtvsrd_2 = "7c000166QR",
+ mtvsrwa_2 = "7c0001a6QR",
+ lxvdsx_3 = "7c000298QRR",
+ lxsspx_3 = "7c000418QRR",
+ lxsdx_3 = "7c000498QRR",
+ stxsspx_3 = "7c000518QRR",
+ stxsdx_3 = "7c000598QRR",
+ lxvw4x_3 = "7c000618QRR",
+ lxvd2x_3 = "7c000698QRR",
+ stxvw4x_3 = "7c000718QRR",
+ stxvd2x_3 = "7c000798QRR",
+
-- Primary opcode 30:
rldicl_4 = "78000000RR~HM.",
rldicr_4 = "78000004RR~HM.",
@@ -489,6 +790,12 @@ local map_op = {
rldcl_4 = "78000010RR~RM.",
rldcr_4 = "78000012RR~RM.",
+ -- Primary opcode 56:
+ lq_2 = "e0000000R:D", -- NYI: displacement must be divisible by 8.
+
+ -- Primary opcode 57:
+ lfdp_2 = "e4000000F:D", -- NYI: displacement must be divisible by 4.
+
-- Primary opcode 59:
fdivs_3 = "ec000024FFF.",
fsubs_3 = "ec000028FFF.",
@@ -501,6 +808,200 @@ local map_op = {
fmadds_4 = "ec00003aFFFF~.",
fnmsubs_4 = "ec00003cFFFF~.",
fnmadds_4 = "ec00003eFFFF~.",
+ fcfids_2 = "ec00069cF-F.",
+ fcfidus_2 = "ec00079cF-F.",
+
+ dadd_3 = "ec000004FFF.",
+ dqua_4 = "ec000006FFFZ.",
+ dmul_3 = "ec000044FFF.",
+ drrnd_4 = "ec000046FFFZ.",
+ dscli_3 = "ec000084FF6.",
+ dquai_4 = "ec000086SF~FZ.",
+ dscri_3 = "ec0000c4FF6.",
+ drintx_4 = "ec0000c61F~FZ.",
+ dcmpo_3 = "ec000104XFF",
+ dtstex_3 = "ec000144XFF",
+ dtstdc_3 = "ec000184XF6",
+ dtstdg_3 = "ec0001c4XF6",
+ drintn_4 = "ec0001c61F~FZ.",
+ dctdp_2 = "ec000204F-F.",
+ dctfix_2 = "ec000244F-F.",
+ ddedpd_3 = "ec000284ZF~F.",
+ dxex_2 = "ec0002c4F-F.",
+ dsub_3 = "ec000404FFF.",
+ ddiv_3 = "ec000444FFF.",
+ dcmpu_3 = "ec000504XFF",
+ dtstsf_3 = "ec000544XFF",
+ drsp_2 = "ec000604F-F.",
+ dcffix_2 = "ec000644F-F.",
+ denbcd_3 = "ec000684YF~F.",
+ diex_3 = "ec0006c4FFF.",
+
+ -- Primary opcode 60:
+ xsaddsp_3 = "f0000000QQQ",
+ xsmaddasp_3 = "f0000008QQQ",
+ xxsldwi_4 = "f0000010QQQz",
+ xsrsqrtesp_2 = "f0000028Q-Q",
+ xssqrtsp_2 = "f000002cQ-Q",
+ xxsel_4 = "f0000030QQQQ",
+ xssubsp_3 = "f0000040QQQ",
+ xsmaddmsp_3 = "f0000048QQQ",
+ xxpermdi_4 = "f0000050QQQz",
+ xsresp_2 = "f0000068Q-Q",
+ xsmulsp_3 = "f0000080QQQ",
+ xsmsubasp_3 = "f0000088QQQ",
+ xxmrghw_3 = "f0000090QQQ",
+ xsdivsp_3 = "f00000c0QQQ",
+ xsmsubmsp_3 = "f00000c8QQQ",
+ xsadddp_3 = "f0000100QQQ",
+ xsmaddadp_3 = "f0000108QQQ",
+ xscmpudp_3 = "f0000118XQQ",
+ xscvdpuxws_2 = "f0000120Q-Q",
+ xsrdpi_2 = "f0000124Q-Q",
+ xsrsqrtedp_2 = "f0000128Q-Q",
+ xssqrtdp_2 = "f000012cQ-Q",
+ xssubdp_3 = "f0000140QQQ",
+ xsmaddmdp_3 = "f0000148QQQ",
+ xscmpodp_3 = "f0000158XQQ",
+ xscvdpsxws_2 = "f0000160Q-Q",
+ xsrdpiz_2 = "f0000164Q-Q",
+ xsredp_2 = "f0000168Q-Q",
+ xsmuldp_3 = "f0000180QQQ",
+ xsmsubadp_3 = "f0000188QQQ",
+ xxmrglw_3 = "f0000190QQQ",
+ xsrdpip_2 = "f00001a4Q-Q",
+ xstsqrtdp_2 = "f00001a8X-Q",
+ xsrdpic_2 = "f00001acQ-Q",
+ xsdivdp_3 = "f00001c0QQQ",
+ xsmsubmdp_3 = "f00001c8QQQ",
+ xsrdpim_2 = "f00001e4Q-Q",
+ xstdivdp_3 = "f00001e8XQQ",
+ xvaddsp_3 = "f0000200QQQ",
+ xvmaddasp_3 = "f0000208QQQ",
+ xvcmpeqsp_3 = "f0000218QQQ",
+ xvcvspuxws_2 = "f0000220Q-Q",
+ xvrspi_2 = "f0000224Q-Q",
+ xvrsqrtesp_2 = "f0000228Q-Q",
+ xvsqrtsp_2 = "f000022cQ-Q",
+ xvsubsp_3 = "f0000240QQQ",
+ xvmaddmsp_3 = "f0000248QQQ",
+ xvcmpgtsp_3 = "f0000258QQQ",
+ xvcvspsxws_2 = "f0000260Q-Q",
+ xvrspiz_2 = "f0000264Q-Q",
+ xvresp_2 = "f0000268Q-Q",
+ xvmulsp_3 = "f0000280QQQ",
+ xvmsubasp_3 = "f0000288QQQ",
+ xxspltw_3 = "f0000290QQg~",
+ xvcmpgesp_3 = "f0000298QQQ",
+ xvcvuxwsp_2 = "f00002a0Q-Q",
+ xvrspip_2 = "f00002a4Q-Q",
+ xvtsqrtsp_2 = "f00002a8X-Q",
+ xvrspic_2 = "f00002acQ-Q",
+ xvdivsp_3 = "f00002c0QQQ",
+ xvmsubmsp_3 = "f00002c8QQQ",
+ xvcvsxwsp_2 = "f00002e0Q-Q",
+ xvrspim_2 = "f00002e4Q-Q",
+ xvtdivsp_3 = "f00002e8XQQ",
+ xvadddp_3 = "f0000300QQQ",
+ xvmaddadp_3 = "f0000308QQQ",
+ xvcmpeqdp_3 = "f0000318QQQ",
+ xvcvdpuxws_2 = "f0000320Q-Q",
+ xvrdpi_2 = "f0000324Q-Q",
+ xvrsqrtedp_2 = "f0000328Q-Q",
+ xvsqrtdp_2 = "f000032cQ-Q",
+ xvsubdp_3 = "f0000340QQQ",
+ xvmaddmdp_3 = "f0000348QQQ",
+ xvcmpgtdp_3 = "f0000358QQQ",
+ xvcvdpsxws_2 = "f0000360Q-Q",
+ xvrdpiz_2 = "f0000364Q-Q",
+ xvredp_2 = "f0000368Q-Q",
+ xvmuldp_3 = "f0000380QQQ",
+ xvmsubadp_3 = "f0000388QQQ",
+ xvcmpgedp_3 = "f0000398QQQ",
+ xvcvuxwdp_2 = "f00003a0Q-Q",
+ xvrdpip_2 = "f00003a4Q-Q",
+ xvtsqrtdp_2 = "f00003a8X-Q",
+ xvrdpic_2 = "f00003acQ-Q",
+ xvdivdp_3 = "f00003c0QQQ",
+ xvmsubmdp_3 = "f00003c8QQQ",
+ xvcvsxwdp_2 = "f00003e0Q-Q",
+ xvrdpim_2 = "f00003e4Q-Q",
+ xvtdivdp_3 = "f00003e8XQQ",
+ xsnmaddasp_3 = "f0000408QQQ",
+ xxland_3 = "f0000410QQQ",
+ xscvdpsp_2 = "f0000424Q-Q",
+ xscvdpspn_2 = "f000042cQ-Q",
+ xsnmaddmsp_3 = "f0000448QQQ",
+ xxlandc_3 = "f0000450QQQ",
+ xsrsp_2 = "f0000464Q-Q",
+ xsnmsubasp_3 = "f0000488QQQ",
+ xxlor_3 = "f0000490QQQ",
+ xscvuxdsp_2 = "f00004a0Q-Q",
+ xsnmsubmsp_3 = "f00004c8QQQ",
+ xxlxor_3 = "f00004d0QQQ",
+ xscvsxdsp_2 = "f00004e0Q-Q",
+ xsmaxdp_3 = "f0000500QQQ",
+ xsnmaddadp_3 = "f0000508QQQ",
+ xxlnor_3 = "f0000510QQQ",
+ xscvdpuxds_2 = "f0000520Q-Q",
+ xscvspdp_2 = "f0000524Q-Q",
+ xscvspdpn_2 = "f000052cQ-Q",
+ xsmindp_3 = "f0000540QQQ",
+ xsnmaddmdp_3 = "f0000548QQQ",
+ xxlorc_3 = "f0000550QQQ",
+ xscvdpsxds_2 = "f0000560Q-Q",
+ xsabsdp_2 = "f0000564Q-Q",
+ xscpsgndp_3 = "f0000580QQQ",
+ xsnmsubadp_3 = "f0000588QQQ",
+ xxlnand_3 = "f0000590QQQ",
+ xscvuxddp_2 = "f00005a0Q-Q",
+ xsnabsdp_2 = "f00005a4Q-Q",
+ xsnmsubmdp_3 = "f00005c8QQQ",
+ xxleqv_3 = "f00005d0QQQ",
+ xscvsxddp_2 = "f00005e0Q-Q",
+ xsnegdp_2 = "f00005e4Q-Q",
+ xvmaxsp_3 = "f0000600QQQ",
+ xvnmaddasp_3 = "f0000608QQQ",
+ ["xvcmpeqsp._3"] = "f0000618QQQ",
+ xvcvspuxds_2 = "f0000620Q-Q",
+ xvcvdpsp_2 = "f0000624Q-Q",
+ xvminsp_3 = "f0000640QQQ",
+ xvnmaddmsp_3 = "f0000648QQQ",
+ ["xvcmpgtsp._3"] = "f0000658QQQ",
+ xvcvspsxds_2 = "f0000660Q-Q",
+ xvabssp_2 = "f0000664Q-Q",
+ xvcpsgnsp_3 = "f0000680QQQ",
+ xvnmsubasp_3 = "f0000688QQQ",
+ ["xvcmpgesp._3"] = "f0000698QQQ",
+ xvcvuxdsp_2 = "f00006a0Q-Q",
+ xvnabssp_2 = "f00006a4Q-Q",
+ xvnmsubmsp_3 = "f00006c8QQQ",
+ xvcvsxdsp_2 = "f00006e0Q-Q",
+ xvnegsp_2 = "f00006e4Q-Q",
+ xvmaxdp_3 = "f0000700QQQ",
+ xvnmaddadp_3 = "f0000708QQQ",
+ ["xvcmpeqdp._3"] = "f0000718QQQ",
+ xvcvdpuxds_2 = "f0000720Q-Q",
+ xvcvspdp_2 = "f0000724Q-Q",
+ xvmindp_3 = "f0000740QQQ",
+ xvnmaddmdp_3 = "f0000748QQQ",
+ ["xvcmpgtdp._3"] = "f0000758QQQ",
+ xvcvdpsxds_2 = "f0000760Q-Q",
+ xvabsdp_2 = "f0000764Q-Q",
+ xvcpsgndp_3 = "f0000780QQQ",
+ xvnmsubadp_3 = "f0000788QQQ",
+ ["xvcmpgedp._3"] = "f0000798QQQ",
+ xvcvuxddp_2 = "f00007a0Q-Q",
+ xvnabsdp_2 = "f00007a4Q-Q",
+ xvnmsubmdp_3 = "f00007c8QQQ",
+ xvcvsxddp_2 = "f00007e0Q-Q",
+ xvnegdp_2 = "f00007e4Q-Q",
+
+ -- Primary opcode 61:
+ stfdp_2 = "f4000000F:D", -- NYI: displacement must be divisible by 4.
+
+ -- Primary opcode 62:
+ stq_2 = "f8000002R:D", -- NYI: displacement must be divisible by 8.
-- Primary opcode 63:
fdiv_3 = "fc000024FFF.",
@@ -526,8 +1027,12 @@ local map_op = {
frsp_2 = "fc000018F-F.",
fctiw_2 = "fc00001cF-F.",
fctiwz_2 = "fc00001eF-F.",
+ ftdiv_2 = "fc000100X-F.",
+ fctiwu_2 = "fc00011cF-F.",
+ fctiwuz_2 = "fc00011eF-F.",
mtfsfi_2 = "fc00010cAA", -- NYI: upshift.
fnabs_2 = "fc000110F-F.",
+ ftsqrt_2 = "fc000140X-F.",
fabs_2 = "fc000210F-F.",
frin_2 = "fc000310F-F.",
friz_2 = "fc000350F-F.",
@@ -537,7 +1042,38 @@ local map_op = {
-- NYI: mtfsf, mtfsb0, mtfsb1.
fctid_2 = "fc00065cF-F.",
fctidz_2 = "fc00065eF-F.",
+ fmrgow_3 = "fc00068cFFF",
fcfid_2 = "fc00069cF-F.",
+ fctidu_2 = "fc00075cF-F.",
+ fctiduz_2 = "fc00075eF-F.",
+ fmrgew_3 = "fc00078cFFF",
+ fcfidu_2 = "fc00079cF-F.",
+
+ daddq_3 = "fc000004F:F:F:.",
+ dquaq_4 = "fc000006F:F:F:Z.",
+ dmulq_3 = "fc000044F:F:F:.",
+ drrndq_4 = "fc000046F:F:F:Z.",
+ dscliq_3 = "fc000084F:F:6.",
+ dquaiq_4 = "fc000086SF:~F:Z.",
+ dscriq_3 = "fc0000c4F:F:6.",
+ drintxq_4 = "fc0000c61F:~F:Z.",
+ dcmpoq_3 = "fc000104XF:F:",
+ dtstexq_3 = "fc000144XF:F:",
+ dtstdcq_3 = "fc000184XF:6",
+ dtstdgq_3 = "fc0001c4XF:6",
+ drintnq_4 = "fc0001c61F:~F:Z.",
+ dctqpq_2 = "fc000204F:-F:.",
+ dctfixq_2 = "fc000244F:-F:.",
+ ddedpdq_3 = "fc000284ZF:~F:.",
+ dxexq_2 = "fc0002c4F:-F:.",
+ dsubq_3 = "fc000404F:F:F:.",
+ ddivq_3 = "fc000444F:F:F:.",
+ dcmpuq_3 = "fc000504XF:F:",
+ dtstsfq_3 = "fc000544XF:F:",
+ drdpq_2 = "fc000604F:-F:.",
+ dcffixq_2 = "fc000644F:-F:.",
+ denbcdq_3 = "fc000684YF:~F:.",
+ diexq_3 = "fc0006c4F:FF:.",
-- Primary opcode 4, SPE APU extension:
evaddw_3 = "10000200RRR",
@@ -884,6 +1420,24 @@ local function parse_fpr(expr)
werror("bad register name `"..expr.."'")
end
+local function parse_vr(expr)
+ local r = match(expr, "^v([1-3]?[0-9])$")
+ if r then
+ r = tonumber(r)
+ if r <= 31 then return r end
+ end
+ werror("bad register name `"..expr.."'")
+end
+
+local function parse_vs(expr)
+ local r = match(expr, "^vs([1-6]?[0-9])$")
+ if r then
+ r = tonumber(r)
+ if r <= 63 then return r end
+ end
+ werror("bad register name `"..expr.."'")
+end
+
local function parse_cr(expr)
local r = match(expr, "^cr([0-7])$")
if r then return tonumber(r) end
@@ -914,7 +1468,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
end
end
werror("out of range immediate `"..imm.."'")
- elseif match(imm, "^r([1-3]?[0-9])$") or
+ elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
+ match(imm, "^vs([1-6]?[0-9])$") or
match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
werror("expected immediate operand, got register")
else
@@ -1027,6 +1582,15 @@ map_op[".template__"] = function(params, template, nparams)
rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
elseif p == "F" then
rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
+ elseif p == "V" then
+ rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1
+ elseif p == "Q" then
+ local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5
+ local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3)
+ op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh)
+ elseif p == "q" then
+ local vs = parse_vs(params[n]); n = n + 1
+ op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5)
elseif p == "A" then
rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
elseif p == "S" then
@@ -1047,6 +1611,26 @@ map_op[".template__"] = function(params, template, nparams)
rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
elseif p == "X" then
rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
+ elseif p == "1" then
+ rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1
+ elseif p == "g" then
+ rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1
+ elseif p == "3" then
+ rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1
+ elseif p == "P" then
+ rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
+ elseif p == "p" then
+ op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
+ elseif p == "6" then
+ rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1
+ elseif p == "Y" then
+ rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1
+ elseif p == "y" then
+ rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1
+ elseif p == "Z" then
+ rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1
+ elseif p == "z" then
+ rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1
elseif p == "W" then
op = op + parse_cr(params[n]); n = n + 1
elseif p == "G" then
@@ -1071,6 +1655,8 @@ map_op[".template__"] = function(params, template, nparams)
local lo = band(op, mm)
local hi = band(op, shl(mm, 5))
op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
+ elseif p == ":" then
+ if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end
elseif p == "-" then
rs = rs - 5
elseif p == "." then
diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h
index e77bb84..a8bc6fd 100644
--- a/dynasm/dasm_proto.h
+++ b/dynasm/dasm_proto.h
@@ -1,6 +1,6 @@
/*
** DynASM encoding engine prototypes.
-** Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/
diff --git a/dynasm/dasm_x64.lua b/dynasm/dasm_x64.lua
index 91d676d..b1b6202 100644
--- a/dynasm/dasm_x64.lua
+++ b/dynasm/dasm_x64.lua
@@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM x64 module.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined x86/x64 module.
diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
index 97cbbea..652e8c9 100644
--- a/dynasm/dasm_x86.h
+++ b/dynasm/dasm_x86.h
@@ -1,6 +1,6 @@
/*
** DynASM x86 encoding engine.
-** Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
index 5f82f4f..7ca061d 100644
--- a/dynasm/dasm_x86.lua
+++ b/dynasm/dasm_x86.lua
@@ -1,7 +1,7 @@
------------------------------------------------------------------------------
-- DynASM x86/x64 module.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua
index 0ea8697..fffda75 100644
--- a/dynasm/dynasm.lua
+++ b/dynasm/dynasm.lua
@@ -2,7 +2,7 @@
-- DynASM. A dynamic assembler for code generation engines.
-- Originally designed and implemented for LuaJIT.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- See below for full copyright notice.
------------------------------------------------------------------------------
@@ -17,7 +17,7 @@ local _info = {
url = "http://luajit.org/dynasm.html",
license = "MIT",
copyright = [[
-Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+Copyright (C) 2005-2015 Mike Pall. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
diff --git a/etc/luajit.1 b/etc/luajit.1
index 723a708..fd38b0a 100644
--- a/etc/luajit.1
+++ b/etc/luajit.1
@@ -74,7 +74,7 @@ luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end"
Runs some nested loops and shows the resulting traces.
.SH COPYRIGHT
.PP
-\fBLuaJIT\fR is Copyright \(co 2005-2014 Mike Pall.
+\fBLuaJIT\fR is Copyright \(co 2005-2015 Mike Pall.
.br
\fBLuaJIT\fR is open source software, released under the MIT license.
.SH SEE ALSO
diff --git a/src/.gitignore b/src/.gitignore
index fc94e82..1a30573 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -4,4 +4,4 @@ lj_ffdef.h
lj_libdef.h
lj_recdef.h
lj_folddef.h
-lj_vm.s
+lj_vm.[sS]
diff --git a/src/Makefile b/src/Makefile
index fae4c7b..d7539fd 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -7,7 +7,7 @@
# Also works with MinGW and Cygwin on Windows.
# Please check msvcbuild.bat for building with MSVC on Windows.
#
-# Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+# Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
##############################################################################
MAJVER= 2
@@ -48,8 +48,8 @@ CCOPT= -O2 -fomit-frame-pointer
CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
CCOPT_x64=
CCOPT_arm=
+CCOPT_arm64=
CCOPT_ppc=
-CCOPT_ppcspe=
CCOPT_mips=
#
CCDEBUG=
@@ -162,7 +162,8 @@ XCFLAGS=
# make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows
# make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
-CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
+ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
+CCOPTIONS= $(CCDEBUG) $(ASOPTIONS)
LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
HOST_CC= $(CC)
@@ -201,6 +202,7 @@ TARGET_XLDFLAGS=
TARGET_XLIBS= -lm
TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
+TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS)
TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
@@ -215,12 +217,12 @@ else
ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
TARGET_LJARCH= arm
else
+ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
+ TARGET_LJARCH= arm64
+else
ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
TARGET_LJARCH= ppc
else
-ifneq (,$(findstring LJ_TARGET_PPCSPE ,$(TARGET_TESTARCH)))
- TARGET_LJARCH= ppcspe
-else
ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
TARGET_ARCH= -D__MIPSEL__=1
@@ -288,15 +290,15 @@ ifeq (Windows,$(TARGET_SYS))
TARGET_XSHLDFLAGS= -shared
TARGET_DYNXLDOPTS=
else
+ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
+ TARGET_XCFLAGS+= -fno-stack-protector
+endif
ifeq (Darwin,$(TARGET_SYS))
ifeq (,$(MACOSX_DEPLOYMENT_TARGET))
export MACOSX_DEPLOYMENT_TARGET=10.4
endif
TARGET_STRIP+= -x
TARGET_AR+= 2>/dev/null
- ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
- TARGET_XCFLAGS+= -fno-stack-protector
- endif
TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
TARGET_DYNXLDOPTS=
TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
@@ -308,14 +310,13 @@ else
ifeq (iOS,$(TARGET_SYS))
TARGET_STRIP+= -x
TARGET_AR+= 2>/dev/null
- TARGET_XCFLAGS+= -fno-stack-protector
TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
TARGET_DYNXLDOPTS=
TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
-else
- ifneq (,$(findstring stack-protector,$(shell $(TARGET_CC) -dumpspecs)))
- TARGET_XCFLAGS+= -fno-stack-protector
+ ifeq (arm64,$(TARGET_LJARCH))
+ TARGET_XCFLAGS+= -fno-omit-frame-pointer
endif
+else
ifneq (SunOS,$(TARGET_SYS))
ifneq (PS3,$(TARGET_SYS))
TARGET_XLDFLAGS+= -Wl,-E
@@ -404,7 +405,9 @@ ifeq (Windows,$(TARGET_SYS))
DASM_AFLAGS+= -D WIN
endif
ifeq (x64,$(TARGET_LJARCH))
- DASM_ARCH= x86
+ ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH)))
+ DASM_ARCH= x86
+ endif
else
ifeq (arm,$(TARGET_LJARCH))
ifeq (iOS,$(TARGET_SYS))
@@ -418,12 +421,15 @@ ifeq (ppc,$(TARGET_LJARCH))
ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH)))
DASM_AFLAGS+= -D ROUND
endif
- ifneq (,$(findstring LJ_ARCH_PPC64 1,$(TARGET_TESTARCH)))
+ ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH)))
DASM_AFLAGS+= -D GPR64
endif
ifeq (PS3,$(TARGET_SYS))
DASM_AFLAGS+= -D PPE -D TOC
endif
+ ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH)))
+ DASM_ARCH= ppc64
+ endif
endif
endif
endif
@@ -439,7 +445,7 @@ BUILDVM_X= $(BUILDVM_T)
HOST_O= $(MINILUA_O) $(BUILDVM_O)
HOST_T= $(MINILUA_T) $(BUILDVM_T)
-LJVM_S= lj_vm.s
+LJVM_S= lj_vm.S
LJVM_O= lj_vm.o
LJVM_BOUT= $(LJVM_S)
LJVM_MODE= elfasm
@@ -647,10 +653,10 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
-%.o: %.s
+%.o: %.S
$(E) "ASM $@"
- $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
- $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
+ $(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $<
+ $(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $<
$(LUAJIT_O):
$(E) "CC $@"
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 738203b..9aefb23 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -97,7 +97,7 @@ lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_ccallback.h
lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
- lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_vm.h lj_jit.h lj_ir.h
+ lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h
lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \
lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \
diff --git a/src/host/buildvm.c b/src/host/buildvm.c
index 37b20ae..324dd26 100644
--- a/src/host/buildvm.c
+++ b/src/host/buildvm.c
@@ -1,6 +1,6 @@
/*
** LuaJIT VM builder.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** This is a tool to build the hand-tuned assembler code required for
** LuaJIT's bytecode interpreter. It supports a variety of output formats
@@ -59,10 +59,10 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
#include "../dynasm/dasm_x86.h"
#elif LJ_TARGET_ARM
#include "../dynasm/dasm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "../dynasm/dasm_arm64.h"
#elif LJ_TARGET_PPC
#include "../dynasm/dasm_ppc.h"
-#elif LJ_TARGET_PPCSPE
-#include "../dynasm/dasm_ppc.h"
#elif LJ_TARGET_MIPS
#include "../dynasm/dasm_mips.h"
#else
@@ -113,8 +113,8 @@ static const char *sym_decorate(BuildCtx *ctx,
name[0] = '@';
else
*p = '\0';
-#elif (LJ_TARGET_PPC || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE
- /* Keep @plt. */
+#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE
+ /* Keep @plt etc. */
#else
*p = '\0';
#endif
@@ -179,6 +179,7 @@ static int build_code(BuildCtx *ctx)
ctx->nreloc = 0;
ctx->globnames = globnames;
+ ctx->extnames = extnames;
ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
ctx->nrelocsym = 0;
for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
diff --git a/src/host/buildvm.h b/src/host/buildvm.h
index f9dc8c4..5588555 100644
--- a/src/host/buildvm.h
+++ b/src/host/buildvm.h
@@ -1,6 +1,6 @@
/*
** LuaJIT VM builder.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _BUILDVM_H
@@ -82,6 +82,7 @@ typedef struct BuildCtx {
const char *beginsym;
/* Strings generated by DynASM. */
const char *const *globnames;
+ const char *const *extnames;
const char *dasm_ident;
const char *dasm_arch;
/* Relocations. */
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index 079e9a8..9b7ae53 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -1,6 +1,6 @@
/*
** LuaJIT VM builder: Assembler source code emitter.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include "buildvm.h"
@@ -51,8 +51,8 @@ static const char *const jccnames[] = {
"js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
};
-/* Emit relocation for the incredibly stupid OSX assembler. */
-static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n,
+/* Emit x86/x64 text relocations. */
+static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
const char *sym)
{
const char *opname = NULL;
@@ -71,6 +71,20 @@ err:
exit(1);
}
emit_asm_bytes(ctx, cp, n);
+ if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
+ /* Various fixups for external symbols outside of our binary. */
+ if (ctx->mode == BUILD_elfasm) {
+ if (LJ_32)
+ fprintf(ctx->fp, "#if __PIC__\n\t%s lj_wrap_%s\n#else\n", opname, sym);
+ fprintf(ctx->fp, "\t%s %s@PLT\n", opname, sym);
+ if (LJ_32)
+ fprintf(ctx->fp, "#endif\n");
+ return;
+ } else if (LJ_32 && ctx->mode == BUILD_machasm) {
+ fprintf(ctx->fp, "\t%s L%s$stub\n", opname, sym);
+ return;
+ }
+ }
fprintf(ctx->fp, "\t%s %s\n", opname, sym);
}
#else
@@ -107,7 +121,16 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
ins, sym);
exit(1);
}
-#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#elif LJ_TARGET_ARM64
+ if ((ins >> 26) == 0x25u) {
+ fprintf(ctx->fp, "\tbl %s\n", sym);
+ } else {
+ fprintf(stderr,
+ "Error: unsupported opcode %08x for %s symbol relocation.\n",
+ ins, sym);
+ exit(1);
+ }
+#elif LJ_TARGET_PPC
#if LJ_TARGET_PS3
#define TOCPREFIX "."
#else
@@ -117,6 +140,14 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
(ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
} else if ((ins >> 26) == 18) {
+#if LJ_ARCH_PPC64
+ const char *suffix = strchr(sym, '@');
+ if (suffix && suffix[1] == 'h') {
+ fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym);
+ } else if (suffix && suffix[1] == 'l') {
+ fprintf(ctx->fp, "\tld 12, %s\n", sym);
+ } else
+#endif
fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
} else {
fprintf(stderr,
@@ -214,6 +245,9 @@ void emit_asm(BuildCtx *ctx)
int i, rel;
fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
+#if LJ_ARCH_PPC64
+ fprintf(ctx->fp, "\t.abiversion 2\n");
+#endif
fprintf(ctx->fp, "\t.text\n");
emit_asm_align(ctx, 4);
@@ -254,8 +288,9 @@ void emit_asm(BuildCtx *ctx)
BuildReloc *r = &ctx->reloc[rel];
int n = r->ofs - ofs;
#if LJ_TARGET_X86ORX64
- if (ctx->mode == BUILD_machasm && r->type != 0) {
- emit_asm_reloc_mach(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
+ if (r->type != 0 &&
+ (ctx->mode == BUILD_elfasm || ctx->mode == BUILD_machasm)) {
+ emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
} else {
emit_asm_bytes(ctx, ctx->code+ofs, n);
emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
@@ -289,10 +324,7 @@ void emit_asm(BuildCtx *ctx)
#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
#endif
-#if LJ_TARGET_PPCSPE
- /* Soft-float ABI + SPE. */
- fprintf(ctx->fp, "\t.gnu_attribute 4, 2\n\t.gnu_attribute 8, 3\n");
-#elif LJ_TARGET_PPC && !LJ_TARGET_PS3
+#if LJ_TARGET_PPC && !LJ_TARGET_PS3
/* Hard-float ABI. */
fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
#endif
diff --git a/src/host/buildvm_fold.c b/src/host/buildvm_fold.c
index 7f5d9f2..daed7ec 100644
--- a/src/host/buildvm_fold.c
+++ b/src/host/buildvm_fold.c
@@ -1,6 +1,6 @@
/*
** LuaJIT VM builder: IR folding hash table generator.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include "buildvm.h"
diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c
index db95977..e928673 100644
--- a/src/host/buildvm_lib.c
+++ b/src/host/buildvm_lib.c
@@ -1,6 +1,6 @@
/*
** LuaJIT VM builder: library definition compiler.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include "buildvm.h"
diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h
index e96c8a5..45f8f8c 100644
--- a/src/host/buildvm_libbc.h
+++ b/src/host/buildvm_libbc.h
@@ -3,6 +3,20 @@
static const int libbc_endian = 0;
static const uint8_t libbc_code[] = {
+#if LJ_FR2
+0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
+0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
+16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
+0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1,
+128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
+0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7,
+0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
+0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
+8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
+0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
+0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
+2,0,76,3,2,0,75,0,1,0,0,2,0
+#else
0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
@@ -15,6 +29,7 @@ static const uint8_t libbc_code[] = {
0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
2,0,76,3,2,0,75,0,1,0,0,2,0
+#endif
};
static const struct { const char *name; int ofs; } libbc_map[] = {
diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c
index 138b042..4279f50 100644
--- a/src/host/buildvm_peobj.c
+++ b/src/host/buildvm_peobj.c
@@ -1,6 +1,6 @@
/*
** LuaJIT VM builder: PE object emitter.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Only used for building on Windows, since we cannot assume the presence
** of a suitable assembler. The host and target byte order must match.
diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua
index f1e1242..4398d8e 100644
--- a/src/host/genlibbc.lua
+++ b/src/host/genlibbc.lua
@@ -2,7 +2,7 @@
-- Lua script to dump the bytecode of the library functions written in Lua.
-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
----------------------------------------------------------------------------
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
diff --git a/src/host/genminilua.lua b/src/host/genminilua.lua
index 04c5518..cd0d946 100644
--- a/src/host/genminilua.lua
+++ b/src/host/genminilua.lua
@@ -2,7 +2,7 @@
-- Lua script to generate a customized, minified version of Lua.
-- The resulting 'minilua' is used for the build process of LuaJIT.
----------------------------------------------------------------------------
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
diff --git a/src/jit/bc.lua b/src/jit/bc.lua
index 48e06d6..320039f 100644
--- a/src/jit/bc.lua
+++ b/src/jit/bc.lua
@@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT bytecode listing module.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index 8303263..ef5a1aa 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT module to save/list bytecode.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--
@@ -63,7 +63,7 @@ local map_type = {
}
local map_arch = {
- x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true,
+ x86 = true, x64 = true, arm = true, ppc = true,
mips = true, mipsel = true,
}
@@ -202,7 +202,7 @@ typedef struct {
local is64, isbe = false, false
if ctx.arch == "x64" then
is64 = true
- elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then
+ elseif ctx.arch == "ppc" or ctx.arch == "mips" then
isbe = true
end
@@ -237,7 +237,7 @@ typedef struct {
hdr.eendian = isbe and 2 or 1
hdr.eversion = 1
hdr.type = f16(1)
- hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch])
+ hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, mips=8, mipsel=8 })[ctx.arch])
if ctx.arch == "mips" or ctx.arch == "mipsel" then
hdr.flags = 0x50001006
end
diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua
index a1bff84..dfcbeee 100644
--- a/src/jit/dis_arm.lua
+++ b/src/jit/dis_arm.lua
@@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT ARM disassembler module.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module.
diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua
index bdd70d7..9466f45 100644
--- a/src/jit/dis_mips.lua
+++ b/src/jit/dis_mips.lua
@@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT MIPS disassembler module.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- Released under the MIT/X license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module.
diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua
index ee7dd96..f06ffe8 100644
--- a/src/jit/dis_mipsel.lua
+++ b/src/jit/dis_mipsel.lua
@@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT MIPSEL disassembler wrapper module.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the little-endian functions from the
diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua
index 178df1c..e077d7a 100644
--- a/src/jit/dis_ppc.lua
+++ b/src/jit/dis_ppc.lua
@@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT PPC disassembler module.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- Released under the MIT/X license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module.
diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua
index a2de324..15d5524 100644
--- a/src/jit/dis_x64.lua
+++ b/src/jit/dis_x64.lua
@@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT x64 disassembler wrapper module.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the 64 bit functions from the combined
diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua
index bc22e42..6bc3806 100644
--- a/src/jit/dis_x86.lua
+++ b/src/jit/dis_x86.lua
@@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT x86/x64 disassembler module.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module.
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index c9016ce..5f85849 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT compiler dump module.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--
diff --git a/src/jit/p.lua b/src/jit/p.lua
index cd6a061..97d4ccd 100644
--- a/src/jit/p.lua
+++ b/src/jit/p.lua
@@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT profiler.
--
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--
diff --git a/src/jit/v.lua b/src/jit/v.lua
index 5090935..157c34b 100644
--- a/src/jit/v.lua
+++ b/src/jit/v.lua
@@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- Verbose mode of the LuaJIT compiler.
--
--- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--
diff --git a/src/jit/zone.lua b/src/jit/zone.lua
index 7201d01..69f0f16 100644
--- a/src/jit/zone.lua
+++ b/src/jit/zone.lua
@@ -1,7 +1,7 @@
----------------------------------------------------------------------------
-- LuaJIT profiler zones.
--
--- Copyright (C) 2005-2013 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
--
diff --git a/src/lib_aux.c b/src/lib_aux.c
index e88dc7c..4a1b70d 100644
--- a/src/lib_aux.c
+++ b/src/lib_aux.c
@@ -1,6 +1,6 @@
/*
** Auxiliary library for the Lua/C API.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Major parts taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
diff --git a/src/lib_base.c b/src/lib_base.c
index 713bdae..35ccdbc 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -1,6 +1,6 @@
/*
** Base and coroutine library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -87,10 +87,11 @@ static int ffh_pairs(lua_State *L, MMS mm)
cTValue *mo = lj_meta_lookup(L, o, mm);
if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) {
L->top = o+1; /* Only keep one argument. */
- copyTV(L, L->base-1, mo); /* Replace callable. */
+ copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */
return FFH_TAILCALL;
} else {
if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE);
+ if (LJ_FR2) { copyTV(L, o-1, o); o--; }
setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1)));
if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0);
return FFH_RES(3);
@@ -132,7 +133,7 @@ LJLIB_ASM(setmetatable) LJLIB_REC(.)
lj_err_caller(L, LJ_ERR_PROTMT);
setgcref(t->metatable, obj2gco(mt));
if (mt) { lj_gc_objbarriert(L, t, mt); }
- settabV(L, L->base-1, t);
+ settabV(L, L->base-1-LJ_FR2, t);
return FFH_RES(1);
}
@@ -145,6 +146,7 @@ LJLIB_CF(getfenv) LJLIB_REC(.)
o = lj_debug_frame(L, level, &level);
if (o == NULL)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
+ if (LJ_FR2) o--;
}
fn = &gcval(o)->fn;
settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
@@ -166,6 +168,7 @@ LJLIB_CF(setfenv)
o = lj_debug_frame(L, level, &level);
if (o == NULL)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
+ if (LJ_FR2) o--;
}
fn = &gcval(o)->fn;
if (!isluafunc(fn))
@@ -258,7 +261,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
if (base == 10) {
TValue *o = lj_lib_checkany(L, 1);
if (lj_strscan_numberobj(o)) {
- copyTV(L, L->base-1, o);
+ copyTV(L, L->base-1-LJ_FR2, o);
return FFH_RES(1);
}
#if LJ_HASFFI
@@ -271,11 +274,11 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) {
int32_t i;
lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0);
- setintV(L->base-1, i);
+ setintV(L->base-1-LJ_FR2, i);
return FFH_RES(1);
}
lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE),
- (uint8_t *)&(L->base-1)->n, o, 0);
+ (uint8_t *)&(L->base-1-LJ_FR2)->n, o, 0);
return FFH_RES(1);
}
}
@@ -291,14 +294,14 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
while (lj_char_isspace((unsigned char)(*ep))) ep++;
if (*ep == '\0') {
if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
- setintV(L->base-1, (int32_t)ul);
+ setintV(L->base-1-LJ_FR2, (int32_t)ul);
else
- setnumV(L->base-1, (lua_Number)ul);
+ setnumV(L->base-1-LJ_FR2, (lua_Number)ul);
return FFH_RES(1);
}
}
}
- setnilV(L->base-1);
+ setnilV(L->base-1-LJ_FR2);
return FFH_RES(1);
}
@@ -308,11 +311,11 @@ LJLIB_ASM(tostring) LJLIB_REC(.)
cTValue *mo;
L->top = o+1; /* Only keep one argument. */
if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
- copyTV(L, L->base-1, mo); /* Replace callable. */
+ copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */
return FFH_TAILCALL;
}
lj_gc_check(L);
- setstrV(L, L->base-1, lj_strfmt_obj(L, L->base));
+ setstrV(L, L->base-1-LJ_FR2, lj_strfmt_obj(L, L->base));
return FFH_RES(1);
}
@@ -535,7 +538,7 @@ LJLIB_CF(coroutine_status)
if (co == L) s = "running";
else if (co->status == LUA_YIELD) s = "suspended";
else if (co->status != 0) s = "dead";
- else if (co->base > tvref(co->stack)+1) s = "normal";
+ else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal";
else if (co->top == co->base) s = "dead";
else s = "suspended";
lua_pushstring(L, s);
@@ -577,8 +580,8 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap)
(co->status == 0 && co->top == co->base)) {
ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
if (wrap) lj_err_caller(L, em);
- setboolV(L->base-1, 0);
- setstrV(L, L->base, lj_err_str(L, em));
+ setboolV(L->base-1-LJ_FR2, 0);
+ setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
return FFH_RES(2);
}
lj_state_growstack(co, (MSize)(L->top - L->base));
diff --git a/src/lib_bit.c b/src/lib_bit.c
index ffdc29c..55cb2a8 100644
--- a/src/lib_bit.c
+++ b/src/lib_bit.c
@@ -1,6 +1,6 @@
/*
** Bit manipulation library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lib_bit_c
@@ -33,7 +33,7 @@ static int bit_result64(lua_State *L, CTypeID id, uint64_t x)
{
GCcdata *cd = lj_cdata_new_(L, id, 8);
*(uint64_t *)cdataptr(cd) = x;
- setcdataV(L, L->base-1, cd);
+ setcdataV(L, L->base-1-LJ_FR2, cd);
return FFH_RES(1);
}
#else
@@ -56,7 +56,7 @@ LJLIB_ASM(bit_tobit) LJLIB_REC(bit_tobit)
{
#if LJ_HASFFI
CTypeID id = 0;
- setintV(L->base-1, (int32_t)lj_carith_check64(L, 1, &id));
+ setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id));
return FFH_RES(1);
#else
lj_lib_checknumber(L, 1);
diff --git a/src/lib_debug.c b/src/lib_debug.c
index 9c1a282..b610fb4 100644
--- a/src/lib_debug.c
+++ b/src/lib_debug.c
@@ -1,6 +1,6 @@
/*
** Debug library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
diff --git a/src/lib_ffi.c b/src/lib_ffi.c
index bc2339c..b2b2d37 100644
--- a/src/lib_ffi.c
+++ b/src/lib_ffi.c
@@ -1,6 +1,6 @@
/*
** FFI library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lib_ffi_c
@@ -138,7 +138,7 @@ static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm)
}
}
copyTV(L, base, L->top);
- tv = L->top-1;
+ tv = L->top-1-LJ_FR2;
}
return lj_meta_tailcall(L, tv);
}
@@ -751,6 +751,9 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
case H_(4ab624a8,4ab624a8): b = 1; break; /* win */
#endif
case H_(3af93066,1f001464): b = 1; break; /* le/be */
+#if LJ_GC64
+ case H_(9e89d2c9,13c83c92): b = 1; break; /* gc64 */
+#endif
default:
break;
}
diff --git a/src/lib_init.c b/src/lib_init.c
index a729d12..85c194a 100644
--- a/src/lib_init.c
+++ b/src/lib_init.c
@@ -1,6 +1,6 @@
/*
** Library initialization.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Major parts taken verbatim from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
diff --git a/src/lib_io.c b/src/lib_io.c
index 07a9b47..468d327 100644
--- a/src/lib_io.c
+++ b/src/lib_io.c
@@ -1,6 +1,6 @@
/*
** I/O library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
diff --git a/src/lib_jit.c b/src/lib_jit.c
index 21a72a9..178ef24 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -1,6 +1,6 @@
/*
** JIT library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lib_jit_c
@@ -695,6 +695,8 @@ static uint32_t jit_cpudetect(lua_State *L)
ver >= 60 ? JIT_F_ARMV6_ : 0;
flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
#endif
+#elif LJ_TARGET_ARM64
+ /* No optional CPU features to detect (for now). */
#elif LJ_TARGET_PPC
#if LJ_HASJIT
#if LJ_ARCH_SQRT
@@ -704,8 +706,6 @@ static uint32_t jit_cpudetect(lua_State *L)
flags |= JIT_F_ROUND;
#endif
#endif
-#elif LJ_TARGET_PPCSPE
- /* Nothing to do. */
#elif LJ_TARGET_MIPS
#if LJ_HASJIT
/* Compile-time MIPS CPU detection. */
diff --git a/src/lib_math.c b/src/lib_math.c
index 87ec288..78838fc 100644
--- a/src/lib_math.c
+++ b/src/lib_math.c
@@ -1,6 +1,6 @@
/*
** Math library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include <math.h>
@@ -57,7 +57,7 @@ LJLIB_ASM(math_log) LJLIB_REC(math_log)
#else
x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y);
#endif
- setnumV(L->base-1, x*y); /* Do NOT join the expression to x / y. */
+ setnumV(L->base-1-LJ_FR2, x*y); /* Do NOT join the expression to x / y. */
return FFH_RES(1);
}
return FFH_RETRY;
diff --git a/src/lib_os.c b/src/lib_os.c
index 6867849..7b5873a 100644
--- a/src/lib_os.c
+++ b/src/lib_os.c
@@ -1,6 +1,6 @@
/*
** OS library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
diff --git a/src/lib_package.c b/src/lib_package.c
index d322911..6b6eb8f 100644
--- a/src/lib_package.c
+++ b/src/lib_package.c
@@ -1,6 +1,6 @@
/*
** Package library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -226,7 +226,7 @@ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r)
const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
lua_pop(L, 1);
if (bcdata) {
- if (luaL_loadbuffer(L, bcdata, LJ_MAX_MEM, name) != 0)
+ if (luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
return PACKAGE_ERR_LOAD;
return 0;
}
@@ -383,7 +383,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
if (lua_isnil(L, -1)) { /* Not found? */
const char *bcname = mksymname(L, name, SYMPREFIX_BC);
const char *bcdata = ll_bcsym(NULL, bcname);
- if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_MEM, name) != 0)
+ if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
}
return 1;
diff --git a/src/lib_string.c b/src/lib_string.c
index 6ca7a76..a6d9986 100644
--- a/src/lib_string.c
+++ b/src/lib_string.c
@@ -1,6 +1,6 @@
/*
** String library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -58,7 +58,7 @@ LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
lj_state_checkstack(L, (MSize)n);
p = (const unsigned char *)strdata(s) + start;
for (i = 0; i < n; i++)
- setintV(L->base + i-1, p[i]);
+ setintV(L->base + i-1-LJ_FR2, p[i]);
return FFH_RES(n);
}
@@ -72,7 +72,7 @@ LJLIB_ASM(string_char) LJLIB_REC(.)
lj_err_arg(L, i, LJ_ERR_BADVAL);
buf[i-1] = (char)k;
}
- setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs));
+ setstrV(L, L->base-1-LJ_FR2, lj_str_new(L, buf, (size_t)nargs));
return FFH_RES(1);
}
diff --git a/src/lib_table.c b/src/lib_table.c
index 5f0c8bb..56612ab 100644
--- a/src/lib_table.c
+++ b/src/lib_table.c
@@ -1,6 +1,6 @@
/*
** Table library.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
diff --git a/src/lj_alloc.c b/src/lj_alloc.c
index 7c7ec67..0aad826 100644
--- a/src/lj_alloc.c
+++ b/src/lj_alloc.c
@@ -77,7 +77,7 @@
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
-#if LJ_64
+#if LJ_64 && !LJ_GC64
/* Undocumented, but hey, that's what we all love so much about Windows. */
typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
@@ -174,8 +174,10 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
#endif
#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
-#if LJ_64
-/* 64 bit mode needs special support for allocating memory in the lower 2GB. */
+#if LJ_64 && !LJ_GC64
+/* 64 bit mode with 32 bit pointers needs special support for allocating
+** memory in the lower 2GB.
+*/
#if defined(MAP_32BIT)
@@ -258,7 +260,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
#else
-/* 32 bit mode is easy. */
+/* 32 bit mode and GC64 mode is easy. */
static LJ_AINLINE void *CALL_MMAP(size_t size)
{
int olderr = errno;
@@ -294,7 +296,7 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
#define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
#define CALL_MREMAP_NOMOVE 0
#define CALL_MREMAP_MAYMOVE 1
-#if LJ_64
+#if LJ_64 && !LJ_GC64
#define CALL_MREMAP_MV CALL_MREMAP_NOMOVE
#else
#define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE
diff --git a/src/lj_api.c b/src/lj_api.c
index e27123b..1f09284 100644
--- a/src/lj_api.c
+++ b/src/lj_api.c
@@ -1,6 +1,6 @@
/*
** Public Lua/C API.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -189,7 +189,7 @@ LUA_API int lua_type(lua_State *L, int idx)
cTValue *o = index2adr(L, idx);
if (tvisnumber(o)) {
return LUA_TNUMBER;
-#if LJ_64
+#if LJ_64 && !LJ_GC64
} else if (tvislightud(o)) {
return LUA_TLIGHTUSERDATA;
#endif
@@ -269,7 +269,7 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
return 0;
} else if (tvispri(o1)) {
return o1 != niltv(L) && o2 != niltv(L);
-#if LJ_64
+#if LJ_64 && !LJ_GC64
} else if (tvislightud(o1)) {
return o1->u64 == o2->u64;
#endif
@@ -284,8 +284,8 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
} else {
L->top = base+2;
lj_vm_call(L, base, 1+1);
- L->top -= 2;
- return tvistruecond(L->top+1);
+ L->top -= 2+LJ_FR2;
+ return tvistruecond(L->top+1+LJ_FR2);
}
}
}
@@ -307,8 +307,8 @@ LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2)
} else {
L->top = base+2;
lj_vm_call(L, base, 1+1);
- L->top -= 2;
- return tvistruecond(L->top+1);
+ L->top -= 2+LJ_FR2;
+ return tvistruecond(L->top+1+LJ_FR2);
}
}
}
@@ -704,8 +704,8 @@ LUA_API void lua_concat(lua_State *L, int n)
n -= (int)(L->top - top);
L->top = top+2;
lj_vm_call(L, top, 1+1);
- L->top--;
- copyTV(L, L->top-1, L->top);
+ L->top -= 1+LJ_FR2;
+ copyTV(L, L->top-1, L->top+LJ_FR2);
} while (--n > 0);
} else if (n == 0) { /* Push empty string. */
setstrV(L, L->top, &G(L)->strempty);
@@ -724,8 +724,8 @@ LUA_API void lua_gettable(lua_State *L, int idx)
if (v == NULL) {
L->top += 2;
lj_vm_call(L, L->top-2, 1+1);
- L->top -= 2;
- v = L->top+1;
+ L->top -= 2+LJ_FR2;
+ v = L->top+1+LJ_FR2;
}
copyTV(L, L->top-1, v);
}
@@ -740,8 +740,8 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
if (v == NULL) {
L->top += 2;
lj_vm_call(L, L->top-2, 1+1);
- L->top -= 2;
- v = L->top+1;
+ L->top -= 2+LJ_FR2;
+ v = L->top+1+LJ_FR2;
}
copyTV(L, L->top, v);
incr_top(L);
@@ -882,13 +882,14 @@ LUA_API void lua_settable(lua_State *L, int idx)
o = lj_meta_tset(L, t, L->top-2);
if (o) {
/* NOBARRIER: lj_meta_tset ensures the table is not black. */
- copyTV(L, o, L->top-1);
L->top -= 2;
+ copyTV(L, o, L->top+1);
} else {
- L->top += 3;
- copyTV(L, L->top-1, L->top-6);
- lj_vm_call(L, L->top-3, 0+1);
- L->top -= 3;
+ TValue *base = L->top;
+ copyTV(L, base+2, base-3-2*LJ_FR2);
+ L->top = base+3;
+ lj_vm_call(L, base, 0+1);
+ L->top -= 3+LJ_FR2;
}
}
@@ -902,14 +903,14 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
setstrV(L, &key, lj_str_newz(L, k));
o = lj_meta_tset(L, t, &key);
if (o) {
- L->top--;
/* NOBARRIER: lj_meta_tset ensures the table is not black. */
- copyTV(L, o, L->top);
+ copyTV(L, o, --L->top);
} else {
- L->top += 3;
- copyTV(L, L->top-1, L->top-6);
- lj_vm_call(L, L->top-3, 0+1);
- L->top -= 2;
+ TValue *base = L->top;
+ copyTV(L, base+2, base-3-2*LJ_FR2);
+ L->top = base+3;
+ lj_vm_call(L, base, 0+1);
+ L->top -= 2+LJ_FR2;
}
}
@@ -1016,11 +1017,24 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
/* -- Calls --------------------------------------------------------------- */
+#if LJ_FR2
+static TValue *api_call_base(lua_State *L, int nargs)
+{
+ TValue *o = L->top, *base = o - nargs;
+ L->top = o+1;
+ for (; o > base; o--) copyTV(L, o, o-1);
+ setnilV(o);
+ return o+1;
+}
+#else
+#define api_call_base(L, nargs) (L->top - (nargs))
+#endif
+
LUA_API void lua_call(lua_State *L, int nargs, int nresults)
{
api_check(L, L->status == 0 || L->status == LUA_ERRERR);
api_checknelems(L, nargs+1);
- lj_vm_call(L, L->top - nargs, nresults+1);
+ lj_vm_call(L, api_call_base(L, nargs), nresults+1);
}
LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
@@ -1038,7 +1052,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
api_checkvalidindex(L, o);
ef = savestack(L, o);
}
- status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef);
+ status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef);
if (status) hook_restore(g, oldh);
return status;
}
@@ -1046,12 +1060,14 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
{
GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L));
+ TValue *top = L->top;
fn->c.f = func;
- setfuncV(L, L->top, fn);
- setlightudV(L->top+1, checklightudptr(L, ud));
+ setfuncV(L, top++, fn);
+ if (LJ_FR2) setnilV(top++);
+ setlightudV(top++, checklightudptr(L, ud));
cframe_nres(L->cframe) = 1+0; /* Zero results. */
- L->top += 2;
- return L->top-1; /* Now call the newly allocated C function. */
+ L->top = top;
+ return top-1; /* Now call the newly allocated C function. */
}
LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
@@ -1068,10 +1084,11 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
{
if (luaL_getmetafield(L, idx, field)) {
- TValue *base = L->top--;
- copyTV(L, base, index2adr(L, idx));
- L->top = base+1;
- lj_vm_call(L, base, 1+1);
+ TValue *top = L->top--;
+ if (LJ_FR2) setnilV(top++);
+ copyTV(L, top++, index2adr(L, idx));
+ L->top = top;
+ lj_vm_call(L, top-1, 1+1);
return 1;
}
return 0;
@@ -1098,12 +1115,14 @@ LUA_API int lua_yield(lua_State *L, int nresults)
} else { /* Yield from hook: add a pseudo-frame. */
TValue *top = L->top;
hook_leave(g);
- top->u64 = cframe_multres(cf);
- setcont(top+1, lj_cont_hook);
- setframe_pc(top+1, cframe_pc(cf)-1);
- setframe_gc(top+2, obj2gco(L));
- setframe_ftsz(top+2, (int)((char *)(top+3)-(char *)L->base)+FRAME_CONT);
- L->top = L->base = top+3;
+ (top++)->u64 = cframe_multres(cf);
+ setcont(top, lj_cont_hook);
+ if (LJ_FR2) top++;
+ setframe_pc(top, cframe_pc(cf)-1);
+ if (LJ_FR2) top++;
+ setframe_gc(top, obj2gco(L), LJ_TTHREAD);
+ setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT);
+ L->top = L->base = top+1;
#if LJ_TARGET_X64
lj_err_throw(L, LUA_YIELD);
#else
@@ -1120,7 +1139,9 @@ LUA_API int lua_yield(lua_State *L, int nresults)
LUA_API int lua_resume(lua_State *L, int nargs)
{
if (L->cframe == NULL && L->status <= LUA_YIELD)
- return lj_vm_resume(L, L->top - nargs, 0, 0);
+ return lj_vm_resume(L,
+ L->status == 0 ? api_call_base(L, nargs) : L->top - nargs,
+ 0, 0);
L->top = L->base;
setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
incr_top(L);
@@ -1150,7 +1171,7 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
res = (int)(g->gc.total & 0x3ff);
break;
case LUA_GCSTEP: {
- MSize a = (MSize)data << 10;
+ GCSize a = (GCSize)data << 10;
g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
while (g->gc.total >= g->gc.threshold)
if (lj_gc_step(L) > 0) {
diff --git a/src/lj_arch.h b/src/lj_arch.h
index da16a19..61c7e19 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -1,6 +1,6 @@
/*
** Target architecture selection.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_ARCH_H
@@ -19,10 +19,10 @@
#define LUAJIT_ARCH_x64 2
#define LUAJIT_ARCH_ARM 3
#define LUAJIT_ARCH_arm 3
-#define LUAJIT_ARCH_PPC 4
-#define LUAJIT_ARCH_ppc 4
-#define LUAJIT_ARCH_PPCSPE 5
-#define LUAJIT_ARCH_ppcspe 5
+#define LUAJIT_ARCH_ARM64 4
+#define LUAJIT_ARCH_arm64 4
+#define LUAJIT_ARCH_PPC 5
+#define LUAJIT_ARCH_ppc 5
#define LUAJIT_ARCH_MIPS 6
#define LUAJIT_ARCH_mips 6
@@ -43,12 +43,10 @@
#define LUAJIT_TARGET LUAJIT_ARCH_X64
#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM
+#elif defined(__aarch64__)
+#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
-#ifdef __NO_FPRS__
-#define LUAJIT_TARGET LUAJIT_ARCH_PPCSPE
-#else
#define LUAJIT_TARGET LUAJIT_ARCH_PPC
-#endif
#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS
#else
@@ -96,7 +94,7 @@
#define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS)
#define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX)
#define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX)
-#define LJ_TARGET_IOS (LJ_TARGET_OSX && LUAJIT_TARGET == LUAJIT_ARCH_ARM)
+#define LJ_TARGET_IOS (LJ_TARGET_OSX && (LUAJIT_TARGET == LUAJIT_ARCH_ARM || LUAJIT_TARGET == LUAJIT_ARCH_ARM64))
#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS)
#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX
@@ -181,7 +179,9 @@
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
-#if __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__
+#if __ARM_ARCH_8__ || __ARM_ARCH_8A__
+#define LJ_ARCH_VERSION 80
+#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
#define LJ_ARCH_VERSION 70
#elif __ARM_ARCH_6T2__
#define LJ_ARCH_VERSION 61
@@ -191,15 +191,43 @@
#define LJ_ARCH_VERSION 50
#endif
+#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
+
+#define LJ_ARCH_NAME "arm64"
+#define LJ_ARCH_BITS 64
+#define LJ_ARCH_ENDIAN LUAJIT_LE
+#define LJ_TARGET_ARM64 1
+#define LJ_TARGET_EHRETREG 0
+#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */
+#define LJ_TARGET_MASKSHIFT 1
+#define LJ_TARGET_MASKROT 1
+#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
+#define LJ_TARGET_GC64 1
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+#define LJ_ARCH_NOJIT 1 /* NYI */
+
+#define LJ_ARCH_VERSION 80
+
#elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
-#define LJ_ARCH_NAME "ppc"
+#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+#define LJ_ARCH_ENDIAN LUAJIT_LE
+#else
+#define LJ_ARCH_ENDIAN LUAJIT_BE
+#endif
+
#if _LP64
#define LJ_ARCH_BITS 64
+#if LJ_ARCH_ENDIAN == LUAJIT_LE
+#define LJ_ARCH_NAME "ppc64le"
+#else
+#define LJ_ARCH_NAME "ppc64"
+#endif
#else
#define LJ_ARCH_BITS 32
+#define LJ_ARCH_NAME "ppc"
#endif
-#define LJ_ARCH_ENDIAN LUAJIT_BE
+
#define LJ_TARGET_PPC 1
#define LJ_TARGET_EHRETREG 3
#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
@@ -208,6 +236,15 @@
#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
+#if LJ_TARGET_CONSOLE
+#define LJ_ARCH_PPC32ON64 1
+#define LJ_ARCH_NOFFI 1
+#elif LJ_ARCH_BITS == 64
+#define LJ_ARCH_PPC64 1
+#define LJ_TARGET_GC64 1
+#define LJ_ARCH_NOJIT 1 /* NYI */
+#endif
+
#if _ARCH_PWR7
#define LJ_ARCH_VERSION 70
#elif _ARCH_PWR6
@@ -221,10 +258,6 @@
#else
#define LJ_ARCH_VERSION 0
#endif
-#if __PPC64__ || __powerpc64__ || LJ_TARGET_CONSOLE
-#define LJ_ARCH_PPC64 1
-#define LJ_ARCH_NOFFI 1
-#endif
#if _ARCH_PPCSQ
#define LJ_ARCH_SQRT 1
#endif
@@ -238,26 +271,6 @@
#define LJ_ARCH_XENON 1
#endif
-#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE
-
-#error "The PPC/e500 port is broken and will be abandoned with LuaJIT 2.1"
-#define LJ_ARCH_NAME "ppcspe"
-#define LJ_ARCH_BITS 32
-#define LJ_ARCH_ENDIAN LUAJIT_BE
-#ifndef LJ_ABI_SOFTFP
-#define LJ_ABI_SOFTFP 1
-#endif
-#define LJ_ABI_EABI 1
-#define LJ_TARGET_PPCSPE 1
-#define LJ_TARGET_EHRETREG 3
-#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
-#define LJ_TARGET_MASKSHIFT 0
-#define LJ_TARGET_MASKROT 1
-#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
-#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE
-#define LJ_ARCH_NOFFI 1 /* NYI: comparisons, calls. */
-#define LJ_ARCH_NOJIT 1
-
#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
@@ -304,6 +317,16 @@
#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
#error "Need at least GCC 4.2 or newer"
#endif
+#elif LJ_TARGET_ARM64
+#if __clang__
+#if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
+#error "Need at least Clang 3.5 or newer"
+#endif
+#else
+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
+#error "Need at least GCC 4.8 or newer"
+#endif
+#endif
#elif !LJ_TARGET_PS3
#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
#error "Need at least GCC 4.3 or newer"
@@ -327,20 +350,33 @@
#if !(__ARM_EABI__ || LJ_TARGET_IOS)
#error "Only ARM EABI or iOS 3.0+ ABI is supported"
#endif
-#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#elif LJ_TARGET_ARM64
+#if defined(__AARCH64EB__)
+#error "No support for big-endian ARM64"
+#endif
+#if defined(_ILP32)
+#error "No support for ILP32 model on ARM64"
+#endif
+#elif LJ_TARGET_PPC
#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
#error "No support for PowerPC CPUs without double-precision FPU"
#endif
-#if defined(_LITTLE_ENDIAN)
-#error "No support for little-endian PowerPC"
+#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
+#error "No support for little-endian PPC32"
#endif
-#if defined(_LP64)
-#error "No support for PowerPC 64 bit mode"
+#if LJ_ARCH_PPC64
+#error "No support for PowerPC 64 bit mode (yet)"
+#endif
+#ifdef __NO_FPRS__
+#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
#endif
#elif LJ_TARGET_MIPS
#if defined(__mips_soft_float)
#error "No support for MIPS CPUs without FPU"
#endif
+#if defined(_LP64)
+#error "No support for MIPS64"
+#endif
#endif
#endif
@@ -365,8 +401,22 @@
#endif
#endif
+/* 64 bit GC references. */
+#if LJ_TARGET_GC64
+#define LJ_GC64 1
+#else
+#define LJ_GC64 0
+#endif
+
+/* 2-slot frame info. */
+#if LJ_GC64
+#define LJ_FR2 1
+#else
+#define LJ_FR2 0
+#endif
+
/* Disable or enable the JIT compiler. */
-#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
+#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64
#define LJ_HASJIT 0
#else
#define LJ_HASJIT 1
@@ -426,11 +476,11 @@
#define LJ_TARGET_UNALIGNED 0
#endif
-/* Various workarounds for embedded operating systems. */
-#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360
+/* Various workarounds for embedded operating systems or weak C runtimes. */
+#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
#define LUAJIT_NO_LOG2
#endif
-#if defined(__symbian__)
+#if defined(__symbian__) || LJ_TARGET_WINDOWS
#define LUAJIT_NO_EXP2
#endif
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 71d9a88..9db950a 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1,6 +1,6 @@
/*
** IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_asm_c
@@ -1262,9 +1262,6 @@ static void asm_call(ASMState *as, IRIns *ir)
}
#if !LJ_SOFTFP
-static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref);
-
-#if !LJ_TARGET_X86ORX64
static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
{
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
@@ -1274,7 +1271,6 @@ static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
asm_setupresult(as, ir, ci);
asm_gencall(as, ci, args);
}
-#endif
static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
{
@@ -1721,6 +1717,11 @@ static void asm_head_side(ASMState *as)
int pass3 = 0;
IRRef i;
+ if (as->snapno && as->topslot > as->parent->topslot) {
+ /* Force snap #0 alloc to prevent register overwrite in stack check. */
+ as->snapno = 0;
+ asm_snap_alloc(as);
+ }
allow = asm_head_side_base(as, irp, allow);
/* Scan all parent SLOADs and collect register dependencies. */
@@ -1912,7 +1913,7 @@ static void asm_tail_link(ASMState *as)
mres = (int32_t)(snap->nslots - baseslot);
switch (bc_op(*pc)) {
case BC_CALLM: case BC_CALLMT:
- mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+ mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
@@ -2087,21 +2088,26 @@ static void asm_setup_regsp(ASMState *as)
if (inloop)
as->modset = RSET_SCRATCH;
break;
-#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP
- case IR_ATAN2: case IR_LDEXP:
+#if !LJ_SOFTFP
+ case IR_ATAN2:
+#if LJ_TARGET_X86
+ if (as->evenspill < 4) /* Leave room to call atan2(). */
+ as->evenspill = 4;
+#endif
+#if !LJ_TARGET_X86ORX64
+ case IR_LDEXP:
+#endif
#endif
case IR_POW:
if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-#if LJ_TARGET_X86ORX64
- ir->prev = REGSP_HINT(RID_XMM0);
if (inloop)
- as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+ as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
+ break;
#else
ir->prev = REGSP_HINT(RID_FPRET);
- if (inloop)
- as->modset |= RSET_SCRATCH;
-#endif
continue;
+#endif
}
/* fallthrough for integer POW */
case IR_DIV: case IR_MOD:
@@ -2114,26 +2120,25 @@ static void asm_setup_regsp(ASMState *as)
break;
case IR_FPMATH:
#if LJ_TARGET_X86ORX64
- if (ir->op2 == IRFPM_EXP2) { /* May be joined to pow. */
- ir->prev = REGSP_HINT(RID_XMM0);
-#if !LJ_64
- if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */
+ if (ir->op2 <= IRFPM_TRUNC) {
+ if (!(as->flags & JIT_F_SSE4_1)) {
+ ir->prev = REGSP_HINT(RID_XMM0);
+ if (inloop)
+ as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+ continue;
+ }
+ break;
+ } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
+ if (as->evenspill < 4) /* Leave room to call pow(). */
as->evenspill = 4;
-#endif
- if (inloop)
- as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
- continue;
- } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
- ir->prev = REGSP_HINT(RID_XMM0);
- if (inloop)
- as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
- continue;
}
+#endif
+ if (inloop)
+ as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
break;
#else
ir->prev = REGSP_HINT(RID_FPRET);
- if (inloop)
- as->modset |= RSET_SCRATCH;
continue;
#endif
#if LJ_TARGET_X86ORX64
diff --git a/src/lj_asm.h b/src/lj_asm.h
index f5d0159..85f2976 100644
--- a/src/lj_asm.h
+++ b/src/lj_asm.h
@@ -1,6 +1,6 @@
/*
** IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_ASM_H
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 8339367..81843ca 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -1,6 +1,6 @@
/*
** ARM IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Register allocator extensions --------------------------------------- */
@@ -481,7 +481,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
{
Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
void *pc = ir_kptr(IR(ir->op2));
- int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
as->topslot -= (BCReg)delta;
if ((int32_t)as->topslot < 0) as->topslot = 0;
irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index bc52159..adea0e3 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -1,6 +1,6 @@
/*
** MIPS IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Register allocator extensions --------------------------------------- */
@@ -372,7 +372,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
{
Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
void *pc = ir_kptr(IR(ir->op2));
- int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
as->topslot -= (BCReg)delta;
if ((int32_t)as->topslot < 0) as->topslot = 0;
irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index 815d905..7deeb66 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -1,6 +1,6 @@
/*
** PPC IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Register allocator extensions --------------------------------------- */
@@ -323,8 +323,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
} else {
ra_destreg(as, ir, RID_FPRET);
}
+#if LJ_32
} else if (hiop) {
ra_destpair(as, ir);
+#endif
} else {
ra_destreg(as, ir, RID_RET);
}
@@ -343,7 +345,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
func = ir->op2; irf = IR(func);
if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
if (irref_isk(func)) { /* Call to constant address. */
- ci.func = (ASMFunction)(void *)(irf->i);
+ ci.func = (ASMFunction)(void *)(intptr_t)(irf->i);
} else { /* Need a non-argument register for indirect calls. */
RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
Reg freg = ra_alloc1(as, func, allow);
@@ -361,7 +363,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
{
Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
void *pc = ir_kptr(IR(ir->op2));
- int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
as->topslot -= (BCReg)delta;
if ((int32_t)as->topslot < 0) as->topslot = 0;
irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -527,7 +529,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
/* Otherwise use g->tmptv to hold the TValue. */
RegSet allow = rset_exclude(RSET_GPR, dest);
Reg type;
- emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768);
+ emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768);
if (!irt_ispri(ir->t)) {
Reg src = ra_alloc1(as, ref, allow);
emit_setgl(as, src, tmptv.gcr);
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 8b54125..941d091 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -1,6 +1,6 @@
/*
** x86/x64 IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Guard handling ------------------------------------------------------ */
@@ -643,7 +643,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
{
Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
void *pc = ir_kptr(IR(ir->op2));
- int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
as->topslot -= (BCReg)delta;
if ((int32_t)as->topslot < 0) as->topslot = 0;
irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -1593,26 +1593,9 @@ static void asm_x87load(ASMState *as, IRRef ref)
}
}
-static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
-{
- /* The modified regs must match with the *.dasc implementation. */
- RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
- IRIns *irx;
- if (ra_hasreg(ir->r))
- rset_clear(drop, ir->r); /* Dest reg handled below. */
- ra_evictset(as, drop);
- ra_destreg(as, ir, RID_XMM0);
- emit_call(as, lj_vm_pow_sse);
- irx = IR(lref);
- if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
- irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */
- ra_left(as, RID_XMM0, lref);
- ra_left(as, RID_XMM1, rref);
-}
-
static void asm_fpmath(ASMState *as, IRIns *ir)
{
- IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER;
+ IRFPMathOp fpm = (IRFPMathOp)ir->op2;
if (fpm == IRFPM_SQRT) {
Reg dest = ra_dest(as, ir, RSET_FPR);
Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
@@ -1645,53 +1628,28 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
}
} else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
/* Rejoined to pow(). */
- } else { /* Handle x87 ops. */
- int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
- Reg dest = ir->r;
- if (ra_hasreg(dest)) {
- ra_free(as, dest);
- ra_modified(as, dest);
- emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
- }
- emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
- switch (fpm) { /* st0 = lj_vm_*(st0) */
- case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break;
- case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break;
- case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
- case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
- case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
- case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
- /* Note: the use of fyl2xp1 would be pointless here. When computing
- ** log(1.0+eps) the precision is already lost after 1.0 is added.
- ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
- */
- emit_x87op(as, XI_FYL2X); break;
- case IRFPM_OTHER:
- switch (ir->o) {
- case IR_ATAN2:
- emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
- case IR_LDEXP:
- emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
- default: lua_assert(0); break;
- }
- break;
- default: lua_assert(0); break;
- }
- asm_x87load(as, ir->op1);
- switch (fpm) {
- case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
- case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
- case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
- case IRFPM_OTHER:
- if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
- break;
- default: break;
- }
+ } else {
+ asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
}
}
-#define asm_atan2(as, ir) asm_fpmath(as, ir)
-#define asm_ldexp(as, ir) asm_fpmath(as, ir)
+#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
+
+static void asm_ldexp(ASMState *as, IRIns *ir) /* Emit IR_LDEXP using the x87 FSCALE op. */
+{
+ int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
+ Reg dest = ir->r; /* Register currently recorded for the result, if any. */
+ if (ra_hasreg(dest)) { /* NOTE(review): emit calls look reversed vs. execution order -- confirm. */
+ ra_free(as, dest);
+ ra_modified(as, dest);
+ emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs); /* MOVSD dest <-> [esp+ofs]; presumably reloads the result. */
+ }
+ emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); /* FSTP qword at [esp+ofs]. */
+ emit_x87op(as, XI_FPOP1);
+ emit_x87op(as, XI_FSCALE);
+ asm_x87load(as, ir->op1); /* Both operands go through the x87 stack. */
+ asm_x87load(as, ir->op2);
+}
static void asm_fppowi(ASMState *as, IRIns *ir)
{
@@ -1773,8 +1731,12 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
Reg dest, right;
int32_t k = 0;
if (as->flagmcp == as->mcp) { /* Drop test r,r instruction. */
- as->flagmcp = NULL;
- as->mcp += (LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2;
+ MCode *p = as->mcp + ((LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2);
+ if ((p[1] & 15) < 14) {
+ if ((p[1] & 15) >= 12) p[1] -= 4; /* L <->S, NL <-> NS */
+ as->flagmcp = NULL;
+ as->mcp = p;
+ } /* else: cannot transform LE/NLE to cc without use of OF. */
}
right = IR(rref)->r;
if (ra_hasreg(right)) {
diff --git a/src/lj_bc.c b/src/lj_bc.c
index d5d3d78..a8f444c 100644
--- a/src/lj_bc.c
+++ b/src/lj_bc.c
@@ -1,6 +1,6 @@
/*
** Bytecode instruction modes.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_bc_c
diff --git a/src/lj_bc.h b/src/lj_bc.h
index 826a304..64c1bcd 100644
--- a/src/lj_bc.h
+++ b/src/lj_bc.h
@@ -1,6 +1,6 @@
/*
** Bytecode instruction format.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_BC_H
diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h
index 940ed14..c389831 100644
--- a/src/lj_bcdump.h
+++ b/src/lj_bcdump.h
@@ -1,6 +1,6 @@
/*
** Bytecode dump definitions.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_BCDUMP_H
@@ -42,8 +42,9 @@
#define BCDUMP_F_BE 0x01
#define BCDUMP_F_STRIP 0x02
#define BCDUMP_F_FFI 0x04
+#define BCDUMP_F_FR2 0x08
-#define BCDUMP_F_KNOWN (BCDUMP_F_FFI*2-1)
+#define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1)
/* Type codes for the GC constants of a prototype. Plus length for strings. */
enum {
diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index 2360bf4..5e50217 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -1,6 +1,6 @@
/*
** Bytecode reader.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_bcread_c
@@ -48,7 +48,7 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
{
lua_assert(len != 0);
- if (len > LJ_MAX_MEM || ls->c < 0)
+ if (len > LJ_MAX_BUF || ls->c < 0)
bcread_error(ls, LJ_ERR_BCBAD);
do {
const char *buf;
@@ -192,7 +192,7 @@ static void bcread_ktabk(LexState *ls, TValue *o)
o->u32.hi = bcread_uleb128(ls);
} else {
lua_assert(tp <= BCDUMP_KTAB_TRUE);
- setitype(o, ~tp);
+ setpriV(o, ~tp);
}
}
@@ -394,6 +394,7 @@ static int bcread_header(LexState *ls)
bcread_byte(ls) != BCDUMP_VERSION) return 0;
bcread_flags(ls) = flags = bcread_uleb128(ls);
if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
+ if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
if ((flags & BCDUMP_F_FFI)) {
#if LJ_HASFFI
lua_State *L = ls->L;
diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c
index a70735c..b2c0973 100644
--- a/src/lj_bcwrite.c
+++ b/src/lj_bcwrite.c
@@ -1,6 +1,6 @@
/*
** Bytecode writer.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_bcwrite_c
@@ -309,8 +309,9 @@ static void bcwrite_header(BCWriteCtx *ctx)
*p++ = BCDUMP_HEAD3;
*p++ = BCDUMP_VERSION;
*p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
- (LJ_BE ? BCDUMP_F_BE : 0) +
- ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0);
+ LJ_BE*BCDUMP_F_BE +
+ ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) +
+ LJ_FR2*BCDUMP_F_FR2;
if (!ctx->strip) {
p = lj_strfmt_wuleb128(p, len);
p = lj_buf_wmem(p, name, len);
diff --git a/src/lj_buf.c b/src/lj_buf.c
index 05ff1f5..023bb9a 100644
--- a/src/lj_buf.c
+++ b/src/lj_buf.c
@@ -1,6 +1,6 @@
/*
** Buffer handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_buf_c
@@ -31,7 +31,7 @@ static void buf_grow(SBuf *sb, MSize sz)
LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz)
{
lua_assert(sz > sbufsz(sb));
- if (LJ_UNLIKELY(sz > LJ_MAX_MEM))
+ if (LJ_UNLIKELY(sz > LJ_MAX_BUF))
lj_err_mem(sbufL(sb));
buf_grow(sb, sz);
return sbufB(sb);
@@ -41,7 +41,7 @@ LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz)
{
MSize len = sbuflen(sb);
lua_assert(sz > sbufleft(sb));
- if (LJ_UNLIKELY(sz > LJ_MAX_MEM || len + sz > LJ_MAX_MEM))
+ if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
lj_err_mem(sbufL(sb));
buf_grow(sb, len + sz);
return sbufP(sb);
@@ -178,7 +178,7 @@ SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e)
char *p;
if (!o) {
badtype: /* Error: bad element type. */
- setsbufP(sb, (intptr_t)i); /* Store failing index. */
+ setsbufP(sb, (void *)(intptr_t)i); /* Store failing index. */
return NULL;
} else if (tvisstr(o)) {
MSize len = strV(o)->len;
diff --git a/src/lj_buf.h b/src/lj_buf.h
index 66b285a..1cf1780 100644
--- a/src/lj_buf.h
+++ b/src/lj_buf.h
@@ -1,6 +1,6 @@
/*
** Buffer handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_BUF_H
diff --git a/src/lj_carith.c b/src/lj_carith.c
index f675bf5..9032ea3 100644
--- a/src/lj_carith.c
+++ b/src/lj_carith.c
@@ -1,6 +1,6 @@
/*
** C data arithmetic.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include "lj_obj.h"
diff --git a/src/lj_carith.h b/src/lj_carith.h
index bea5ed2..da8320f 100644
--- a/src/lj_carith.h
+++ b/src/lj_carith.h
@@ -1,6 +1,6 @@
/*
** C data arithmetic.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_CARITH_H
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 1011a1a..5ab5b60 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -1,6 +1,6 @@
/*
** FFI C call handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include "lj_obj.h"
@@ -290,6 +290,75 @@
#define CCALL_HANDLE_RET \
if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0];
+#elif LJ_TARGET_ARM64
+/* -- ARM64 calling conventions ------------------------------------------- */
+
+#define CCALL_HANDLE_STRUCTRET \
+ cc->retref = !ccall_classify_struct(cts, ctr); \
+ if (cc->retref) cc->retp = dp;
+
+#define CCALL_HANDLE_STRUCTRET2 \
+ unsigned int cl = ccall_classify_struct(cts, ctr); \
+ if ((cl & 4)) { /* Combine float HFA from separate registers. */ \
+ CTSize i = (cl >> 8) - 1; \
+ do { ((uint32_t *)dp)[i] = cc->fpr[i].u32; } while (i--); \
+ } else { \
+ if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \
+ memcpy(dp, sp, ctr->size); \
+ }
+
+#define CCALL_HANDLE_COMPLEXRET \
+ /* Complex values are returned in one or two FPRs. */ \
+ cc->retref = 0;
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
+ ((float *)dp)[0] = cc->fpr[0].f; \
+ ((float *)dp)[1] = cc->fpr[1].f; \
+ } else { /* Copy complex double from FPRs. */ \
+ ((double *)dp)[0] = cc->fpr[0].d; \
+ ((double *)dp)[1] = cc->fpr[1].d; \
+ }
+
+#define CCALL_HANDLE_STRUCTARG \
+ unsigned int cl = ccall_classify_struct(cts, d); \
+ if (cl == 0) { /* Pass struct by reference. */ \
+ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+ sz = CTSIZE_PTR; \
+ } else if (cl > 1) { /* Pass struct in FPRs or on stack. */ \
+ isfp = (cl & 4) ? 2 : 1; \
+ } /* else: Pass struct in GPRs or on stack. */
+
+#define CCALL_HANDLE_COMPLEXARG \
+ /* Pass complex by value in separate (!) FPRs or on stack. */ \
+ isfp = ctr->size == 2*sizeof(float) ? 2 : 1;
+
+#define CCALL_HANDLE_REGARG \
+ if (LJ_TARGET_IOS && isva) { \
+ /* IOS: All variadic arguments are on the stack. */ \
+ } else if (isfp) { /* Try to pass argument in FPRs. */ \
+ int n2 = ctype_isvector(d->info) ? 1 : n*isfp; \
+ if (nfpr + n2 <= CCALL_NARG_FPR) { \
+ dp = &cc->fpr[nfpr]; \
+ nfpr += n2; \
+ goto done; \
+ } else { \
+ nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
+ if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
+ } \
+ } else { /* Try to pass argument in GPRs. */ \
+ if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
+ ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
+ if (ngpr + n <= maxgpr) { \
+ dp = &cc->gpr[ngpr]; \
+ ngpr += n; \
+ goto done; \
+ } else { \
+ ngpr = maxgpr; /* Prevent reordering. */ \
+ if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
+ } \
+ }
+
#elif LJ_TARGET_PPC
/* -- PPC calling conventions --------------------------------------------- */
@@ -338,42 +407,6 @@
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
-#elif LJ_TARGET_PPCSPE
-/* -- PPC/SPE calling conventions ----------------------------------------- */
-
-#define CCALL_HANDLE_STRUCTRET \
- cc->retref = 1; /* Return all structs by reference. */ \
- cc->gpr[ngpr++] = (GPRArg)dp;
-
-#define CCALL_HANDLE_COMPLEXRET \
- /* Complex values are returned in 2 or 4 GPRs. */ \
- cc->retref = 0;
-
-#define CCALL_HANDLE_COMPLEXRET2 \
- memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */
-
-#define CCALL_HANDLE_STRUCTARG \
- rp = cdataptr(lj_cdata_new(cts, did, sz)); \
- sz = CTSIZE_PTR; /* Pass all structs by reference. */
-
-#define CCALL_HANDLE_COMPLEXARG \
- /* Pass complex by value in 2 or 4 GPRs. */
-
-/* PPC/SPE has a softfp ABI. */
-#define CCALL_HANDLE_REGARG \
- if (n > 1) { /* Doesn't fit in a single GPR? */ \
- lua_assert(n == 2 || n == 4); /* int64_t, double or complex (float). */ \
- if (n == 2) \
- ngpr = (ngpr + 1u) & ~1u; /* Only align 64 bit value to regpair. */ \
- else if (ngpr + n > maxgpr) \
- ngpr = maxgpr; /* Prevent reordering. */ \
- } \
- if (ngpr + n <= maxgpr) { \
- dp = &cc->gpr[ngpr]; \
- ngpr += n; \
- goto done; \
- }
-
#elif LJ_TARGET_MIPS
/* -- MIPS calling conventions -------------------------------------------- */
@@ -620,6 +653,52 @@ noth: /* Not a homogeneous float/double aggregate. */
#endif
+/* -- ARM64 ABI struct classification ------------------------------------- */
+
+#if LJ_TARGET_ARM64
+
+/* Classify a struct based on its fields: 0 or 1 for non-HFA, else elem size + (count << 8). */
+static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
+{
+ CTSize sz = ct->size;
+ unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); /* r: OR of FP elem sizes, n: FP elem count. */
+ while (ct->sib) { /* Walk the sibling chain of members. */
+ CType *sct;
+ ct = ctype_get(cts, ct->sib);
+ if (ctype_isfield(ct->info)) {
+ sct = ctype_rawchild(cts, ct);
+ if (ctype_isfp(sct->info)) {
+ r |= sct->size;
+ if (!isu) n++; else if (n == 0) n = 1; /* Struct: fields add up. Union: keep the max. */
+ } else if (ctype_iscomplex(sct->info)) {
+ r |= (sct->size >> 1); /* Elem size is half the complex size. */
+ if (!isu) n += 2; else if (n < 2) n = 2; /* A complex counts as two elements. */
+ } else if (ctype_isstruct(sct->info)) {
+ goto substruct;
+ } else {
+ goto noth; /* Any other field type rules out an HFA. */
+ }
+ } else if (ctype_isbitfield(ct->info)) {
+ goto noth;
+ } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
+ sct = ctype_rawchild(cts, ct);
+ substruct:
+ if (sct->size > 0) { /* Zero-sized sub-aggregates are ignored. */
+ unsigned int s = ccall_classify_struct(cts, sct);
+ if (s <= 1) goto noth; /* Nested aggregate is not an HFA. */
+ r |= (s & 255); /* Low byte: nested elem size. */
+ if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); /* Upper bits: nested elem count. */
+ }
+ }
+ }
+ if ((r == 4 || r == 8) && n <= 4) /* All elems have one size (4 or 8) and there are at most 4. */
+ return r + (n << 8);
+noth: /* Not a homogeneous float/double aggregate. */
+ return (sz <= 16); /* 1: size <= 16, returned in GPRs; 0: otherwise. */
+}
+
+#endif
+
/* -- Common C call handling ---------------------------------------------- */
/* Infer the destination CTypeID for a vararg argument. */
@@ -802,6 +881,12 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */
cc->fpr[nfpr-2].d[1] = 0;
}
+#elif LJ_TARGET_ARM64
+ if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) {
+ /* Split float HFA or complex float into separate registers. */
+ CTSize i = (sz >> 2) - 1;
+ do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
+ }
#else
UNUSED(isfp);
#endif
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index c3ea9e6..91983fe 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -1,6 +1,6 @@
/*
** FFI C call handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_CCALL_H
@@ -68,27 +68,32 @@ typedef union FPRArg {
float f[2];
} FPRArg;
-#elif LJ_TARGET_PPC
+#elif LJ_TARGET_ARM64
#define CCALL_NARG_GPR 8
+#define CCALL_NRET_GPR 2
#define CCALL_NARG_FPR 8
-#define CCALL_NRET_GPR 4 /* For complex double. */
-#define CCALL_NRET_FPR 1
-#define CCALL_SPS_EXTRA 4
+#define CCALL_NRET_FPR 4
#define CCALL_SPS_FREE 0
typedef intptr_t GPRArg;
-typedef double FPRArg;
+typedef union FPRArg {
+ double d;
+ float f;
+ uint32_t u32;
+} FPRArg;
-#elif LJ_TARGET_PPCSPE
+#elif LJ_TARGET_PPC
#define CCALL_NARG_GPR 8
-#define CCALL_NARG_FPR 0
-#define CCALL_NRET_GPR 4 /* For softfp complex double. */
-#define CCALL_NRET_FPR 0
-#define CCALL_SPS_FREE 0 /* NYI */
+#define CCALL_NARG_FPR 8
+#define CCALL_NRET_GPR 4 /* For complex double. */
+#define CCALL_NRET_FPR 1
+#define CCALL_SPS_EXTRA 4
+#define CCALL_SPS_FREE 0
typedef intptr_t GPRArg;
+typedef double FPRArg;
#elif LJ_TARGET_MIPS
@@ -145,6 +150,8 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
uint8_t nfpr; /* Number of arguments in FPRs. */
#elif LJ_TARGET_X86
uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */
+#elif LJ_TARGET_ARM64
+ void *retp; /* Aggregate return pointer in x8. */
#elif LJ_TARGET_PPC
uint8_t nfpr; /* Number of arguments in FPRs. */
#endif
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index b234ec6..66a0944 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -1,6 +1,6 @@
/*
** FFI C callback handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include "lj_obj.h"
@@ -27,7 +27,7 @@
#if LJ_OS_NOJIT
-/* Disabled callback support. */
+/* Callbacks disabled. */
#define CALLBACK_SLOT2OFS(slot) (0*(slot))
#define CALLBACK_OFS2SLOT(ofs) (0*(ofs))
#define CALLBACK_MAX_SLOT 0
@@ -54,23 +54,18 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#elif LJ_TARGET_ARM
#define CALLBACK_MCODE_HEAD 32
-#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
-#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
+
+#elif LJ_TARGET_ARM64
+
+#define CALLBACK_MCODE_HEAD 32
#elif LJ_TARGET_PPC
#define CALLBACK_MCODE_HEAD 24
-#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
-#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
#elif LJ_TARGET_MIPS
#define CALLBACK_MCODE_HEAD 24
-#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
-#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
#else
@@ -81,6 +76,12 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#endif
+#ifndef CALLBACK_SLOT2OFS
+#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
+#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
+#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
+#endif
+
/* Convert callback slot number to callback function pointer. */
static void *callback_slot2ptr(CTState *cts, MSize slot)
{
@@ -157,6 +158,26 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
}
lua_assert(p - page <= CALLBACK_MCODE_SIZE);
}
+#elif LJ_TARGET_ARM64
+static void callback_mcode_init(global_State *g, uint32_t *page) /* Fill the ARM64 callback mcode page. */
+{
+ uint32_t *p = page;
+ void *target = (void *)lj_vm_ffi_callback;
+ MSize slot;
+ *p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4); /* x11 = literal; presumably 4 words ahead (target). */
+ *p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5); /* x10 = literal; presumably 5 words ahead (g). */
+ *p++ = A64I_BR | A64F_N(RID_X11); /* Branch to the address in x11. */
+ *p++ = A64I_NOP;
+ ((void **)p)[0] = target; /* Literals placed right after the 4-instruction head. */
+ ((void **)p)[1] = g;
+ p += 4; /* Skip the two pointer literals (4 x uint32_t with 8-byte pointers). */
+ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { /* Two instructions per callback slot. */
+ *p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot); /* w9 = slot number. */
+ *p = A64I_B | A64F_S26((page-p) & 0x03ffffffu); /* Branch back to the head at the page start. */
+ p++;
+ }
+ lua_assert(p - page <= CALLBACK_MCODE_SIZE);
+}
#elif LJ_TARGET_PPC
static void callback_mcode_init(global_State *g, uint32_t *page)
{
@@ -351,6 +372,29 @@ void lj_ccallback_mcode_free(CTState *cts)
goto done; \
} CALLBACK_HANDLE_REGARG_FP2
+#elif LJ_TARGET_ARM64
+
+#define CALLBACK_HANDLE_REGARG \
+ if (isfp) { \
+ if (nfpr + n <= CCALL_NARG_FPR) { \
+ sp = &cts->cb.fpr[nfpr]; \
+ nfpr += n; \
+ goto done; \
+ } else { \
+ nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
+ } \
+ } else { \
+ if (!LJ_TARGET_IOS && n > 1) \
+ ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
+ if (ngpr + n <= maxgpr) { \
+ sp = &cts->cb.gpr[ngpr]; \
+ ngpr += n; \
+ goto done; \
+ } else { \
+ ngpr = CCALL_NARG_GPR; /* Prevent reordering. */ \
+ } \
+ }
+
#elif LJ_TARGET_PPC
#define CALLBACK_HANDLE_REGARG \
@@ -411,6 +455,7 @@ static void callback_conv_args(CTState *cts, lua_State *L)
int gcsteps = 0;
CType *ct;
GCfunc *fn;
+ int fntp;
MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR;
#if CCALL_NARG_FPR
MSize nfpr = 0;
@@ -421,18 +466,27 @@ static void callback_conv_args(CTState *cts, lua_State *L)
if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) {
ct = ctype_get(cts, id);
- rid = ctype_cid(ct->info);
+ rid = ctype_cid(ct->info); /* Return type. x86: +(spadj<<16). */
fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot));
+ fntp = LJ_TFUNC;
} else { /* Must set up frame first, before throwing the error. */
ct = NULL;
rid = 0;
fn = (GCfunc *)L;
+ fntp = LJ_TTHREAD;
}
- o->u32.lo = LJ_CONT_FFI_CALLBACK; /* Continuation returns from callback. */
- o->u32.hi = rid; /* Return type. x86: +(spadj<<16). */
- o++;
- setframe_gc(o, obj2gco(fn));
- setframe_ftsz(o, (int)((char *)(o+1) - (char *)L->base) + FRAME_CONT);
+ /* Continuation returns from callback. */
+ if (LJ_FR2) {
+ (o++)->u64 = LJ_CONT_FFI_CALLBACK;
+ (o++)->u64 = rid;
+ o++;
+ } else {
+ o->u32.lo = LJ_CONT_FFI_CALLBACK;
+ o->u32.hi = rid;
+ o++;
+ }
+ setframe_gc(o, obj2gco(fn), fntp);
+ setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT);
L->top = L->base = ++o;
if (!ct)
lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK);
@@ -483,9 +537,14 @@ static void callback_conv_args(CTState *cts, lua_State *L)
L->top = o;
#if LJ_TARGET_X86
/* Store stack adjustment for returns from non-cdecl callbacks. */
- if (ctype_cconv(ct->info) != CTCC_CDECL)
+ if (ctype_cconv(ct->info) != CTCC_CDECL) {
+#if LJ_FR2
+ (L->base-3)->u64 |= (nsp << (16+2));
+#else
(L->base-2)->u32.hi |= (nsp << (16+2));
#endif
+ }
+#endif
while (gcsteps-- > 0)
lj_gc_check(L);
}
@@ -493,7 +552,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
/* Convert Lua object to callback result. */
static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
{
+#if LJ_FR2
+ CType *ctr = ctype_raw(cts, (uint16_t)(L->base-3)->u64);
+#else
CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi);
+#endif
#if LJ_TARGET_X86
cts->cb.gpr[2] = 0;
#endif
@@ -562,7 +625,7 @@ void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o)
}
callback_conv_result(cts, L, o);
/* Finally drop C frame and continuation frame. */
- L->top -= 2;
+ L->top -= 2+2*LJ_FR2;
L->base = obase;
L->cframe = cframe_prev(L->cframe);
cts->cb.slot = 0; /* Blacklist C function that called the callback. */
diff --git a/src/lj_ccallback.h b/src/lj_ccallback.h
index 45b5ff0..83dbe04 100644
--- a/src/lj_ccallback.h
+++ b/src/lj_ccallback.h
@@ -1,6 +1,6 @@
/*
** FFI C callback handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_CCALLBACK_H
diff --git a/src/lj_cconv.c b/src/lj_cconv.c
index 90cd36e..8a27076 100644
--- a/src/lj_cconv.c
+++ b/src/lj_cconv.c
@@ -1,6 +1,6 @@
/*
** C type conversions.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include "lj_obj.h"
diff --git a/src/lj_cconv.h b/src/lj_cconv.h
index 17e0f05..2bd50ff 100644
--- a/src/lj_cconv.h
+++ b/src/lj_cconv.h
@@ -1,6 +1,6 @@
/*
** C type conversions.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_CCONV_H
diff --git a/src/lj_cdata.c b/src/lj_cdata.c
index 164671f..fccf7f1 100644
--- a/src/lj_cdata.c
+++ b/src/lj_cdata.c
@@ -1,6 +1,6 @@
/*
** C data management.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include "lj_obj.h"
diff --git a/src/lj_cdata.h b/src/lj_cdata.h
index f24f3ad..c8975be 100644
--- a/src/lj_cdata.h
+++ b/src/lj_cdata.h
@@ -1,6 +1,6 @@
/*
** C data management.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_CDATA_H
diff --git a/src/lj_clib.c b/src/lj_clib.c
index 70e1c8f..6bdad67 100644
--- a/src/lj_clib.c
+++ b/src/lj_clib.c
@@ -1,6 +1,6 @@
/*
** FFI C library loader.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include "lj_obj.h"
diff --git a/src/lj_clib.h b/src/lj_clib.h
index 4737328..e5dc98e 100644
--- a/src/lj_clib.h
+++ b/src/lj_clib.h
@@ -1,6 +1,6 @@
/*
** FFI C library loader.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_CLIB_H
diff --git a/src/lj_cparse.c b/src/lj_cparse.c
index fd998ad..1ec3230 100644
--- a/src/lj_cparse.c
+++ b/src/lj_cparse.c
@@ -1,6 +1,6 @@
/*
** C declaration parser.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include "lj_obj.h"
diff --git a/src/lj_cparse.h b/src/lj_cparse.h
index 586f494..441580d 100644
--- a/src/lj_cparse.h
+++ b/src/lj_cparse.h
@@ -1,6 +1,6 @@
/*
** C declaration parser.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_CPARSE_H
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index acd786f..e200cc9 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -1,6 +1,6 @@
/*
** Trace recorder for C data operations.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_ffrecord_c
@@ -1123,7 +1123,7 @@ static void crec_snap_caller(jit_State *J)
ptrdiff_t delta;
if (!frame_islua(base-1) || J->framedepth <= 0)
lj_trace_err(J, LJ_TRERR_NYICALL);
- J->pc = frame_pc(base-1); delta = 1+bc_a(J->pc[-1]);
+ J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]);
L->top = base; L->base = base - delta;
J->base[-1] = TREF_FALSE;
J->base -= delta; J->baseslot -= (BCReg)delta;
@@ -1484,8 +1484,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
!irt_isguard(J->guardemit)) {
const BCIns *pc = frame_contpc(J->L->base-1) - 1;
if (bc_op(*pc) <= BC_ISNEP) {
- setframe_pc(&J2G(J)->tmptv, pc);
- J2G(J)->tmptv.u32.lo = ((tref_istrue(tr) ^ bc_op(*pc)) & 1);
+ J2G(J)->tmptv.u64 = (uint64_t)(uintptr_t)pc;
J->postproc = LJ_POST_FIXCOMP;
}
}
diff --git a/src/lj_crecord.h b/src/lj_crecord.h
index 765b810..59f342a 100644
--- a/src/lj_crecord.h
+++ b/src/lj_crecord.h
@@ -1,6 +1,6 @@
/*
** Trace recorder for C data operations.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_CRECORD_H
diff --git a/src/lj_ctype.c b/src/lj_ctype.c
index 1814a46..2e23c99 100644
--- a/src/lj_ctype.c
+++ b/src/lj_ctype.c
@@ -1,6 +1,6 @@
/*
** C type management.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include "lj_obj.h"
diff --git a/src/lj_ctype.h b/src/lj_ctype.h
index ff8ee06..6639547 100644
--- a/src/lj_ctype.h
+++ b/src/lj_ctype.h
@@ -1,6 +1,6 @@
/*
** C type management.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_CTYPE_H
@@ -263,7 +263,7 @@ typedef struct CTState {
/* -- Predefined types ---------------------------------------------------- */
/* Target-dependent types. */
-#if LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#if LJ_TARGET_PPC
#define CTTYDEFP(_) \
_(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2))
#else
diff --git a/src/lj_debug.c b/src/lj_debug.c
index 889ac01..3226d03 100644
--- a/src/lj_debug.c
+++ b/src/lj_debug.c
@@ -1,6 +1,6 @@
/*
** Debugging and introspection.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_debug_c
@@ -15,7 +15,6 @@
#include "lj_frame.h"
#include "lj_bc.h"
#include "lj_strfmt.h"
-#include "lj_vm.h"
#if LJ_HASJIT
#include "lj_jit.h"
#endif
@@ -25,7 +24,7 @@
/* Get frame corresponding to a level. */
cTValue *lj_debug_frame(lua_State *L, int level, int *size)
{
- cTValue *frame, *nextframe, *bot = tvref(L->stack);
+ cTValue *frame, *nextframe, *bot = tvref(L->stack)+LJ_FR2;
/* Traverse frames backwards. */
for (nextframe = frame = L->base-1; frame > bot; ) {
if (frame_gc(frame) == obj2gco(L))
@@ -88,8 +87,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
if (frame_islua(f)) {
f = frame_prevl(f);
} else {
- if (frame_isc(f) || (LJ_HASFFI && frame_iscont(f) &&
- (f-1)->u32.lo == LJ_CONT_FFI_CALLBACK))
+ if (frame_isc(f) || (frame_iscont(f) && frame_iscont_fficb(f)))
cf = cframe_raw(cframe_prev(cf));
f = frame_prevd(f);
}
@@ -186,7 +184,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
TValue *nextframe = size ? frame + size : NULL;
GCfunc *fn = frame_func(frame);
BCPos pc = debug_framepc(L, fn, nextframe);
- if (!nextframe) nextframe = L->top;
+ if (!nextframe) nextframe = L->top+LJ_FR2;
if ((int)slot1 < 0) { /* Negative slot number is for varargs. */
if (pc != NO_BCPOS) {
GCproto *pt = funcproto(fn);
@@ -196,7 +194,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
nextframe = frame;
frame = frame_prevd(frame);
}
- if (frame + slot1 < nextframe) {
+ if (frame + slot1+LJ_FR2 < nextframe) {
*name = "(*vararg)";
return frame+slot1;
}
@@ -207,7 +205,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
if (pc != NO_BCPOS &&
(*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL)
;
- else if (slot1 > 0 && frame + slot1 < nextframe)
+ else if (slot1 > 0 && frame + slot1+LJ_FR2 < nextframe)
*name = "(*temporary)";
return frame+slot1;
}
@@ -270,7 +268,7 @@ restart:
*name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins))));
if (ip > proto_bc(pt)) {
BCIns insp = ip[-1];
- if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1 &&
+ if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1+LJ_FR2 &&
bc_d(insp) == bc_b(ins))
return "method";
}
@@ -292,7 +290,7 @@ const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name)
cTValue *pframe;
GCfunc *fn;
BCPos pc;
- if (frame <= tvref(L->stack))
+ if (frame <= tvref(L->stack)+LJ_FR2)
return NULL;
if (frame_isvarg(frame))
frame = frame_prevd(frame);
diff --git a/src/lj_debug.h b/src/lj_debug.h
index f6e5217..11d308a 100644
--- a/src/lj_debug.h
+++ b/src/lj_debug.h
@@ -1,6 +1,6 @@
/*
** Debugging and introspection.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_DEBUG_H
diff --git a/src/lj_def.h b/src/lj_def.h
index 8624aed..c8fe4aa 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -1,6 +1,6 @@
/*
** LuaJIT common internal definitions.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_DEF_H
@@ -46,10 +46,14 @@ typedef unsigned int uintptr_t;
#include <stdlib.h>
/* Various VM limits. */
-#define LJ_MAX_MEM 0x7fffff00 /* Max. total memory allocation. */
+#define LJ_MAX_MEM32 0x7fffff00 /* Max. 32 bit memory allocation. */
+#define LJ_MAX_MEM64 ((uint64_t)1<<47) /* Max. 64 bit memory allocation. */
+/* Max. total memory allocation. */
+#define LJ_MAX_MEM (LJ_GC64 ? LJ_MAX_MEM64 : LJ_MAX_MEM32)
#define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */
-#define LJ_MAX_STR LJ_MAX_MEM /* Max. string length. */
-#define LJ_MAX_UDATA LJ_MAX_MEM /* Max. userdata length. */
+#define LJ_MAX_STR LJ_MAX_MEM32 /* Max. string length. */
+#define LJ_MAX_BUF LJ_MAX_MEM32 /* Max. buffer length. */
+#define LJ_MAX_UDATA LJ_MAX_MEM32 /* Max. userdata length. */
#define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */
#define LJ_MAX_HBITS 26 /* Max. hash bits. */
@@ -57,7 +61,7 @@ typedef unsigned int uintptr_t;
#define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */
#define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */
-#define LJ_MAX_LINE LJ_MAX_MEM /* Max. source code line number. */
+#define LJ_MAX_LINE LJ_MAX_MEM32 /* Max. source code line number. */
#define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */
#define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */
#define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */
@@ -65,7 +69,7 @@ typedef unsigned int uintptr_t;
#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */
#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */
-#define LJ_STACK_EXTRA 5 /* Extra stack space (metamethods). */
+#define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */
#define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */
@@ -99,6 +103,14 @@ typedef unsigned int uintptr_t;
#define checki32(x) ((x) == (int32_t)(x))
#define checku32(x) ((x) == (uint32_t)(x))
#define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x))
+#define checkptr47(x) (((uint64_t)(x) >> 47) == 0)
+#if LJ_GC64
+#define checkptrGC(x) (checkptr47((x)))
+#elif LJ_64
+#define checkptrGC(x) (checkptr32((x)))
+#else
+#define checkptrGC(x) 1
+#endif
/* Every half-decent C compiler transforms this into a rotate instruction. */
#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
index ea196ef..1a07371 100644
--- a/src/lj_dispatch.c
+++ b/src/lj_dispatch.c
@@ -1,6 +1,6 @@
/*
** Instruction dispatch handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_dispatch_c
@@ -393,7 +393,7 @@ static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres)
if (bc_op(ins) == BC_UCLO)
ins = pc[bc_j(ins)];
switch (bc_op(ins)) {
- case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1;
+ case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1+LJ_FR2;
case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1;
case BC_TSETM: return bc_a(ins) + nres-1;
default: return pt->framesize;
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index c3f6d86..1e247e3 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -1,6 +1,6 @@
/*
** Instruction dispatch handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_DISPATCH_H
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h
index aa36e83..45ce519 100644
--- a/src/lj_emit_arm.h
+++ b/src/lj_emit_arm.h
@@ -1,6 +1,6 @@
/*
** ARM instruction emitter.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Constant encoding --------------------------------------------------- */
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
index fa39e91..8e7ee66 100644
--- a/src/lj_emit_mips.h
+++ b/src/lj_emit_mips.h
@@ -1,6 +1,6 @@
/*
** MIPS instruction emitter.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Emit basic instructions --------------------------------------------- */
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h
index 1e5aa65..087860e 100644
--- a/src/lj_emit_ppc.h
+++ b/src/lj_emit_ppc.h
@@ -1,6 +1,6 @@
/*
** PPC instruction emitter.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Emit basic instructions --------------------------------------------- */
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index 0c7fa14..ac42db3 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -1,6 +1,6 @@
/*
** x86/x64 instruction emitter.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Emit basic instructions --------------------------------------------- */
diff --git a/src/lj_err.c b/src/lj_err.c
index d37df31..4f13494 100644
--- a/src/lj_err.c
+++ b/src/lj_err.c
@@ -1,6 +1,6 @@
/*
** Error handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_err_c
@@ -106,7 +106,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
return cf;
}
}
- if (frame <= tvref(L->stack))
+ if (frame <= tvref(L->stack)+LJ_FR2)
break;
switch (frame_typep(frame)) {
case FRAME_LUA: /* Lua frame. */
@@ -114,9 +114,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
frame = frame_prevl(frame);
break;
case FRAME_C: /* C frame. */
-#if LJ_HASFFI
unwind_c:
-#endif
#if LJ_UNWIND_EXT
if (errcode) {
L->base = frame_prevd(frame) + 1;
@@ -150,10 +148,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
}
return cf;
case FRAME_CONT: /* Continuation frame. */
-#if LJ_HASFFI
- if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
+ if (frame_iscont_fficb(frame))
goto unwind_c;
-#endif
case FRAME_VARG: /* Vararg frame. */
frame = frame_prevd(frame);
break;
@@ -175,7 +171,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
}
/* No C frame. */
if (errcode) {
- L->base = tvref(L->stack)+1;
+ L->base = tvref(L->stack)+1+LJ_FR2;
L->cframe = NULL;
unwindstack(L, L->base);
if (G(L)->panic)
@@ -498,10 +494,9 @@ LJ_NOINLINE void lj_err_mem(lua_State *L)
/* Find error function for runtime errors. Requires an extra stack traversal. */
static ptrdiff_t finderrfunc(lua_State *L)
{
- cTValue *frame = L->base-1, *bot = tvref(L->stack);
+ cTValue *frame = L->base-1, *bot = tvref(L->stack)+LJ_FR2;
void *cf = L->cframe;
- while (frame > bot) {
- lua_assert(cf != NULL);
+ while (frame > bot && cf) {
while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */
if (frame >= restorestack(L, -cframe_nres(cf)))
break;
@@ -523,10 +518,8 @@ static ptrdiff_t finderrfunc(lua_State *L)
frame = frame_prevd(frame);
break;
case FRAME_CONT:
-#if LJ_HASFFI
- if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
+ if (frame_iscont_fficb(frame))
cf = cframe_prev(cf);
-#endif
frame = frame_prevd(frame);
break;
case FRAME_CP:
@@ -537,8 +530,8 @@ static ptrdiff_t finderrfunc(lua_State *L)
break;
case FRAME_PCALL:
case FRAME_PCALLH:
- if (frame_ftsz(frame) >= (ptrdiff_t)(2*sizeof(TValue))) /* xpcall? */
- return savestack(L, frame-1); /* Point to xpcall's errorfunc. */
+ if (frame_func(frame_prevd(frame))->c.ffid == FF_xpcall)
+ return savestack(L, frame_prevd(frame)+1); /* xpcall's errorfunc. */
return 0;
default:
lua_assert(0);
@@ -561,8 +554,9 @@ LJ_NOINLINE void lj_err_run(lua_State *L)
lj_err_throw(L, LUA_ERRERR);
}
L->status = LUA_ERRERR;
- copyTV(L, top, top-1);
+ copyTV(L, top+LJ_FR2, top-1);
copyTV(L, top-1, errfunc);
+ if (LJ_FR2) setnilV(top++);
L->top = top+1;
lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */
}
@@ -637,8 +631,9 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
const BCIns *pc = cframe_Lpc(L);
if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) {
const char *tname = lj_typename(o);
+ if (LJ_FR2) o++;
setframe_pc(o, pc);
- setframe_gc(o, obj2gco(L));
+ setframe_gc(o, obj2gco(L), LJ_TTHREAD);
L->top = L->base = o+1;
err_msgv(L, LJ_ERR_BADCALL, tname);
}
@@ -653,13 +648,10 @@ LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg)
if (frame_islua(frame)) {
pframe = frame_prevl(frame);
} else if (frame_iscont(frame)) {
-#if LJ_HASFFI
- if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) {
+ if (frame_iscont_fficb(frame)) {
pframe = frame;
frame = NULL;
- } else
-#endif
- {
+ } else {
pframe = frame_prevd(frame);
#if LJ_HASFFI
/* Remove frame for FFI metamethods. */
@@ -728,9 +720,23 @@ LJ_NOINLINE void lj_err_arg(lua_State *L, int narg, ErrMsg em)
/* Typecheck error for arguments. */
LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname)
{
- TValue *o = narg < 0 ? L->top + narg : L->base + narg-1;
- const char *tname = o < L->top ? lj_typename(o) : lj_obj_typename[0];
- const char *msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
+ const char *tname, *msg;
+ if (narg <= LUA_REGISTRYINDEX) {
+ if (narg >= LUA_GLOBALSINDEX) {
+ tname = lj_obj_itypename[~LJ_TTAB];
+ } else {
+ GCfunc *fn = curr_func(L);
+ int idx = LUA_GLOBALSINDEX - narg;
+ if (idx <= fn->c.nupvalues)
+ tname = lj_typename(&fn->c.upvalue[idx-1]);
+ else
+ tname = lj_obj_typename[0];
+ }
+ } else {
+ TValue *o = narg < 0 ? L->top + narg : L->base + narg-1;
+ tname = o < L->top ? lj_typename(o) : lj_obj_typename[0];
+ }
+ msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
err_argmsg(L, narg, msg);
}
diff --git a/src/lj_err.h b/src/lj_err.h
index 7ea512a..03a56f0 100644
--- a/src/lj_err.h
+++ b/src/lj_err.h
@@ -1,6 +1,6 @@
/*
** Error handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_ERR_H
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
index ad9318c..7717665 100644
--- a/src/lj_errmsg.h
+++ b/src/lj_errmsg.h
@@ -1,6 +1,6 @@
/*
** VM error messages.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
/* This file may be included multiple times with different ERRDEF macros. */
diff --git a/src/lj_ff.h b/src/lj_ff.h
index d91a739..73dad96 100644
--- a/src/lj_ff.h
+++ b/src/lj_ff.h
@@ -1,6 +1,6 @@
/*
** Fast function IDs.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_FF_H
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 1fff4f4..e17f681 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -1,6 +1,6 @@
/*
** Fast function call recorder.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_ffrecord_c
@@ -107,9 +107,10 @@ static void recff_stitch(jit_State *J)
TValue *pframe = frame_prevl(base-1);
TRef trcont;
+ lua_assert(!LJ_FR2); /* TODO_FR2: handle frame shift. */
/* Move func + args up in Lua stack and insert continuation. */
memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1));
- setframe_ftsz(base+1, (int)((char *)(base+1) - (char *)pframe) + FRAME_CONT);
+ setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT);
setcont(base, cont);
setframe_pc(base, pc);
if (LJ_DUALNUM) setintV(base-1, traceno); else base[-1].u64 = traceno;
@@ -195,7 +196,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd)
uint32_t t;
if (tvisnumber(&rd->argv[0]))
t = ~LJ_TNUMX;
- else if (LJ_64 && tvislightud(&rd->argv[0]))
+ else if (LJ_64 && !LJ_GC64 && tvislightud(&rd->argv[0]))
t = ~LJ_TLIGHTUD;
else
t = ~itype(&rd->argv[0]);
@@ -466,6 +467,7 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
TValue argv0, argv1;
TRef tmp;
int errcode;
+ lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
/* Swap function and traceback. */
tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp;
copyTV(J->L, &argv0, &rd->argv[0]);
diff --git a/src/lj_ffrecord.h b/src/lj_ffrecord.h
index b3bc662..f858ca2 100644
--- a/src/lj_ffrecord.h
+++ b/src/lj_ffrecord.h
@@ -1,6 +1,6 @@
/*
** Fast function call recorder.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_FFRECORD_H
diff --git a/src/lj_frame.h b/src/lj_frame.h
index fb533b0..b9595a5 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -1,6 +1,6 @@
/*
** Stack frames.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_FRAME_H
@@ -11,7 +11,16 @@
/* -- Lua stack frame ----------------------------------------------------- */
-/* Frame type markers in callee function slot (callee base-1). */
+/* Frame type markers in LSB of PC (4-byte aligned) or delta (8-byte aligned):
+**
+**    PC  00  Lua frame
+** delta 001  C frame
+** delta 010  Continuation frame
+** delta 011  Lua vararg frame
+** delta 101  cpcall() frame
+** delta 110  ff pcall() frame
+** delta 111  ff pcall() frame with active hook
+*/
enum {
FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG,
FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH
@@ -21,9 +30,47 @@ enum {
#define FRAME_TYPEP (FRAME_TYPE|FRAME_P)
/* Macros to access and modify Lua frames. */
+#if LJ_FR2
+/* Two-slot frame info, required for 64 bit PC/GCRef:
+**
+**                   base-2  base-1      |  base  base+1 ...
+**                  [func   PC/delta/ft] | [slots ...]
+**                  ^-- frame            | ^-- base   ^-- top
+**
+** Continuation frames:
+**
+**   base-4  base-3  base-2  base-1      |  base  base+1 ...
+**  [cont   PC ]    [func   PC/delta/ft] | [slots ...]
+**                  ^-- frame            | ^-- base   ^-- top
+*/
+#define frame_gc(f) (gcval((f)-1))
+#define frame_ftsz(f) ((ptrdiff_t)(f)->ftsz)
+#define frame_pc(f) ((const BCIns *)frame_ftsz(f))
+#define setframe_gc(f, p, tp) (setgcVraw((f)-1, (p), (tp)))
+#define setframe_ftsz(f, sz) ((f)->ftsz = (sz))
+#define setframe_pc(f, pc) ((f)->ftsz = (int64_t)(intptr_t)(pc))
+#else
+/* One-slot frame info, sufficient for 32 bit PC/GCRef:
+**
+**              base-1              |  base  base+1 ...
+**              lo     hi           |
+**             [func | PC/delta/ft] | [slots ...]
+**             ^-- frame            | ^-- base   ^-- top
+**
+** Continuation frames:
+**
+**  base-2      base-1              |  base  base+1 ...
+**  lo     hi   lo     hi           |
+** [cont | PC] [func | PC/delta/ft] | [slots ...]
+**             ^-- frame            | ^-- base   ^-- top
+*/
#define frame_gc(f) (gcref((f)->fr.func))
-#define frame_func(f) (&frame_gc(f)->fn)
-#define frame_ftsz(f) ((f)->fr.tp.ftsz)
+#define frame_ftsz(f) ((ptrdiff_t)(f)->fr.tp.ftsz)
+#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns))
+#define setframe_gc(f, p, tp) (setgcref((f)->fr.func, (p)), UNUSED(tp))
+#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (int32_t)(sz))
+#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc)))
+#endif
#define frame_type(f) (frame_ftsz(f) & FRAME_TYPE)
#define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP)
@@ -33,27 +80,36 @@ enum {
#define frame_isvarg(f) (frame_typep(f) == FRAME_VARG)
#define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL)
-#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns))
+#define frame_func(f) (&frame_gc(f)->fn)
+#define frame_delta(f) (frame_ftsz(f) >> 3)
+#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP)
+
+enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
+
+#if LJ_FR2
+#define frame_contpc(f) (frame_pc((f)-2))
+#define frame_contv(f) (((f)-3)->u64)
+#else
#define frame_contpc(f) (frame_pc((f)-1))
-#if LJ_64
+#define frame_contv(f) (((f)-1)->u32.lo)
+#endif
+#if LJ_FR2
+#define frame_contf(f) ((ASMFunction)(uintptr_t)((f)-3)->u64)
+#elif LJ_64
#define frame_contf(f) \
((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \
(intptr_t)(int32_t)((f)-1)->u32.lo))
#else
#define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void))
#endif
-#define frame_delta(f) (frame_ftsz(f) >> 3)
-#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP)
+#define frame_iscont_fficb(f) \
+ (LJ_HASFFI && frame_contv(f) == LJ_CONT_FFI_CALLBACK)
-#define frame_prevl(f) ((f) - (1+bc_a(frame_pc(f)[-1])))
+#define frame_prevl(f) ((f) - (1+LJ_FR2+bc_a(frame_pc(f)[-1])))
#define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f)))
#define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f))
/* Note: this macro does not skip over FRAME_VARG. */
-#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc)))
-#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (sz))
-#define setframe_gc(f, p) (setgcref((f)->fr.func, (p)))
-
/* -- C stack frame ------------------------------------------------------- */
/* Macros to access and modify the C stack frame chain. */
@@ -103,6 +159,15 @@ enum {
#define CFRAME_SIZE 64
#endif
#define CFRAME_SHIFT_MULTRES 3
+#elif LJ_TARGET_ARM64
+#define CFRAME_OFS_ERRF 196
+#define CFRAME_OFS_NRES 200
+#define CFRAME_OFS_PREV 160
+#define CFRAME_OFS_L 176
+#define CFRAME_OFS_PC 168
+#define CFRAME_OFS_MULTRES 192
+#define CFRAME_SIZE 208
+#define CFRAME_SHIFT_MULTRES 3
#elif LJ_TARGET_PPC
#if LJ_TARGET_XBOX360
#define CFRAME_OFS_ERRF 424
@@ -113,7 +178,7 @@ enum {
#define CFRAME_OFS_MULTRES 408
#define CFRAME_SIZE 384
#define CFRAME_SHIFT_MULTRES 3
-#elif LJ_ARCH_PPC64
+#elif LJ_ARCH_PPC32ON64
#define CFRAME_OFS_ERRF 472
#define CFRAME_OFS_NRES 468
#define CFRAME_OFS_PREV 448
@@ -132,15 +197,6 @@ enum {
#define CFRAME_SIZE 272
#define CFRAME_SHIFT_MULTRES 3
#endif
-#elif LJ_TARGET_PPCSPE
-#define CFRAME_OFS_ERRF 28
-#define CFRAME_OFS_NRES 24
-#define CFRAME_OFS_PREV 20
-#define CFRAME_OFS_L 16
-#define CFRAME_OFS_PC 12
-#define CFRAME_OFS_MULTRES 8
-#define CFRAME_SIZE 184
-#define CFRAME_SHIFT_MULTRES 3
#elif LJ_TARGET_MIPS
#define CFRAME_OFS_ERRF 124
#define CFRAME_OFS_NRES 120
diff --git a/src/lj_func.c b/src/lj_func.c
index 9a59b0f..eb8a9db 100644
--- a/src/lj_func.c
+++ b/src/lj_func.c
@@ -1,6 +1,6 @@
/*
** Function handling (prototypes, functions and upvalues).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
diff --git a/src/lj_func.h b/src/lj_func.h
index 88732e8..a6e534e 100644
--- a/src/lj_func.h
+++ b/src/lj_func.h
@@ -1,6 +1,6 @@
/*
** Function handling (prototypes, functions and upvalues).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_FUNC_H
diff --git a/src/lj_gc.c b/src/lj_gc.c
index 376c9d0..99d664a 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -1,6 +1,6 @@
/*
** Garbage collector.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -268,12 +268,12 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th)
{
TValue *frame, *top = th->top-1, *bot = tvref(th->stack);
/* Note: extra vararg frame not skipped, marks function twice (harmless). */
- for (frame = th->base-1; frame > bot; frame = frame_prev(frame)) {
+ for (frame = th->base-1; frame > bot+LJ_FR2; frame = frame_prev(frame)) {
GCfunc *fn = frame_func(frame);
TValue *ftop = frame;
if (isluafunc(fn)) ftop += funcproto(fn)->framesize;
if (ftop > top) top = ftop;
- gc_markobj(g, fn); /* Need to mark hidden function (or L). */
+ if (!LJ_FR2) gc_markobj(g, fn); /* Need to mark hidden function (or L). */
}
top++; /* Correct bias of -1 (frame == base-1). */
if (top > tvref(th->maxstack)) top = tvref(th->maxstack);
@@ -284,7 +284,7 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th)
static void gc_traverse_thread(global_State *g, lua_State *th)
{
TValue *o, *top = th->top;
- for (o = tvref(th->stack)+1; o < top; o++)
+ for (o = tvref(th->stack)+1+LJ_FR2; o < top; o++)
gc_marktv(g, o);
if (g->gc.state == GCSatomic) {
top = tvref(th->stack) + th->stacksize;
@@ -374,7 +374,7 @@ static const GCFreeFunc gc_freefunc[] = {
};
/* Full sweep of a GC list. */
-#define gc_fullsweep(g, p) gc_sweep(g, (p), LJ_MAX_MEM)
+#define gc_fullsweep(g, p) gc_sweep(g, (p), ~(uint32_t)0)
/* Partial sweep of a GC list. */
static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
@@ -452,17 +452,18 @@ static void gc_call_finalizer(global_State *g, lua_State *L,
{
/* Save and restore lots of state around the __gc callback. */
uint8_t oldh = hook_save(g);
- MSize oldt = g->gc.threshold;
+ GCSize oldt = g->gc.threshold;
int errcode;
TValue *top;
lj_trace_abort(g);
- top = L->top;
- L->top = top+2;
hook_entergc(g); /* Disable hooks and new traces during __gc. */
g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */
- copyTV(L, top, mo);
- setgcV(L, top+1, o, ~o->gch.gct);
- errcode = lj_vm_pcall(L, top+1, 1+0, -1); /* Stack: |mo|o| -> | */
+ top = L->top;
+ copyTV(L, top++, mo);
+ if (LJ_FR2) setnilV(top++);
+ setgcV(L, top, o, ~o->gch.gct);
+ L->top = top+1;
+ errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */
hook_restore(g, oldh);
g->gc.threshold = oldt; /* Restore GC threshold. */
if (errcode)
@@ -590,7 +591,7 @@ static void atomic(global_State *g, lua_State *L)
g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */
g->strempty.marked = g->gc.currentwhite;
setmref(g->gc.sweep, &g->gc.root);
- g->gc.estimate = g->gc.total - (MSize)udsize; /* Initial estimate. */
+ g->gc.estimate = g->gc.total - (GCSize)udsize; /* Initial estimate. */
}
/* GC state machine. Returns a cost estimate for each step performed. */
@@ -614,7 +615,7 @@ static size_t gc_onestep(lua_State *L)
g->gc.sweepstr = 0;
return 0;
case GCSsweepstring: {
- MSize old = g->gc.total;
+ GCSize old = g->gc.total;
gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */
if (g->gc.sweepstr > g->strmask)
g->gc.state = GCSsweep; /* All string hash chains sweeped. */
@@ -623,7 +624,7 @@ static size_t gc_onestep(lua_State *L)
return GCSWEEPCOST;
}
case GCSsweep: {
- MSize old = g->gc.total;
+ GCSize old = g->gc.total;
setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX));
lua_assert(old >= g->gc.total);
g->gc.estimate -= old - g->gc.total;
@@ -667,7 +668,7 @@ static size_t gc_onestep(lua_State *L)
int LJ_FASTCALL lj_gc_step(lua_State *L)
{
global_State *g = G(L);
- MSize lim;
+ GCSize lim;
int32_t ostate = g->vmstate;
setvmstate(g, GC);
lim = (GCSTEPSIZE/100) * g->gc.stepmul;
@@ -676,13 +677,13 @@ int LJ_FASTCALL lj_gc_step(lua_State *L)
if (g->gc.total > g->gc.threshold)
g->gc.debt += g->gc.total - g->gc.threshold;
do {
- lim -= (MSize)gc_onestep(L);
+ lim -= (GCSize)gc_onestep(L);
if (g->gc.state == GCSpause) {
g->gc.threshold = (g->gc.estimate/100) * g->gc.pause;
g->vmstate = ostate;
return 1; /* Finished a GC cycle. */
}
- } while ((int32_t)lim > 0);
+ } while (sizeof(lim) == 8 ? ((int64_t)lim > 0) : ((int32_t)lim > 0));
if (g->gc.debt < GCSTEPSIZE) {
g->gc.threshold = g->gc.total + GCSTEPSIZE;
g->vmstate = ostate;
@@ -801,7 +802,7 @@ void lj_gc_barriertrace(global_State *g, uint32_t traceno)
/* -- Allocator ----------------------------------------------------------- */
/* Call pluggable memory allocator to allocate or resize a fragment. */
-void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz)
+void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz)
{
global_State *g = G(L);
lua_assert((osz == 0) == (p == NULL));
@@ -809,19 +810,19 @@ void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz)
if (p == NULL && nsz > 0)
lj_err_mem(L);
lua_assert((nsz == 0) == (p == NULL));
- lua_assert(checkptr32(p));
+ lua_assert(checkptrGC(p));
g->gc.total = (g->gc.total - osz) + nsz;
return p;
}
/* Allocate new GC object and link it to the root set. */
-void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size)
+void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size)
{
global_State *g = G(L);
GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size);
if (o == NULL)
lj_err_mem(L);
- lua_assert(checkptr32(o));
+ lua_assert(checkptrGC(o));
g->gc.total += size;
setgcrefr(o->gch.nextgc, g->gc.root);
setgcref(g->gc.root, o);
diff --git a/src/lj_gc.h b/src/lj_gc.h
index c85d075..847eb78 100644
--- a/src/lj_gc.h
+++ b/src/lj_gc.h
@@ -1,6 +1,6 @@
/*
** Garbage collector.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_GC_H
@@ -107,8 +107,8 @@ static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t)
lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); }
/* Allocator. */
-LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz);
-LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size);
+LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz);
+LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size);
LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
MSize *szp, MSize lim, MSize esz);
@@ -116,13 +116,13 @@ LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize)
{
- g->gc.total -= (MSize)osize;
+ g->gc.total -= (GCSize)osize;
g->allocf(g->allocd, p, osize, 0);
}
-#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (MSize)((n)*sizeof(t))))
+#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (GCSize)((n)*sizeof(t))))
#define lj_mem_reallocvec(L, p, on, n, t) \
- ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (MSize)((n)*sizeof(t))))
+ ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (GCSize)((n)*sizeof(t))))
#define lj_mem_growvec(L, p, n, m, t) \
((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t)))
#define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t))
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
index 7135833..c289cd8 100644
--- a/src/lj_gdbjit.c
+++ b/src/lj_gdbjit.c
@@ -1,6 +1,6 @@
/*
** Client for the GDB JIT API.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_gdbjit_c
diff --git a/src/lj_gdbjit.h b/src/lj_gdbjit.h
index 9ae504f..49c5863 100644
--- a/src/lj_gdbjit.h
+++ b/src/lj_gdbjit.h
@@ -1,6 +1,6 @@
/*
** Client for the GDB JIT API.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_GDBJIT_H
diff --git a/src/lj_ir.c b/src/lj_ir.c
index 460cd30..9682e05 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -1,6 +1,6 @@
/*
** SSA IR (Intermediate Representation) emitter.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_ir_c
@@ -253,7 +253,7 @@ TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
goto found;
ref = ir_nextk(J);
ir = IR(ref);
- lua_assert(checkptr32(tv));
+ lua_assert(checkptrGC(tv));
setmref(ir->ptr, tv);
ir->t.irt = t;
ir->o = op;
@@ -307,6 +307,7 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t)
{
IRIns *ir, *cir = J->cur.ir;
IRRef ref;
+ lua_assert(!LJ_GC64); /* TODO_GC64: major changes required. */
lua_assert(!isdead(J2G(J), o));
for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev)
if (ir_kgc(&cir[ref]) == o)
@@ -392,7 +393,7 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
UNUSED(L);
lua_assert(ir->o != IR_KSLOT); /* Common mistake. */
switch (ir->o) {
- case IR_KPRI: setitype(tv, irt_toitype(ir->t)); break;
+ case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break;
case IR_KINT: setintV(tv, ir->i); break;
case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break;
case IR_KPTR: case IR_KKPTR: case IR_KNULL:
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 14b8616..56e1977 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -1,6 +1,6 @@
/*
** SSA IR (Intermediate Representation) format.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_IR_H
@@ -320,6 +320,7 @@ IRTDEF(IRTENUM)
IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32,
IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT,
IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32,
+ /* TODO_GC64: major changes required for all uses of IRT_P32. */
/* Additional flags. */
IRT_MARK = 0x20, /* Marker for misc. purposes. */
@@ -371,7 +372,12 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
#define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
#define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64))
-#if LJ_64
+#if LJ_GC64
+#define IRT_IS64 \
+ ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\
+ (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\
+ (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA))
+#elif LJ_64
#define IRT_IS64 \
((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD))
#else
@@ -392,7 +398,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
return IRT_INT;
else if (tvisnum(tv))
return IRT_NUM;
-#if LJ_64
+#if LJ_64 && !LJ_GC64
else if (tvislightud(tv))
return IRT_LIGHTUD;
#endif
@@ -547,6 +553,7 @@ typedef union IRIns {
MRef ptr; /* Pointer constant (overlaps op12). */
} IRIns;
+/* TODO_GC64: major changes required. */
#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)->gcr))
#define ir_kstr(ir) (gco2str(ir_kgc((ir))))
#define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 9bf4691..84e41ec 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -1,6 +1,6 @@
/*
** IR CALL* instruction definitions.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_IRCALL_H
@@ -169,18 +169,18 @@ typedef struct CCallInfo {
_(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \
_(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \
_(FPMATH, sqrt, 1, N, NUM, XA_FP) \
- _(FPMATH, exp, 1, N, NUM, XA_FP) \
- _(FPMATH, lj_vm_exp2, 1, N, NUM, XA_FP) \
- _(FPMATH, log, 1, N, NUM, XA_FP) \
- _(FPMATH, lj_vm_log2, 1, N, NUM, XA_FP) \
- _(FPMATH, log10, 1, N, NUM, XA_FP) \
- _(FPMATH, sin, 1, N, NUM, XA_FP) \
- _(FPMATH, cos, 1, N, NUM, XA_FP) \
- _(FPMATH, tan, 1, N, NUM, XA_FP) \
- _(FPMATH, lj_vm_powi, 2, N, NUM, XA_FP) \
- _(FPMATH, pow, 2, N, NUM, XA2_FP) \
- _(FPMATH, atan2, 2, N, NUM, XA2_FP) \
- _(FPMATH, ldexp, 2, N, NUM, XA_FP) \
+ _(ANY, exp, 1, N, NUM, XA_FP) \
+ _(ANY, lj_vm_exp2, 1, N, NUM, XA_FP) \
+ _(ANY, log, 1, N, NUM, XA_FP) \
+ _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \
+ _(ANY, log10, 1, N, NUM, XA_FP) \
+ _(ANY, sin, 1, N, NUM, XA_FP) \
+ _(ANY, cos, 1, N, NUM, XA_FP) \
+ _(ANY, tan, 1, N, NUM, XA_FP) \
+ _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \
+ _(ANY, pow, 2, N, NUM, XA2_FP) \
+ _(ANY, atan2, 2, N, NUM, XA2_FP) \
+ _(ANY, ldexp, 2, N, NUM, XA_FP) \
_(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \
_(SOFTFP, softfp_add, 4, N, NUM, 0) \
_(SOFTFP, softfp_sub, 4, N, NUM, 0) \
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index e45ae2b..4e424e7 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -1,6 +1,6 @@
/*
** Common header for IR emitter and optimizations.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_IROPT_H
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 4246e9d..4b51bae 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -1,6 +1,6 @@
/*
** Common definitions for the JIT compiler.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_JIT_H
@@ -213,6 +213,9 @@ typedef struct GCtrace {
uint8_t topslot; /* Top stack slot already checked to be allocated. */
uint8_t linktype; /* Type of link. */
IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */
+#if LJ_GC64
+ uint32_t unused_gc64;
+#endif
GCRef gclist;
IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */
IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */
diff --git a/src/lj_lex.c b/src/lj_lex.c
index 49e1e88..8409cd7 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -1,6 +1,6 @@
/*
** Lexical analyzer.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -375,6 +375,7 @@ int lj_lex_setup(lua_State *L, LexState *ls)
ls->vtop = 0;
ls->bcstack = NULL;
ls->sizebcstack = 0;
+ ls->tok = 0;
ls->lookahead = TK_eof; /* No look-ahead token. */
ls->linenumber = 1;
ls->lastline = 1;
diff --git a/src/lj_lex.h b/src/lj_lex.h
index 000af12..acd2285 100644
--- a/src/lj_lex.h
+++ b/src/lj_lex.h
@@ -1,6 +1,6 @@
/*
** Lexical analyzer.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_LEX_H
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 9f84488..b16d056 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -1,6 +1,6 @@
/*
** Library function support.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_lib_c
diff --git a/src/lj_lib.h b/src/lj_lib.h
index 5bfd8d7..3fa7aa1 100644
--- a/src/lj_lib.h
+++ b/src/lj_lib.h
@@ -1,6 +1,6 @@
/*
** Library function support.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_LIB_H
@@ -47,8 +47,16 @@ LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
/* Avoid including lj_frame.h. */
+#if LJ_GC64
+#define lj_lib_upvalue(L, n) \
+ (&gcval(L->base-2)->fn.c.upvalue[(n)-1])
+#elif LJ_FR2
+#define lj_lib_upvalue(L, n) \
+ (&gcref((L->base-2)->gcr)->fn.c.upvalue[(n)-1])
+#else
#define lj_lib_upvalue(L, n) \
(&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1])
+#endif
#if LJ_TARGET_WINDOWS
#define lj_lib_checkfpu(L) \
diff --git a/src/lj_load.c b/src/lj_load.c
index 160e82f..95a6ab0 100644
--- a/src/lj_load.c
+++ b/src/lj_load.c
@@ -1,6 +1,6 @@
/*
** Load and dump code.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include <errno.h>
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
index 5a73296..d95ebeb 100644
--- a/src/lj_mcode.c
+++ b/src/lj_mcode.c
@@ -1,6 +1,6 @@
/*
** Machine code management.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_mcode_c
@@ -145,7 +145,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz)
/* -- MCode area protection ----------------------------------------------- */
-/* Define this ONLY if the page protection twiddling becomes a bottleneck. */
+/* Define this ONLY if page protection twiddling becomes a bottleneck. */
#ifdef LUAJIT_UNPROTECT_MCODE
/* It's generally considered to be a potential security risk to have
@@ -252,7 +252,20 @@ static void *mcode_alloc(jit_State *J, size_t sz)
#else
/* All memory addresses are reachable by relative jumps. */
-#define mcode_alloc(J, sz) mcode_alloc_at((J), 0, (sz), MCPROT_GEN)
+static void *mcode_alloc(jit_State *J, size_t sz)
+{
+#ifdef __OpenBSD__
+ /* Allow better executable memory allocation for OpenBSD W^X mode. */
+ void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN);
+ if (p && mcode_setprot(p, sz, MCPROT_GEN)) {
+ mcode_free(J, p, sz);
+ return NULL;
+ }
+ return p;
+#else
+ return mcode_alloc_at(J, 0, sz, MCPROT_GEN);
+#endif
+}
#endif
diff --git a/src/lj_mcode.h b/src/lj_mcode.h
index 89344fc..ee60452 100644
--- a/src/lj_mcode.h
+++ b/src/lj_mcode.h
@@ -1,6 +1,6 @@
/*
** Machine code management.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_MCODE_H
diff --git a/src/lj_meta.c b/src/lj_meta.c
index dea456f..104ecf0 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -1,6 +1,6 @@
/*
** Metamethod handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -80,12 +80,16 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv)
TValue *base = L->base;
TValue *top = L->top;
const BCIns *pc = frame_pc(base-1); /* Preserve old PC from frame. */
- copyTV(L, base-1, tv); /* Replace frame with new object. */
- top->u32.lo = LJ_CONT_TAILCALL;
- setframe_pc(top, pc);
- setframe_gc(top+1, obj2gco(L)); /* Dummy frame object. */
- setframe_ftsz(top+1, (int)((char *)(top+2) - (char *)base) + FRAME_CONT);
- L->base = L->top = top+2;
+ copyTV(L, base-1-LJ_FR2, tv); /* Replace frame with new object. */
+ if (LJ_FR2)
+ (top++)->u64 = LJ_CONT_TAILCALL;
+ else
+ top->u32.lo = LJ_CONT_TAILCALL;
+ setframe_pc(top++, pc);
+ if (LJ_FR2) top++;
+ setframe_gc(top, obj2gco(L), LJ_TTHREAD); /* Dummy frame object. */
+ setframe_ftsz(top, ((char *)(top+1) - (char *)base) + FRAME_CONT);
+ L->base = L->top = top+1;
/*
** before: [old_mo|PC] [... ...]
** ^base ^top
@@ -116,11 +120,13 @@ static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo,
*/
TValue *top = L->top;
if (curr_funcisL(L)) top = curr_topL(L);
- setcont(top, cont); /* Assembler VM stores PC in upper word. */
- copyTV(L, top+1, mo); /* Store metamethod and two arguments. */
- copyTV(L, top+2, a);
- copyTV(L, top+3, b);
- return top+2; /* Return new base. */
+ setcont(top++, cont); /* Assembler VM stores PC in upper word or FR2. */
+ if (LJ_FR2) setnilV(top++);
+ copyTV(L, top++, mo); /* Store metamethod and two arguments. */
+ if (LJ_FR2) setnilV(top++);
+ copyTV(L, top, a);
+ copyTV(L, top+1, b);
+ return top; /* Return new base. */
}
/* -- C helpers for some instructions, called from assembler VM ----------- */
@@ -256,10 +262,11 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
** after mm: [...][CAT stack ...] <--push-- [result]
** next step: [...][CAT stack .............]
*/
- copyTV(L, top+2, top); /* Careful with the order of stack copies! */
- copyTV(L, top+1, top-1);
- copyTV(L, top, mo);
+ copyTV(L, top+2*LJ_FR2+2, top); /* Carefully ordered stack copies! */
+ copyTV(L, top+2*LJ_FR2+1, top-1);
+ copyTV(L, top+LJ_FR2, mo);
setcont(top-1, lj_cont_cat);
+ if (LJ_FR2) { setnilV(top); setnilV(top+2); top += 2; }
return top+1; /* Trigger metamethod call. */
} else {
/* Pick as many strings as possible from the top and concatenate them:
@@ -327,12 +334,14 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne)
return (TValue *)(intptr_t)ne;
}
top = curr_top(L);
- setcont(top, ne ? lj_cont_condf : lj_cont_condt);
- copyTV(L, top+1, mo);
+ setcont(top++, ne ? lj_cont_condf : lj_cont_condt);
+ if (LJ_FR2) setnilV(top++);
+ copyTV(L, top++, mo);
+ if (LJ_FR2) setnilV(top++);
it = ~(uint32_t)o1->gch.gct;
- setgcV(L, top+2, o1, it);
- setgcV(L, top+3, o2, it);
- return top+2; /* Trigger metamethod call. */
+ setgcV(L, top, o1, it);
+ setgcV(L, top+1, o2, it);
+ return top; /* Trigger metamethod call. */
}
return (TValue *)(intptr_t)ne;
}
@@ -355,7 +364,7 @@ TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins)
o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)];
} else {
lua_assert(op == BC_ISEQP);
- setitype(&tv, ~bc_d(ins));
+ setpriV(&tv, ~bc_d(ins));
o2 = &tv;
}
mo = lj_meta_lookup(L, o1mm, MM_eq);
@@ -431,7 +440,8 @@ void lj_meta_call(lua_State *L, TValue *func, TValue *top)
TValue *p;
if (!tvisfunc(mo))
lj_err_optype_call(L, func);
- for (p = top; p > func; p--) copyTV(L, p, p-1);
+ for (p = top; p > func+2*LJ_FR2; p--) copyTV(L, p, p-1);
+ if (LJ_FR2) copyTV(L, func+2, func);
copyTV(L, func, mo);
}
diff --git a/src/lj_meta.h b/src/lj_meta.h
index 5068f7a..7f71633 100644
--- a/src/lj_meta.h
+++ b/src/lj_meta.h
@@ -1,6 +1,6 @@
/*
** Metamethod handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_META_H
diff --git a/src/lj_obj.c b/src/lj_obj.c
index e806716..b78d2c8 100644
--- a/src/lj_obj.c
+++ b/src/lj_obj.c
@@ -1,6 +1,6 @@
/*
** Miscellaneous object handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_obj_c
diff --git a/src/lj_obj.h b/src/lj_obj.h
index daa62e3..74ed59b 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -1,6 +1,6 @@
/*
** LuaJIT VM tags, values and objects.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -15,42 +15,75 @@
/* -- Memory references (32 bit address space) ---------------------------- */
-/* Memory size. */
+/* Memory and GC object sizes. */
typedef uint32_t MSize;
+#if LJ_GC64
+typedef uint64_t GCSize;
+#else
+typedef uint32_t GCSize;
+#endif
/* Memory reference */
typedef struct MRef {
+#if LJ_GC64
+ uint64_t ptr64; /* True 64 bit pointer. */
+#else
uint32_t ptr32; /* Pseudo 32 bit pointer. */
+#endif
} MRef;
+#if LJ_GC64
+#define mref(r, t) ((t *)(void *)(r).ptr64)
+
+#define setmref(r, p) ((r).ptr64 = (uint64_t)(void *)(p))
+#define setmrefr(r, v) ((r).ptr64 = (v).ptr64)
+#else
#define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32)
#define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p))
#define setmrefr(r, v) ((r).ptr32 = (v).ptr32)
+#endif
/* -- GC object references (32 bit address space) ------------------------- */
/* GCobj reference */
typedef struct GCRef {
+#if LJ_GC64
+ uint64_t gcptr64; /* True 64 bit pointer. */
+#else
uint32_t gcptr32; /* Pseudo 32 bit pointer. */
+#endif
} GCRef;
/* Common GC header for all collectable objects. */
#define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct
/* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */
+#if LJ_GC64
+#define gcref(r) ((GCobj *)(r).gcptr64)
+#define gcrefp(r, t) ((t *)(void *)(r).gcptr64)
+#define gcrefu(r) ((r).gcptr64)
+#define gcrefeq(r1, r2) ((r1).gcptr64 == (r2).gcptr64)
+
+#define setgcref(r, gc) ((r).gcptr64 = (uint64_t)&(gc)->gch)
+#define setgcreft(r, gc, it) \
+ (r).gcptr64 = (uint64_t)&(gc)->gch | (((uint64_t)(it)) << 47)
+#define setgcrefp(r, p) ((r).gcptr64 = (uint64_t)(p))
+#define setgcrefnull(r) ((r).gcptr64 = 0)
+#define setgcrefr(r, v) ((r).gcptr64 = (v).gcptr64)
+#else
#define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32)
#define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32)
#define gcrefu(r) ((r).gcptr32)
-#define gcrefi(r) ((int32_t)(r).gcptr32)
#define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32)
-#define gcnext(gc) (gcref((gc)->gch.nextgc))
#define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch)
-#define setgcrefi(r, i) ((r).gcptr32 = (uint32_t)(i))
#define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p))
#define setgcrefnull(r) ((r).gcptr32 = 0)
#define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32)
+#endif
+
+#define gcnext(gc) (gcref((gc)->gch.nextgc))
/* IMPORTANT NOTE:
**
@@ -132,13 +165,23 @@ typedef struct SBuf {
/* Frame link. */
typedef union {
int32_t ftsz; /* Frame type and size of previous frame. */
- MRef pcr; /* Overlaps PC for Lua frames. */
+ MRef pcr; /* Or PC for Lua frames. */
} FrameLink;
/* Tagged value. */
typedef LJ_ALIGN(8) union TValue {
uint64_t u64; /* 64 bit pattern overlaps number. */
lua_Number n; /* Number object overlaps split tag/value object. */
+#if LJ_GC64
+ GCRef gcr; /* GCobj reference with tag. */
+ int64_t it64;
+ struct {
+ LJ_ENDIAN_LOHI(
+ int32_t i; /* Integer value. */
+ , uint32_t it; /* Internal object tag. Must overlap MSW of number. */
+ )
+ };
+#else
struct {
LJ_ENDIAN_LOHI(
union {
@@ -148,12 +191,17 @@ typedef LJ_ALIGN(8) union TValue {
, uint32_t it; /* Internal object tag. Must overlap MSW of number. */
)
};
+#endif
+#if LJ_FR2
+ int64_t ftsz; /* Frame type and size of previous frame, or PC. */
+#else
struct {
LJ_ENDIAN_LOHI(
GCRef func; /* Function for next frame (or dummy L). */
, FrameLink tp; /* Link to previous frame. */
)
} fr;
+#endif
struct {
LJ_ENDIAN_LOHI(
uint32_t lo; /* Lower 32 bits of number. */
@@ -173,6 +221,8 @@ typedef const TValue cTValue;
/* Internal object tags.
**
+** Format for 32 bit GC references (!LJ_GC64):
+**
** Internal tags overlap the MSW of a number object (must be a double).
** Interpreted as a double these are special NaNs. The FPU only generates
** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available
@@ -187,6 +237,18 @@ typedef const TValue cTValue;
** int (LJ_DUALNUM)| itype | int |
** number -------double------
**
+** Format for 64 bit GC references (LJ_GC64):
+**
+** The upper 13 bits must be 1 (0xfff8...) for a special NaN. The next
+** 4 bits hold the internal tag. The lowest 47 bits either hold a pointer,
+** a zero-extended 32 bit integer or all bits set to 1 for primitive types.
+**
+** ------MSW------.------LSW------
+** primitive types |1..1|itype|1..................1|
+** GC objects/lightud |1..1|itype|-------GCRef--------|
+** int (LJ_DUALNUM) |1..1|itype|0..0|-----int-------|
+** number ------------double-------------
+**
** ORDER LJ_T
** Primitive types nil/false/true must be first, lightuserdata next.
** GC objects are at the end, table/userdata must be lowest.
@@ -209,7 +271,7 @@ typedef const TValue cTValue;
#define LJ_TNUMX (~13u)
/* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */
-#if LJ_64
+#if LJ_64 && !LJ_GC64
#define LJ_TISNUM 0xfffeffffu
#else
#define LJ_TISNUM LJ_TNUMX
@@ -219,6 +281,10 @@ typedef const TValue cTValue;
#define LJ_TISGCV (LJ_TSTR+1)
#define LJ_TISTABUD LJ_TTAB
+#if LJ_GC64
+#define LJ_GCVMASK (((uint64_t)1 << 47) - 1)
+#endif
+
/* -- String object ------------------------------------------------------- */
/* String object header. String payload follows. */
@@ -292,6 +358,9 @@ typedef struct GCproto {
uint8_t numparams; /* Number of parameters. */
uint8_t framesize; /* Fixed frame size. */
MSize sizebc; /* Number of bytecode instructions. */
+#if LJ_GC64
+ uint32_t unused_gc64;
+#endif
GCRef gclist;
MRef k; /* Split constant array (points to the middle). */
MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */
@@ -403,7 +472,9 @@ typedef struct Node {
TValue val; /* Value object. Must be first field. */
TValue key; /* Key object. */
MRef next; /* Hash chain. */
+#if !LJ_GC64
MRef freetop; /* Top of free elements (stored in t->node[0]). */
+#endif
} Node;
LJ_STATIC_ASSERT(offsetof(Node, val) == 0);
@@ -418,12 +489,22 @@ typedef struct GCtab {
MRef node; /* Hash part. */
uint32_t asize; /* Size of array part (keys [0, asize-1]). */
uint32_t hmask; /* Hash part mask (size of hash part - 1). */
+#if LJ_GC64
+ MRef freetop; /* Top of free elements. */
+#endif
} GCtab;
#define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab))
#define tabref(r) (&gcref((r))->tab)
#define noderef(r) (mref((r), Node))
#define nextnode(n) (mref((n)->next, Node))
+#if LJ_GC64
+#define getfreetop(t, n) (noderef((t)->freetop))
+#define setfreetop(t, n, v) (setmref((t)->freetop, (v)))
+#else
+#define getfreetop(t, n) (noderef((n)->freetop))
+#define setfreetop(t, n, v) (setmref((n)->freetop, (v)))
+#endif
/* -- State objects ------------------------------------------------------- */
@@ -490,8 +571,8 @@ typedef enum {
#define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)]))
typedef struct GCState {
- MSize total; /* Memory currently allocated. */
- MSize threshold; /* Memory threshold. */
+ GCSize total; /* Memory currently allocated. */
+ GCSize threshold; /* Memory threshold. */
uint8_t currentwhite; /* Current white color. */
uint8_t state; /* GC state. */
uint8_t nocdatafin; /* No cdata finalizer called. */
@@ -503,9 +584,9 @@ typedef struct GCState {
GCRef grayagain; /* List of objects for atomic traversal. */
GCRef weak; /* List of weak tables (to be cleared). */
GCRef mmudata; /* List of userdata (to be finalized). */
+ GCSize debt; /* Debt (how much GC is behind schedule). */
+ GCSize estimate; /* Estimate of memory actually in use. */
MSize stepmul; /* Incremental GC step granularity. */
- MSize debt; /* Debt (how much GC is behind schedule). */
- MSize estimate; /* Estimate of memory actually in use. */
MSize pause; /* Pause between successive GC cycles. */
} GCState;
@@ -585,7 +666,13 @@ struct lua_State {
#define registry(L) (&G(L)->registrytv)
/* Macros to access the currently executing (Lua) function. */
+#if LJ_GC64
+#define curr_func(L) (&gcval(L->base-2)->fn)
+#elif LJ_FR2
+#define curr_func(L) (&gcref((L->base-2)->gcr)->fn)
+#else
#define curr_func(L) (&gcref((L->base-1)->fr.func)->fn)
+#endif
#define curr_funcisL(L) (isluafunc(curr_func(L)))
#define curr_proto(L) (funcproto(curr_func(L)))
#define curr_topL(L) (L->base + curr_proto(L)->framesize)
@@ -649,12 +736,17 @@ typedef union GCobj {
#endif
/* Macros to test types. */
+#if LJ_GC64
+#define itype(o) ((uint32_t)((o)->it64 >> 47))
+#define tvisnil(o) ((o)->it64 == -1)
+#else
#define itype(o) ((o)->it)
#define tvisnil(o) (itype(o) == LJ_TNIL)
+#endif
#define tvisfalse(o) (itype(o) == LJ_TFALSE)
#define tvistrue(o) (itype(o) == LJ_TTRUE)
#define tvisbool(o) (tvisfalse(o) || tvistrue(o))
-#if LJ_64
+#if LJ_64 && !LJ_GC64
#define tvislightud(o) (((int32_t)itype(o) >> 15) == -2)
#else
#define tvislightud(o) (itype(o) == LJ_TLIGHTUD)
@@ -688,7 +780,7 @@ typedef union GCobj {
#define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64)
/* Macros to convert type ids. */
-#if LJ_64
+#if LJ_64 && !LJ_GC64
#define itypemap(o) \
(tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o))
#else
@@ -696,8 +788,12 @@ typedef union GCobj {
#endif
/* Macros to get tagged values. */
+#if LJ_GC64
+#define gcval(o) ((GCobj *)(gcrefu((o)->gcr) & LJ_GCVMASK))
+#else
#define gcval(o) (gcref((o)->gcr))
-#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - (o)->it))
+#endif
+#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - itype(o)))
#if LJ_64
#define lightudV(o) \
check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff)))
@@ -716,13 +812,23 @@ typedef union GCobj {
#define intV(o) check_exp(tvisint(o), (int32_t)(o)->i)
/* Macros to set tagged values. */
+#if LJ_GC64
+#define setitype(o, i) ((o)->it = ((i) << 15))
+#define setnilV(o) ((o)->it64 = -1)
+#define setpriV(o, x) ((o)->it64 = (int64_t)~((uint64_t)~(x)<<47))
+#define setboolV(o, x) ((o)->it64 = (int64_t)~((uint64_t)((x)+1)<<47))
+#else
#define setitype(o, i) ((o)->it = (i))
#define setnilV(o) ((o)->it = LJ_TNIL)
#define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x))
+#define setpriV(o, i) (setitype((o), (i)))
+#endif
static LJ_AINLINE void setlightudV(TValue *o, void *p)
{
-#if LJ_64
+#if LJ_GC64
+ o->u64 = (uint64_t)p | (((uint64_t)LJ_TLIGHTUD) << 47);
+#elif LJ_64
o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48);
#else
setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD);
@@ -732,10 +838,16 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p)
#if LJ_64
#define checklightudptr(L, p) \
(((uint64_t)(p) >> 47) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p))
+#else
+#define checklightudptr(L, p) (p)
+#endif
+
+#if LJ_FR2
+#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)(void *)(f))
+#elif LJ_64
#define setcont(o, f) \
((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin)
#else
-#define checklightudptr(L, p) (p)
#define setcont(o, f) setlightudV((o), (void *)(f))
#endif
@@ -743,9 +855,18 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p)
UNUSED(L), lua_assert(!tvisgcv(o) || \
((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o))))
-static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t itype)
+static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype)
+{
+#if LJ_GC64
+ setgcreft(o->gcr, v, itype);
+#else
+ setgcref(o->gcr, v); setitype(o, itype);
+#endif
+}
+
+static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it)
{
- setgcref(o->gcr, v); setitype(o, itype); tvchecklive(L, o);
+ setgcVraw(o, v, it); tvchecklive(L, o);
}
#define define_setV(name, type, tag) \
diff --git a/src/lj_opt_dce.c b/src/lj_opt_dce.c
index fdfe934..7f1faaf 100644
--- a/src/lj_opt_dce.c
+++ b/src/lj_opt_dce.c
@@ -1,6 +1,6 @@
/*
** DCE: Dead Code Elimination. Pre-LOOP only -- ASM already performs DCE.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_opt_dce_c
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index a3af310..f809a99 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -2,7 +2,7 @@
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** ABCelim: Array Bounds Check Elimination.
** CSE: Common-Subexpression Elimination.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_opt_fold_c
@@ -1167,11 +1167,16 @@ LJFOLDF(simplify_conv_flt_num)
LJFOLD(TOBIT CONV KNUM)
LJFOLDF(simplify_tobit_conv)
{
- if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
- (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
- /* Fold even across PHI to avoid expensive num->int conversions in loop. */
+ /* Fold even across PHI to avoid expensive num->int conversions in loop. */
+ if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
lua_assert(irt_isnum(fleft->t));
return fleft->op1;
+ } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
+ lua_assert(irt_isnum(fleft->t));
+ fins->o = IR_CONV;
+ fins->op1 = fleft->op1;
+ fins->op2 = (IRT_INT<<5)|IRT_U32;
+ return RETRYFOLD;
}
return NEXTFOLD;
}
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index 91f8067..4b4ab7d 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -1,6 +1,6 @@
/*
** LOOP: Loop Optimizations.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_opt_loop_c
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
index 902392b..e04a622 100644
--- a/src/lj_opt_mem.c
+++ b/src/lj_opt_mem.c
@@ -3,7 +3,7 @@
** AA: Alias Analysis using high-level semantic disambiguation.
** FWD: Load Forwarding (L2L) + Store Forwarding (S2L).
** DSE: Dead-Store Elimination.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_opt_mem_c
@@ -808,6 +808,7 @@ TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J)
IRRef ref = *refp;
if (J->chain[IR_CALLXS] > lim) lim = J->chain[IR_CALLXS];
if (J->chain[IR_XBAR] > lim) lim = J->chain[IR_XBAR];
+ if (J->chain[IR_XSNEW] > lim) lim = J->chain[IR_XSNEW];
while (ref > lim) { /* Search for redundant or conflicting stores. */
IRIns *store = IR(ref);
switch (aa_xref(J, xr, fins, store)) {
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 5effcdb..d221c30 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -1,7 +1,7 @@
/*
** NARROW: Narrowing of numbers to integers (double to int32_t).
** STRIPOV: Stripping of overflow checks.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_opt_narrow_c
diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c
index 3a33346..a98e9df 100644
--- a/src/lj_opt_sink.c
+++ b/src/lj_opt_sink.c
@@ -1,6 +1,6 @@
/*
** SINK: Allocation Sinking and Store Sinking.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_opt_sink_c
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index 05ea1cc..81ded6c 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -1,6 +1,6 @@
/*
** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_opt_split_c
diff --git a/src/lj_parse.c b/src/lj_parse.c
index 064b9b1..9891897 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -1,6 +1,6 @@
/*
** Lua parser (source code -> bytecode).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -662,16 +662,16 @@ static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key)
BCReg idx, func, obj = expr_toanyreg(fs, e);
expr_free(fs, e);
func = fs->freereg;
- bcemit_AD(fs, BC_MOV, func+1, obj); /* Copy object to first argument. */
+ bcemit_AD(fs, BC_MOV, func+1+LJ_FR2, obj); /* Copy object to 1st argument. */
lua_assert(expr_isstrk(key));
idx = const_str(fs, key);
if (idx <= BCMAX_C) {
- bcreg_reserve(fs, 2);
+ bcreg_reserve(fs, 2+LJ_FR2);
bcemit_ABC(fs, BC_TGETS, func, obj, idx);
} else {
- bcreg_reserve(fs, 3);
- bcemit_AD(fs, BC_KSTR, func+2, idx);
- bcemit_ABC(fs, BC_TGETV, func, obj, func+2);
+ bcreg_reserve(fs, 3+LJ_FR2);
+ bcemit_AD(fs, BC_KSTR, func+2+LJ_FR2, idx);
+ bcemit_ABC(fs, BC_TGETV, func, obj, func+2+LJ_FR2);
fs->freereg--;
}
e->u.s.info = func;
@@ -687,10 +687,12 @@ static BCPos bcemit_jmp(FuncState *fs)
BCPos j = fs->pc - 1;
BCIns *ip = &fs->bcbase[j].ins;
fs->jpc = NO_JMP;
- if ((int32_t)j >= (int32_t)fs->lasttarget && bc_op(*ip) == BC_UCLO)
+ if ((int32_t)j >= (int32_t)fs->lasttarget && bc_op(*ip) == BC_UCLO) {
setbc_j(ip, NO_JMP);
- else
+ fs->lasttarget = j+1;
+ } else {
j = bcemit_AJ(fs, BC_JMP, fs->freereg, NO_JMP);
+ }
jmp_append(fs, &j, jpc);
return j;
}
@@ -1684,10 +1686,9 @@ static void expr_bracket(LexState *ls, ExpDesc *v)
static void expr_kvalue(TValue *v, ExpDesc *e)
{
if (e->k <= VKTRUE) {
- setitype(v, ~(uint32_t)e->k);
+ setpriV(v, ~(uint32_t)e->k);
} else if (e->k == VKSTR) {
- setgcref(v->gcr, obj2gco(e->u.sval));
- setitype(v, LJ_TSTR);
+ setgcVraw(v, obj2gco(e->u.sval), LJ_TSTR);
} else {
lua_assert(tvisnumber(expr_numtv(e)));
*v = *expr_numtv(e);
@@ -1914,11 +1915,11 @@ static void parse_args(LexState *ls, ExpDesc *e)
lua_assert(e->k == VNONRELOC);
base = e->u.s.info; /* Base register for call. */
if (args.k == VCALL) {
- ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1);
+ ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - LJ_FR2);
} else {
if (args.k != VVOID)
expr_tonextreg(fs, &args);
- ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base);
+ ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - LJ_FR2);
}
expr_init(e, VCALL, bcemit_INS(fs, ins));
e->u.s.aux = base;
@@ -1958,6 +1959,7 @@ static void expr_primary(LexState *ls, ExpDesc *v)
parse_args(ls, v);
} else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') {
expr_tonextreg(fs, v);
+ if (LJ_FR2) bcreg_reserve(fs, 1);
parse_args(ls, v);
} else {
break;
@@ -2538,7 +2540,8 @@ static void parse_for_iter(LexState *ls, GCstr *indexname)
lex_check(ls, TK_in);
line = ls->linenumber;
assign_adjust(ls, 3, expr_list(ls, &e), &e);
- bcreg_bump(fs, 3); /* The iterator needs another 3 slots (func + 2 args). */
+ /* The iterator needs another 3 [4] slots (func [pc] | state ctl). */
+ bcreg_bump(fs, 3+LJ_FR2);
isnext = (nvars <= 5 && predict_next(ls, fs, exprpc));
var_add(ls, 3); /* Hidden control variables. */
lex_check(ls, TK_do);
diff --git a/src/lj_parse.h b/src/lj_parse.h
index 532657a..dc4fd40 100644
--- a/src/lj_parse.h
+++ b/src/lj_parse.h
@@ -1,6 +1,6 @@
/*
** Lua parser (source code -> bytecode).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_PARSE_H
diff --git a/src/lj_profile.c b/src/lj_profile.c
index c11e385..0136701 100644
--- a/src/lj_profile.c
+++ b/src/lj_profile.c
@@ -1,6 +1,6 @@
/*
** Low-overhead profiling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_profile_c
diff --git a/src/lj_profile.h b/src/lj_profile.h
index 384d705..26cb9db 100644
--- a/src/lj_profile.h
+++ b/src/lj_profile.h
@@ -1,6 +1,6 @@
/*
** Low-overhead profiling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_PROFILE_H
diff --git a/src/lj_record.c b/src/lj_record.c
index 3ccba77..5603815 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -1,6 +1,6 @@
/*
** Trace recorder (bytecode -> SSA IR).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_record_c
@@ -502,6 +502,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
{
BCReg ra = bc_a(iterins);
+ lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
if (!tref_isnil(getslot(J, ra))) { /* Looping back? */
J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
J->maxslot = ra-1+bc_b(J->pc[-1]);
@@ -672,6 +673,7 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
TValue *functv = &J->L->base[func];
TRef *fbase = &J->base[func];
ptrdiff_t i;
+ lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
for (i = 0; i <= nargs; i++)
(void)getslot(J, func+i); /* Ensure func and all args have a reference. */
if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */
@@ -788,7 +790,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
BCIns callins = *(frame_pc(frame)-1);
ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
BCReg cbase = bc_a(callins);
- GCproto *pt = funcproto(frame_func(frame - (cbase+1)));
+ GCproto *pt = funcproto(frame_func(frame - (cbase+1-LJ_FR2)));
+ lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame teardown. */
if ((pt->flags & PROTO_NOJIT))
lj_trace_err(J, LJ_TRERR_CJITOFF);
if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) {
@@ -812,6 +815,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
!bc_isret(bc_op(J->cur.startins))) {
/* Return to lower frame would leave the loop in a root trace. */
lj_trace_err(J, LJ_TRERR_LLEAVE);
+ } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */
+ lj_trace_err(J, LJ_TRERR_NYIRETL); /* No way to insert snapshot here. */
} else { /* Return to lower frame. Guard for the target we return to. */
TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);
TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame));
@@ -973,6 +978,7 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
lj_trace_err(J, LJ_TRERR_NOMM);
}
ok:
+ lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
base[0] = ix->mobj;
copyTV(J->L, basev+0, &ix->mobjv);
lj_record_call(J, func, 2);
@@ -989,6 +995,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
BCReg func = rec_mm_prep(J, lj_cont_ra);
TRef *base = J->base + func;
TValue *basev = J->L->base + func;
+ lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv);
base[1] = tr; copyTV(J->L, basev+1, tv);
#if LJ_52
@@ -1011,6 +1018,7 @@ static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
TRef *base = J->base + func;
TValue *tv = J->L->base + func;
+ lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
copyTV(J->L, tv+0, &ix->mobjv);
copyTV(J->L, tv+1, &ix->valv);
@@ -1261,6 +1269,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
TRef *base = J->base + func;
TValue *tv = J->L->base + func;
+ lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
setfuncV(J->L, tv+0, funcV(&ix->mobjv));
copyTV(J->L, tv+1, &ix->tabv);
@@ -1627,7 +1636,8 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
if (nvararg >= nresults)
emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults));
else
- emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1)));
+ emitir(IRTGI(IR_EQ), fr,
+ lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1)));
vbase = emitir(IRTI(IR_SUB), REF_BASE, fr);
vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8));
for (i = 0; i < nload; i++) {
@@ -1792,7 +1802,7 @@ void lj_record_ins(jit_State *J)
if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) {
switch (J->postproc) {
case LJ_POST_FIXCOMP: /* Fixup comparison. */
- pc = frame_pc(&J2G(J)->tmptv);
+ pc = (const BCIns *)(uintptr_t)J2G(J)->tmptv.u64;
rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1)));
/* fallthrough */
case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */
@@ -1890,7 +1900,7 @@ void lj_record_ins(jit_State *J)
switch (bcmode_c(op)) {
case BCMvar:
copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break;
- case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break;
+ case BCMpri: setpriV(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break;
case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc);
copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) :
lj_ir_knumint(J, numV(tv)); } break;
@@ -2134,28 +2144,28 @@ void lj_record_ins(jit_State *J)
/* -- Calls and vararg handling ----------------------------------------- */
case BC_ITERC:
- J->base[ra] = getslot(J, ra-3);
- J->base[ra+1] = getslot(J, ra-2);
- J->base[ra+2] = getslot(J, ra-1);
+ J->base[ra] = getslot(J, ra-3-LJ_FR2);
+ J->base[ra+1] = getslot(J, ra-2-LJ_FR2);
+ J->base[ra+2] = getslot(J, ra-1-LJ_FR2);
{ /* Do the actual copy now because lj_record_call needs the values. */
TValue *b = &J->L->base[ra];
- copyTV(J->L, b, b-3);
- copyTV(J->L, b+1, b-2);
- copyTV(J->L, b+2, b-1);
+ copyTV(J->L, b, b-3-LJ_FR2);
+ copyTV(J->L, b+1, b-2-LJ_FR2);
+ copyTV(J->L, b+2, b-1-LJ_FR2);
}
lj_record_call(J, ra, (ptrdiff_t)rc-1);
break;
/* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */
case BC_CALLM:
- rc = (BCReg)(J->L->top - J->L->base) - ra;
+ rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2;
/* fallthrough */
case BC_CALL:
lj_record_call(J, ra, (ptrdiff_t)rc-1);
break;
case BC_CALLMT:
- rc = (BCReg)(J->L->top - J->L->base) - ra;
+ rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2;
/* fallthrough */
case BC_CALLT:
lj_record_tailcall(J, ra, (ptrdiff_t)rc-1);
diff --git a/src/lj_record.h b/src/lj_record.h
index 7e38ccc..732adb4 100644
--- a/src/lj_record.h
+++ b/src/lj_record.h
@@ -1,6 +1,6 @@
/*
** Trace recorder (bytecode -> SSA IR).
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_RECORD_H
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 3438f7b..d8e7987 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -1,6 +1,6 @@
/*
** Snapshot handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_snap_c
@@ -100,6 +100,7 @@ static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
GCfunc *fn = frame_func(frame);
cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
MSize f = 0;
+ lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */
map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
while (frame > lim) { /* Backwards traversal of all frames above base. */
if (frame_islua(frame)) {
@@ -241,7 +242,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
case BCMbase:
if (op >= BC_CALLM && op <= BC_VARG) {
BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
- maxslot : (bc_a(ins) + bc_c(ins));
+ maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
+ if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
for (; s < top; s++) USE_SLOT(s);
for (; s < maxslot; s++) DEF_SLOT(s);
@@ -600,6 +602,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
}
if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
rs = snap_renameref(T, snapno, ref, rs);
+ lua_assert(!LJ_GC64); /* TODO_GC64: handle 64 bit references. */
if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
int32_t *sps = &ex->spill[regsp_spill(rs)];
if (irt_isinteger(t)) {
@@ -613,8 +616,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
o->u64 = *(uint64_t *)sps;
} else {
lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
- setgcrefi(o->gcr, *sps);
- setitype(o, irt_toitype(t));
+ setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
}
} else { /* Restore from register. */
Reg r = regsp_reg(rs);
@@ -632,10 +634,10 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
} else if (LJ_64 && irt_islightud(t)) {
/* 64 bit lightuserdata which may escape already has the tag bits. */
o->u64 = ex->gpr[r-RID_MIN_GPR];
+ } else if (irt_ispri(t)) {
+ setpriV(o, irt_toitype(t));
} else {
- if (!irt_ispri(t))
- setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
- setitype(o, irt_toitype(t));
+ setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
}
}
}
@@ -796,7 +798,7 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
MSize n, nent = snap->nent;
SnapEntry *map = &T->snapmap[snap->mapofs];
SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
- int32_t ftsz0;
+ ptrdiff_t ftsz0;
TValue *frame;
BloomFilter rfilt = snap_renamefilter(T, snapno);
const BCIns *pc = snap_pc(map[nent]);
@@ -837,8 +839,9 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
o->u32.hi = tmp.u32.lo;
} else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
+ lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */
/* Overwrite tag with frame link. */
- o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0;
+ setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
L->base = o+1;
}
}
diff --git a/src/lj_snap.h b/src/lj_snap.h
index 1b2d74d..9a125be 100644
--- a/src/lj_snap.h
+++ b/src/lj_snap.h
@@ -1,6 +1,6 @@
/*
** Snapshot handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_SNAP_H
diff --git a/src/lj_state.c b/src/lj_state.c
index 444f269..84b4d11 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -1,6 +1,6 @@
/*
** State and stack handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -49,6 +49,7 @@
** one extra slot if mobj is not a function. Only lj_meta_tset needs 5
** slots above top, but then mobj is always a function. So we can get by
** with 5 extra slots.
+** LJ_FR2: We need 2 more slots for the frame PC and the continuation PC.
*/
/* Resize stack slots and adjust pointers in state. */
@@ -128,8 +129,9 @@ static void stack_init(lua_State *L1, lua_State *L)
L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA;
stend = st + L1->stacksize;
setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1);
- L1->base = L1->top = st+1;
- setthreadV(L1, st, L1); /* Needed for curr_funcisL() on empty stack. */
+ setthreadV(L1, st++, L1); /* Needed for curr_funcisL() on empty stack. */
+ if (LJ_FR2) setnilV(st++);
+ L1->base = L1->top = st;
while (st < stend) /* Clear new slots. */
setnilV(st++);
}
@@ -187,7 +189,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State));
lua_State *L = &GG->L;
global_State *g = &GG->g;
- if (GG == NULL || !checkptr32(GG)) return NULL;
+ if (GG == NULL || !checkptrGC(GG)) return NULL;
memset(GG, 0, sizeof(GG_State));
L->gct = ~LJ_TTHREAD;
L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */
@@ -205,7 +207,9 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
setnilV(registry(L));
setnilV(&g->nilnode.val);
setnilV(&g->nilnode.key);
+#if !LJ_GC64
setmref(g->nilnode.freetop, &g->nilnode);
+#endif
lj_buf_init(NULL, &g->tmpbuf);
g->gc.state = GCSpause;
setgcref(g->gc.root, obj2gco(L));
@@ -253,7 +257,7 @@ LUA_API void lua_close(lua_State *L)
for (i = 0;;) {
hook_enter(g);
L->status = 0;
- L->base = L->top = tvref(L->stack) + 1;
+ L->base = L->top = tvref(L->stack) + 1 + LJ_FR2;
L->cframe = NULL;
if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) {
if (++i >= 10) break;
diff --git a/src/lj_state.h b/src/lj_state.h
index 74249d9..687889a 100644
--- a/src/lj_state.h
+++ b/src/lj_state.h
@@ -1,6 +1,6 @@
/*
** State and stack handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_STATE_H
diff --git a/src/lj_str.c b/src/lj_str.c
index 46d546c..dd32450 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -1,6 +1,6 @@
/*
** String handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_str_c
diff --git a/src/lj_str.h b/src/lj_str.h
index cd1bc21..d8465de 100644
--- a/src/lj_str.h
+++ b/src/lj_str.h
@@ -1,6 +1,6 @@
/*
** String handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_STR_H
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c
index be89f7e..d54e796 100644
--- a/src/lj_strfmt.c
+++ b/src/lj_strfmt.c
@@ -1,6 +1,6 @@
/*
** String formatting.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include <stdio.h>
diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h
index 829bef2..dcfaf2e 100644
--- a/src/lj_strfmt.h
+++ b/src/lj_strfmt.h
@@ -1,6 +1,6 @@
/*
** String formatting.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_STRFMT_H
diff --git a/src/lj_strscan.c b/src/lj_strscan.c
index a21c414..568f647 100644
--- a/src/lj_strscan.c
+++ b/src/lj_strscan.c
@@ -1,6 +1,6 @@
/*
** String scanning.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include <math.h>
@@ -199,7 +199,7 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o,
*xip++ = d + ((*p != '.' ? *p : *++p) & 15); p++;
}
/* Scan and realign trailing digit. */
- if (i) *xip++ = 10 * ((*p != '.' ? *p : *++p) & 15), ex10--, p++;
+ if (i) *xip++ = 10 * ((*p != '.' ? *p : *++p) & 15), ex10--, dig++, p++;
/* Summarize rounding-effect of excess digits. */
if (dig > STRSCAN_MAXDIG) {
@@ -289,14 +289,15 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o,
/* Scale down until no more than 17 or 18 integer part digits remain. */
while (idig > 9) {
- uint32_t i, cy = 0;
+ uint32_t i = hi, cy = 0;
ex2 += 6;
- for (i = hi; i != lo; i = DNEXT(i)) {
+ do {
cy += xi[i];
xi[i] = (cy >> 6);
cy = 100 * (cy & 0x3f);
if (xi[i] == 0 && i == hi) hi = DNEXT(hi), idig--;
- }
+ i = DNEXT(i);
+ } while (i != lo);
while (cy) {
if (hi == lo) { xi[DPREV(lo)] |= 1; break; }
xi[lo] = (cy >> 6); lo = DNEXT(lo);
diff --git a/src/lj_strscan.h b/src/lj_strscan.h
index 15abd6a..7760689 100644
--- a/src/lj_strscan.h
+++ b/src/lj_strscan.h
@@ -1,6 +1,6 @@
/*
** String scanning.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_STRSCAN_H
diff --git a/src/lj_tab.c b/src/lj_tab.c
index ef19ba9..a9f4383 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -1,6 +1,6 @@
/*
** Table handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -29,7 +29,12 @@ static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi)))
#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
#define hashptr(t, p) hashlohi((t), u32ptr(p), u32ptr(p) + HASH_BIAS)
+#if LJ_GC64
+#define hashgcref(t, r) \
+ hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32))
+#else
#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
+#endif
/* Hash an arbitrary key and return its anchor position in the hash table. */
static Node *hashkey(const GCtab *t, cTValue *key)
@@ -58,8 +63,8 @@ static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits)
lj_err_msg(L, LJ_ERR_TABOV);
hsize = 1u << hbits;
node = lj_mem_newvec(L, hsize, Node);
- setmref(node->freetop, &node[hsize]);
setmref(t->node, node);
+ setfreetop(t, node, &node[hsize]);
t->hmask = hsize-1;
}
@@ -98,6 +103,7 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
GCtab *t;
/* First try to colocate the array part. */
if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) {
+ Node *nilnode;
lua_assert((sizeof(GCtab) & 7) == 0);
t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize));
t->gct = ~LJ_TTAB;
@@ -107,8 +113,13 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
setgcrefnull(t->metatable);
t->asize = asize;
t->hmask = 0;
- setmref(t->node, &G(L)->nilnode);
+ nilnode = &G(L)->nilnode;
+ setmref(t->node, nilnode);
+#if LJ_GC64
+ setmref(t->freetop, nilnode);
+#endif
} else { /* Otherwise separately allocate the array part. */
+ Node *nilnode;
t = lj_mem_newobj(L, GCtab);
t->gct = ~LJ_TTAB;
t->nomm = (uint8_t)~0;
@@ -117,7 +128,11 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
setgcrefnull(t->metatable);
t->asize = 0; /* In case the array allocation fails. */
t->hmask = 0;
- setmref(t->node, &G(L)->nilnode);
+ nilnode = &G(L)->nilnode;
+ setmref(t->node, nilnode);
+#if LJ_GC64
+ setmref(t->freetop, nilnode);
+#endif
if (asize > 0) {
if (asize > LJ_MAX_ASIZE)
lj_err_msg(L, LJ_ERR_TABOV);
@@ -191,7 +206,7 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
Node *node = noderef(t->node);
Node *knode = noderef(kt->node);
ptrdiff_t d = (char *)node - (char *)knode;
- setmref(node->freetop, (Node *)((char *)noderef(knode->freetop) + d));
+ setfreetop(t, node, (Node *)((char *)getfreetop(kt, knode) + d));
for (i = 0; i <= hmask; i++) {
Node *kn = &knode[i];
Node *n = &node[i];
@@ -210,7 +225,7 @@ void LJ_FASTCALL lj_tab_clear(GCtab *t)
clearapart(t);
if (t->hmask > 0) {
Node *node = noderef(t->node);
- setmref(node->freetop, &node[t->hmask+1]);
+ setfreetop(t, node, &node[t->hmask+1]);
clearhpart(t);
}
}
@@ -264,6 +279,9 @@ static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
} else {
global_State *g = G(L);
setmref(t->node, &g->nilnode);
+#if LJ_GC64
+ setmref(t->freetop, &g->nilnode);
+#endif
t->hmask = 0;
}
if (asize < oldasize) { /* Array part shrinks? */
@@ -445,7 +463,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
Node *n = hashkey(t, key);
if (!tvisnil(&n->val) || t->hmask == 0) {
Node *nodebase = noderef(t->node);
- Node *collide, *freenode = noderef(nodebase->freetop);
+ Node *collide, *freenode = getfreetop(t, nodebase);
lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1);
do {
if (freenode == nodebase) { /* No free node found? */
@@ -453,7 +471,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
return lj_tab_set(L, t, key); /* Retry key insertion. */
}
} while (!tvisnil(&(--freenode)->key));
- setmref(nodebase->freetop, freenode);
+ setfreetop(t, nodebase, freenode);
lua_assert(freenode != &G(L)->nilnode);
collide = hashkey(t, &n->key);
if (collide != n) { /* Colliding node not the main node? */
diff --git a/src/lj_tab.h b/src/lj_tab.h
index fd7f760..1da28bd 100644
--- a/src/lj_tab.h
+++ b/src/lj_tab.h
@@ -1,6 +1,6 @@
/*
** Table handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_TAB_H
diff --git a/src/lj_target.h b/src/lj_target.h
index 820a97a..0daecb1 100644
--- a/src/lj_target.h
+++ b/src/lj_target.h
@@ -1,6 +1,6 @@
/*
** Definitions for target CPU.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_TARGET_H
@@ -138,6 +138,8 @@ typedef uint32_t RegCost;
#include "lj_target_x86.h"
#elif LJ_TARGET_ARM
#include "lj_target_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_target_arm64.h"
#elif LJ_TARGET_PPC
#include "lj_target_ppc.h"
#elif LJ_TARGET_MIPS
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h
index 68d022b..0a243b3 100644
--- a/src/lj_target_arm.h
+++ b/src/lj_target_arm.h
@@ -1,6 +1,6 @@
/*
** Definitions for ARM CPUs.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_TARGET_ARM_H
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
new file mode 100644
index 0000000..99e0adc
--- /dev/null
+++ b/src/lj_target_arm64.h
@@ -0,0 +1,97 @@
+/*
+** Definitions for ARM64 CPUs.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_TARGET_ARM64_H
+#define _LJ_TARGET_ARM64_H
+
+/* -- Registers IDs ------------------------------------------------------- */
+
+#define GPRDEF(_) \
+ _(X0) _(X1) _(X2) _(X3) _(X4) _(X5) _(X6) _(X7) \
+ _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \
+ _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \
+ _(X24) _(X25) _(X26) _(X27) _(X28) _(FP) _(LR) _(SP) /* 32 GPR names. */
+#define FPRDEF(_) \
+ _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \
+ _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) \
+ _(D16) _(D17) _(D18) _(D19) _(D20) _(D21) _(D22) _(D23) \
+ _(D24) _(D25) _(D26) _(D27) _(D28) _(D29) _(D30) _(D31) /* 32 FPR names. */
+#define VRIDDEF(_) /* Empty list: expands to nothing on this target. */
+
+#define RIDENUM(name) RID_##name, /* List entry -> RID_<name> enumerator. */
+
+enum {
+ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
+ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
+ RID_MAX, /* Number of register IDs (follows the last FPR). */
+ RID_TMP = RID_LR, /* The temporary register is an alias for lr. */
+ RID_ZERO = RID_SP, /* The zero register shares its ID with sp. */
+
+ /* Calling conventions. */
+ RID_RET = RID_X0, /* GPR return value register. */
+ RID_FPRET = RID_D0, /* FPR return value register. */
+
+ /* These definitions must match with the *.dasc file(s): */
+ RID_BASE = RID_X19, /* Interpreter BASE. */
+ RID_LPC = RID_X21, /* Interpreter PC. */
+ RID_GL = RID_X22, /* Interpreter GL. */
+ RID_LREG = RID_X23, /* Interpreter L. */
+
+ /* Register ranges [min, max) and number of registers. */
+ RID_MIN_GPR = RID_X0,
+ RID_MAX_GPR = RID_SP+1, /* sp is the last GPR ID. */
+ RID_MIN_FPR = RID_MAX_GPR, /* FPR IDs directly follow the GPR IDs. */
+ RID_MAX_FPR = RID_D31+1, /* d31 is the last FPR ID. */
+ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
+ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
+};
+
+#define RID_NUM_KREF RID_NUM_GPR
+#define RID_MIN_KREF RID_X0
+
+/* -- Register sets ------------------------------------------------------- */
+
+/* All registers except x18, fp, lr, sp (RSET_GPR is the range minus these). */
+#define RSET_FIXED \
+ (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP))
+#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
+#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
+#define RSET_ALL (RSET_GPR|RSET_FPR)
+#define RSET_INIT RSET_ALL
+
+/* Scratch sets: x0-x17, d0-d7, d16-d31. lr is an implicit scratch register. */
+#define RSET_SCRATCH_GPR (RSET_RANGE(RID_X0, RID_X17+1))
+#define RSET_SCRATCH_FPR \
+ (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1))
+#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
+#define REGARG_FIRSTGPR RID_X0 /* GPR arguments use x0..x7. */
+#define REGARG_LASTGPR RID_X7
+#define REGARG_NUMGPR 8 /* = LASTGPR - FIRSTGPR + 1. */
+#define REGARG_FIRSTFPR RID_D0 /* FPR arguments use d0..d7. */
+#define REGARG_LASTFPR RID_D7
+#define REGARG_NUMFPR 8 /* = LASTFPR - FIRSTFPR + 1. */
+
+/* -- Instructions -------------------------------------------------------- */
+
+/* Instruction fields. The macros only shift; arguments are not masked. */
+#define A64F_D(r) (r) /* Rd: starts at bit 0. */
+#define A64F_N(r) ((r) << 5) /* Rn: starts at bit 5. */
+#define A64F_A(r) ((r) << 10) /* Ra: starts at bit 10. */
+#define A64F_M(r) ((r) << 16) /* Rm: starts at bit 16. */
+#define A64F_U16(x) ((x) << 5) /* 16 bit immediate: starts at bit 5. */
+#define A64F_S26(x) (x) /* 26 bit branch offset: starts at bit 0. */
+#define A64F_S19(x) ((x) << 5) /* 19 bit offset: starts at bit 5. */
+
+typedef enum A64Ins { /* Opcode templates; fields get OR'ed in via A64F_*. */
+ A64I_MOVZw = 0x52800000, /* MOVZ Wd, #imm16 (32 bit). */
+ A64I_MOVZx = 0xd2800000, /* MOVZ Xd, #imm16 (64 bit). */
+ A64I_LDRLw = 0x18000000, /* LDR Wt, <literal> (PC-relative, 32 bit). */
+ A64I_LDRLx = 0x58000000, /* LDR Xt, <literal> (PC-relative, 64 bit). */
+ A64I_NOP = 0xd503201f, /* NOP. */
+ A64I_B = 0x14000000, /* B <imm26> (unconditional branch). */
+ A64I_BR = 0xd61f0000, /* BR Xn (branch to register). */
+} A64Ins;
+
+#endif
diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h
index 099240e..76645bc 100644
--- a/src/lj_target_mips.h
+++ b/src/lj_target_mips.h
@@ -1,6 +1,6 @@
/*
** Definitions for MIPS CPUs.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_TARGET_MIPS_H
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
index 475e046..9986768 100644
--- a/src/lj_target_ppc.h
+++ b/src/lj_target_ppc.h
@@ -1,6 +1,6 @@
/*
** Definitions for PPC CPUs.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_TARGET_PPC_H
@@ -104,7 +104,7 @@ enum {
/* This definition must match with the *.dasc file(s). */
typedef struct {
lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
- int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
+ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
int32_t spill[256]; /* Spill slots. */
} ExitState;
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 171dae4..65e438f 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -1,6 +1,6 @@
/*
** Definitions for x86 and x64 CPUs.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_TARGET_X86_H
diff --git a/src/lj_trace.c b/src/lj_trace.c
index f386b95..39ff046 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -1,6 +1,6 @@
/*
** Trace management.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_trace_c
@@ -830,7 +830,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
ERRNO_RESTORE
switch (bc_op(*pc)) {
case BC_CALLM: case BC_CALLMT:
- return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc));
+ return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) + LJ_FR2);
case BC_RETM:
return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc));
case BC_TSETM:
diff --git a/src/lj_trace.h b/src/lj_trace.h
index 74c5431..9eaf91b 100644
--- a/src/lj_trace.h
+++ b/src/lj_trace.h
@@ -1,6 +1,6 @@
/*
** Trace management.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_TRACE_H
diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h
index c58fbef..6b377cb 100644
--- a/src/lj_traceerr.h
+++ b/src/lj_traceerr.h
@@ -1,6 +1,6 @@
/*
** Trace compiler error messages.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
/* This file may be included multiple times with different TREDEF macros. */
diff --git a/src/lj_udata.c b/src/lj_udata.c
index 6cd357e..d401a3d 100644
--- a/src/lj_udata.c
+++ b/src/lj_udata.c
@@ -1,6 +1,6 @@
/*
** Userdata handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_udata_c
diff --git a/src/lj_udata.h b/src/lj_udata.h
index e03d9a3..676e970 100644
--- a/src/lj_udata.h
+++ b/src/lj_udata.h
@@ -1,6 +1,6 @@
/*
** Userdata handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_UDATA_H
diff --git a/src/lj_vm.h b/src/lj_vm.h
index 83883e2..b31e22f 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -1,6 +1,6 @@
/*
** Assembler VM interface definitions.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_VM_H
@@ -50,20 +50,18 @@ LJ_ASMF void lj_vm_exit_handler(void);
LJ_ASMF void lj_vm_exit_interp(void);
/* Internal math helper functions. */
-#if LJ_TARGET_PPC
+#if LJ_TARGET_PPC || LJ_TARGET_ARM64
#define lj_vm_floor floor
#define lj_vm_ceil ceil
#else
LJ_ASMF double lj_vm_floor(double);
-#if !LJ_TARGET_X86ORX64
LJ_ASMF double lj_vm_ceil(double);
-#endif
#if LJ_TARGET_ARM
LJ_ASMF double lj_vm_floor_sf(double);
LJ_ASMF double lj_vm_ceil_sf(double);
#endif
#endif
-#if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64
+#ifdef LUAJIT_NO_LOG2
LJ_ASMF double lj_vm_log2(double);
#else
#define lj_vm_log2 log2
@@ -74,12 +72,12 @@ LJ_ASMF double lj_vm_log2(double);
LJ_ASMF void lj_vm_floor_sse(void);
LJ_ASMF void lj_vm_ceil_sse(void);
LJ_ASMF void lj_vm_trunc_sse(void);
-LJ_ASMF void lj_vm_exp_x87(void);
-LJ_ASMF void lj_vm_exp2_x87(void);
-LJ_ASMF void lj_vm_pow_sse(void);
LJ_ASMF void lj_vm_powi_sse(void);
+#define lj_vm_powi NULL
#else
-#if LJ_TARGET_PPC
+LJ_ASMF double lj_vm_powi(double, int32_t);
+#endif
+#if LJ_TARGET_PPC || LJ_TARGET_ARM64
#define lj_vm_trunc trunc
#else
LJ_ASMF double lj_vm_trunc(double);
@@ -87,13 +85,11 @@ LJ_ASMF double lj_vm_trunc(double);
LJ_ASMF double lj_vm_trunc_sf(double);
#endif
#endif
-LJ_ASMF double lj_vm_powi(double, int32_t);
#ifdef LUAJIT_NO_EXP2
LJ_ASMF double lj_vm_exp2(double);
#else
#define lj_vm_exp2 exp2
#endif
-#endif
LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
#if LJ_HASFFI
LJ_ASMF int lj_vm_errno(void);
@@ -109,8 +105,6 @@ LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */
LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */
LJ_ASMF void lj_cont_stitch(void); /* Trace stitching. */
-enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
-
/* Start of the ASM code. */
LJ_ASMF char lj_vm_asm_begin[];
diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c
index e14ad5b..87ebcfb 100644
--- a/src/lj_vmevent.c
+++ b/src/lj_vmevent.c
@@ -1,6 +1,6 @@
/*
** VM event handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#include <stdio.h>
@@ -27,6 +27,7 @@ ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev)
if (tv && tvisfunc(tv)) {
lj_state_checkstack(L, LUA_MINSTACK);
setfuncV(L, L->top++, funcV(tv));
+ if (LJ_FR2) setnilV(L->top++);
return savestack(L, L->top);
}
}
diff --git a/src/lj_vmevent.h b/src/lj_vmevent.h
index 51bae01..231e00e 100644
--- a/src/lj_vmevent.h
+++ b/src/lj_vmevent.h
@@ -1,6 +1,6 @@
/*
** VM event handling.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_VMEVENT_H
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index 63886aa..ecad295 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -1,6 +1,6 @@
/*
** Math helper functions for assembler VM.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_vmmath_c
@@ -13,16 +13,29 @@
#include "lj_ir.h"
#include "lj_vm.h"
-/* -- Helper functions for generated machine code ------------------------- */
+/* -- Wrapper functions --------------------------------------------------- */
-#if LJ_TARGET_X86ORX64
-/* Wrapper functions to avoid linker issues on OSX. */
-LJ_FUNCA double lj_vm_sinh(double x) { return sinh(x); }
-LJ_FUNCA double lj_vm_cosh(double x) { return cosh(x); }
-LJ_FUNCA double lj_vm_tanh(double x) { return tanh(x); }
+#if LJ_TARGET_X86 && __ELF__ && __PIC__
+/* Wrapper functions to deal with the ELF/x86 PIC disaster. */
+LJ_FUNCA double lj_wrap_log(double x) { return log(x); }
+LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); }
+LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); }
+LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); }
+LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); }
+LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); }
+LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); }
+LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); }
+LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); }
+LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); }
+LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); }
+LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); }
+LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); }
+LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); }
+LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
#endif
-#if !LJ_TARGET_X86ORX64
+/* -- Helper functions for generated machine code ------------------------- */
+
double lj_vm_foldarith(double x, double y, int op)
{
switch (op) {
@@ -43,7 +56,6 @@ double lj_vm_foldarith(double x, double y, int op)
default: return x;
}
}
-#endif
#if LJ_HASJIT
@@ -61,7 +73,7 @@ double lj_vm_exp2(double a)
}
#endif
-#if !(LJ_TARGET_ARM || LJ_TARGET_PPC)
+#if !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)
int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
{
uint32_t y, ua, ub;
@@ -107,6 +119,7 @@ double lj_vm_powi(double x, int32_t k)
else
return 1.0 / lj_vm_powui(x, (uint32_t)-k);
}
+#endif
/* Computes fpm(x) for extended math functions. */
double lj_vm_foldfpm(double x, int fpm)
@@ -128,7 +141,6 @@ double lj_vm_foldfpm(double x, int fpm)
}
return 0;
}
-#endif
#if LJ_HASFFI
int lj_vm_errno(void)
diff --git a/src/ljamalg.c b/src/ljamalg.c
index da08f7b..be0c52d 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -1,6 +1,6 @@
/*
** LuaJIT core and libraries amalgamation.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
/*
diff --git a/src/luaconf.h b/src/luaconf.h
index 40fd68a..043590b 100644
--- a/src/luaconf.h
+++ b/src/luaconf.h
@@ -1,6 +1,6 @@
/*
** Configuration header.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef luaconf_h
diff --git a/src/luajit.c b/src/luajit.c
index e292da8..0ebc730 100644
--- a/src/luajit.c
+++ b/src/luajit.c
@@ -1,6 +1,6 @@
/*
** LuaJIT frontend. Runs commands, scripts, read-eval-print (REPL) etc.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
diff --git a/src/luajit.h b/src/luajit.h
index 4e1da9a..3db4bba 100644
--- a/src/luajit.h
+++ b/src/luajit.h
@@ -1,7 +1,7 @@
/*
** LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
**
-** Copyright (C) 2005-2014 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
**
** Permission is hereby granted, free of charge, to any person obtaining
** a copy of this software and associated documentation files (the
@@ -33,7 +33,7 @@
#define LUAJIT_VERSION "LuaJIT 2.1.0-alpha"
#define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */
#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_alpha
-#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2014 Mike Pall"
+#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2015 Mike Pall"
#define LUAJIT_URL "http://luajit.org/"
/* Modes for luaJIT_setmode. */
diff --git a/src/lualib.h b/src/lualib.h
index 1c1e317..96530e7 100644
--- a/src/lualib.h
+++ b/src/lualib.h
@@ -1,6 +1,6 @@
/*
** Standard library header.
-** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LUALIB_H
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
index 729b704..0360d7e 100644
--- a/src/msvcbuild.bat
+++ b/src/msvcbuild.bat
@@ -1,5 +1,5 @@
@rem Script to build LuaJIT with MSVC.
-@rem Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+@rem Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
@rem
@rem Either open a "Visual Studio .NET Command Prompt"
@rem (Note that the Express Edition does not contain an x64 compiler)
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 58efabc..0bd9b14 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -1,6 +1,6 @@
|// Low-level VM code for ARM CPUs.
|// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
|
|.arch arm
|.section code_op, code_sub
@@ -336,7 +336,7 @@ static void build_subroutines(BuildCtx *ctx)
| // - The GC shrinks the stack in between.
| // - A return back from a lua_call() with (high) nresults adjustment.
| str BASE, L->top // Save current top held in BASE (yes).
- | mov CARG2, KBASE
+ | lsr CARG2, KBASE, #3
| mov CARG1, L
| bl extern lj_state_growstack // (lua_State *L, int n)
| ldr BASE, L->top // Need the (realloced) L->top in BASE.
@@ -390,7 +390,7 @@ static void build_subroutines(BuildCtx *ctx)
| str BASE, L->base
| add PC, PC, #4 // Must point after first instruction.
| str RC, L->top
- | lsr CARG3, RA, #3
+ | lsr CARG2, RA, #3
|2:
| // L->base = new base, L->top = top
| str PC, SAVE_PC
@@ -543,9 +543,8 @@ static void build_subroutines(BuildCtx *ctx)
|1:
| beq ->cont_ffi_callback // cont = 1: return from FFI callback.
| // cont = 0: tailcall from C function.
- | ldr CARG3, [BASE, FRAME_FUNC]
- | sub CARG4, CARG4, #16
- | sub RC, CARG4, BASE
+ | sub CARG4, CARG4, #16
+ | sub RC, CARG4, BASE
| b ->vm_call_tail
|.endif
|
@@ -3295,10 +3294,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mvn RC, RC
| ldr UPVAL:CARG2, [LFUNC:CARG2, RA]
| ldr STR:CARG3, [KBASE, RC, lsl #2]
- | mvn CARG4, #~LJ_TSTR
| ldrb RB, UPVAL:CARG2->marked
- | ldr CARG2, UPVAL:CARG2->v
| ldrb RC, UPVAL:CARG2->closed
+ | ldr CARG2, UPVAL:CARG2->v
+ | mvn CARG4, #~LJ_TSTR
| tst RB, #LJ_GC_BLACK // isblack(uv)
| ldrb RB, STR:CARG3->marked
| strd CARG34, [CARG2]
@@ -3749,7 +3748,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_TSETR:
| decode_RB8 RB, INS
| decode_RC8 RC, INS
- | // RA = dst*8, RB = table*8, RC = key*8
+ | // RA = src*8, RB = table*8, RC = key*8
| ldr TAB:CARG2, [BASE, RB]
| ldr CARG3, [BASE, RC]
| ldrb INS, TAB:CARG2->marked
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
new file mode 100644
index 0000000..a31cbb3
--- /dev/null
+++ b/src/vm_arm64.dasc
@@ -0,0 +1,3763 @@
+|// Low-level VM code for ARM64 CPUs.
+|// Bytecode interpreter, fast functions and helper functions.
+|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+|
+|.arch arm64
+|.section code_op, code_sub
+|
+|.actionlist build_actionlist
+|.globals GLOB_
+|.globalnames globnames
+|.externnames extnames
+|
+|// Note: The ragged indentation of the instructions is intentional.
+|// The starting columns indicate data dependencies.
+|
+|//-----------------------------------------------------------------------
+|
+|// ARM64 registers and the AAPCS64 ABI 1.0 at a glance:
+|//
+|// x0-x17 temp, x19-x28 callee-saved, x29 fp, x30 lr
+|// x18 is reserved on most platforms. Don't use it, save it or restore it.
+|// x31 doesn't exist. Register number 31 either means xzr/wzr (zero) or sp,
+|// depending on the instruction.
+|// v0-v7 temp, v8-v15 callee-saved (only d8-d15 preserved), v16-v31 temp
+|//
+|// x0-x7/v0-v7 hold parameters and results.
+|
+|// Fixed register assignments for the interpreter.
+|
+|// The following must be C callee-save.
+|.define BASE, x19 // Base of current Lua stack frame.
+|.define KBASE, x20 // Constants of current Lua function.
+|.define PC, x21 // Next PC.
+|.define GLREG, x22 // Global state.
+|.define LREG, x23 // Register holding lua_State (also in SAVE_L).
+|.define TISNUM, x24 // Constant LJ_TISNUM << 47.
+|.define TISNUMhi, x25 // Constant LJ_TISNUM << 15.
+|.define TISNIL, x26 // Constant -1LL.
+|.define fp, x29 // Yes, we have to maintain a frame pointer.
+|
+|.define ST_INTERP, w26 // Constant -1.
+|
+|// The following temporaries are not saved across C calls, except for RA/RC.
+|.define RA, x27
+|.define RC, x28
+|.define RB, x17
+|.define RAw, w27
+|.define RCw, w28
+|.define RBw, w17
+|.define INS, x16
+|.define INSw, w16
+|.define ITYPE, x15
+|.define TMP0, x8
+|.define TMP1, x9
+|.define TMP2, x10
+|.define TMP3, x11
+|.define TMP0w, w8
+|.define TMP1w, w9
+|.define TMP2w, w10
+|.define TMP3w, w11
+|
+|// Calling conventions. Also used as temporaries.
+|.define CARG1, x0
+|.define CARG2, x1
+|.define CARG3, x2
+|.define CARG4, x3
+|.define CARG5, x4
+|.define CARG1w, w0
+|.define CARG2w, w1
+|.define CARG3w, w2
+|.define CARG4w, w3
+|.define CARG5w, w4
+|
+|.define FARG1, d0
+|.define FARG2, d1
+|
+|.define CRET1, x0
+|.define CRET1w, w0
+|
+|// Stack layout while in interpreter. Must match with lj_frame.h.
+|
+|.define CFRAME_SPACE, 208
+|//----- 16 byte aligned, <-- sp entering interpreter
+|// Unused [sp, #204] // 32 bit values
+|.define SAVE_NRES, [sp, #200]
+|.define SAVE_ERRF, [sp, #196]
+|.define SAVE_MULTRES, [sp, #192]
+|.define TMPD, [sp, #184] // 64 bit values
+|.define SAVE_L, [sp, #176]
+|.define SAVE_PC, [sp, #168]
+|.define SAVE_CFRAME, [sp, #160]
+|.define SAVE_FPR_, 96 // 96+8*8: 64 bit FPR saves
+|.define SAVE_GPR_, 16 // 16+10*8: 64 bit GPR saves
+|.define SAVE_LR, [sp, #8]
+|.define SAVE_FP, [sp]
+|//----- 16 byte aligned, <-- sp while in interpreter.
+|
+|.define TMPDofs, #184
+|
+|.macro save_, gpr1, gpr2, fpr1, fpr2
+| stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
+| stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
+|.endmacro
+|.macro rest_, gpr1, gpr2, fpr1, fpr2
+| ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
+| ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
+|.endmacro
+|
+|.macro saveregs
+| stp fp, lr, [sp, #-CFRAME_SPACE]!
+| add fp, sp, #0
+| stp x19, x20, [sp, # SAVE_GPR_]
+| save_ 21, 22, 8, 9
+| save_ 23, 24, 10, 11
+| save_ 25, 26, 12, 13
+| save_ 27, 28, 14, 15
+|.endmacro
+|.macro restoreregs
+| ldp x19, x20, [sp, # SAVE_GPR_]
+| rest_ 21, 22, 8, 9
+| rest_ 23, 24, 10, 11
+| rest_ 25, 26, 12, 13
+| rest_ 27, 28, 14, 15
+| ldp fp, lr, [sp], # CFRAME_SPACE
+|.endmacro
+|
+|// Type definitions. Some of these are only used for documentation.
+|.type L, lua_State, LREG
+|.type GL, global_State, GLREG
+|.type TVALUE, TValue
+|.type GCOBJ, GCobj
+|.type STR, GCstr
+|.type TAB, GCtab
+|.type LFUNC, GCfuncL
+|.type CFUNC, GCfuncC
+|.type PROTO, GCproto
+|.type UPVAL, GCupval
+|.type NODE, Node
+|.type NARGS8, int
+|.type TRACE, GCtrace
+|.type SBUF, SBuf
+|
+|//-----------------------------------------------------------------------
+|
+|// Trap for not-yet-implemented parts.
+|.macro NYI; brk; .endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Access to frame relative to BASE.
+|.define FRAME_FUNC, #-16
+|.define FRAME_PC, #-8
+|
+|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro
+|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro
+|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro
+|.macro decode_RD, dst, ins; ubfx dst, ins, #16, #16; .endmacro
+|.macro decode_RC8RD, dst, src; ubfiz dst, src, #3, #8; .endmacro
+|
+|// Instruction decode+dispatch.
+|.macro ins_NEXT
+| ldr INSw, [PC], #4
+| add TMP1, GL, INS, uxtb #3
+| decode_RA RA, INS
+| ldr TMP0, [TMP1, #GG_G2DISP]
+| decode_RD RC, INS
+| br TMP0
+|.endmacro
+|
+|// Instruction footer.
+|.if 1
+| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
+| .define ins_next, ins_NEXT
+| .define ins_next_, ins_NEXT
+|.else
+| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+| // Affects only certain kinds of benchmarks (and only with -j off).
+| .macro ins_next
+| b ->ins_next
+| .endmacro
+| .macro ins_next_
+| ->ins_next:
+| ins_NEXT
+| .endmacro
+|.endif
+|
+|// Call decode and dispatch.
+|.macro ins_callt
+| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+| ldr PC, LFUNC:CARG3->pc
+| ldr INSw, [PC], #4
+| add TMP1, GL, INS, uxtb #3
+| decode_RA RA, INS
+| ldr TMP0, [TMP1, #GG_G2DISP]
+| add RA, BASE, RA, lsl #3
+| br TMP0
+|.endmacro
+|
+|.macro ins_call
+| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
+| str PC, [BASE, FRAME_PC]
+| ins_callt
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Macros to check the TValue type and extract the GCobj. Branch on failure.
+|.macro checktp, reg, tp, target
+| asr ITYPE, reg, #47
+| cmn ITYPE, #-tp
+| and reg, reg, #LJ_GCVMASK
+| bne target
+|.endmacro
+|.macro checktp, dst, reg, tp, target
+| asr ITYPE, reg, #47
+| cmn ITYPE, #-tp
+| and dst, reg, #LJ_GCVMASK
+| bne target
+|.endmacro
+|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
+|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
+|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
+|.macro checkint, reg, target
+| cmp TISNUMhi, reg, lsr #32
+| bne target
+|.endmacro
+|.macro checknum, reg, target
+| cmp TISNUMhi, reg, lsr #32
+| bls target
+|.endmacro
+|.macro checknumber, reg, target
+| cmp TISNUMhi, reg, lsr #32
+| blo target
+|.endmacro
+|
+|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro
+|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro
+|
+#define GL_J(field) (GG_OFS(J) + (int)offsetof(jit_State, field))
+|
+#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
+|
+|.macro hotcheck, delta
+| NYI
+|.endmacro
+|
+|.macro hotloop
+| hotcheck HOTCOUNT_LOOP
+| blo ->vm_hotloop
+|.endmacro
+|
+|.macro hotcall
+| hotcheck HOTCOUNT_CALL
+| blo ->vm_hotcall
+|.endmacro
+|
+|// Set current VM state.
+|.macro mv_vmstate, reg, st; movn reg, #LJ_VMST_..st; .endmacro
+|.macro st_vmstate, reg; str reg, GL->vmstate; .endmacro
+|
+|// Move table write barrier back. Overwrites mark and tmp.
+|.macro barrierback, tab, mark, tmp
+| ldr tmp, GL->gc.grayagain
+| and mark, mark, #~LJ_GC_BLACK // black2gray(tab)
+| str tab, GL->gc.grayagain
+| strb mark, tab->marked
+| str tmp, tab->gclist
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+
+#if !LJ_DUALNUM
+#error "Only dual-number mode supported for ARM64 target"
+#endif
+
+/* Generate subroutines used by opcodes and other parts of the VM. */
+/* The .code_sub section should be last to help static branch prediction. */
+static void build_subroutines(BuildCtx *ctx)
+{
+ |.code_sub
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Return handling ----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_returnp:
+ | // See vm_return. Also: RB = previous base.
+ | tbz PC, #2, ->cont_dispatch // (PC & FRAME_P) == 0?
+ |
+ | // Return from pcall or xpcall fast func.
+ | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame.
+ | mov_true TMP0
+ | mov BASE, RB
+ | // Prepending may overwrite the pcall frame, so do it at the end.
+ | str TMP0, [RA, #-8]! // Prepend true to results.
+ |
+ |->vm_returnc:
+ | adds RC, RC, #8 // RC = (nresults+1)*8.
+ | mov CRET1, #LUA_YIELD
+ | beq ->vm_unwind_c_eh
+ | str RCw, SAVE_MULTRES
+ | ands CARG1, PC, #FRAME_TYPE
+ | beq ->BC_RET_Z // Handle regular return to Lua.
+ |
+ |->vm_return:
+ | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return
+ | // CARG1 = PC & FRAME_TYPE
+ | and RB, PC, #~FRAME_TYPEP
+ | cmp CARG1, #FRAME_C
+ | sub RB, BASE, RB // RB = previous base.
+ | bne ->vm_returnp
+ |
+ | str RB, L->base
+ | ldrsw CARG2, SAVE_NRES // CARG2 = nresults+1.
+ | mv_vmstate TMP0w, C
+ | sub BASE, BASE, #16
+ | subs TMP2, RC, #8
+ | st_vmstate TMP0w
+ | beq >2
+ |1:
+ | subs TMP2, TMP2, #8
+ | ldr TMP0, [RA], #8
+ | str TMP0, [BASE], #8
+ | bne <1
+ |2:
+ | cmp RC, CARG2, lsl #3 // More/less results wanted?
+ | bne >6
+ |3:
+ | str BASE, L->top // Store new top.
+ |
+ |->vm_leave_cp:
+ | ldr RC, SAVE_CFRAME // Restore previous C frame.
+ | mov CRET1, #0 // Ok return status for vm_pcall.
+ | str RC, L->cframe
+ |
+ |->vm_leave_unw:
+ | restoreregs
+ | ret
+ |
+ |6:
+ | bgt >7 // Less results wanted?
+ | // More results wanted. Check stack size and fill up results with nil.
+ | ldr CARG3, L->maxstack
+ | cmp BASE, CARG3
+ | bhs >8
+ | str TISNIL, [BASE], #8
+ | add RC, RC, #8
+ | b <2
+ |
+ |7: // Less results wanted.
+ | cbz CARG2, <3 // LUA_MULTRET+1 case?
+ | sub CARG1, RC, CARG2, lsl #3
+ | sub BASE, BASE, CARG1 // Shrink top.
+ | b <3
+ |
+ |8: // Corner case: need to grow stack for filling up results.
+ | // This can happen if:
+ | // - A C function grows the stack (a lot).
+ | // - The GC shrinks the stack in between.
+ | // - A return back from a lua_call() with (high) nresults adjustment.
+ | str BASE, L->top // Save current top held in BASE (yes).
+ | mov CARG1, L
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | ldr BASE, L->top // Need the (realloced) L->top in BASE.
+ | ldrsw CARG2, SAVE_NRES
+ | b <2
+ |
+ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
+ | // (void *cframe, int errcode)
+ | mov sp, CARG1
+ | mov CRET1, CARG2
+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
+ | ldr L, SAVE_L
+ | mv_vmstate TMP0w, C
+ | ldr GL, L->glref
+ | st_vmstate TMP0w
+ | b ->vm_leave_unw
+ |
+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
+ | // (void *cframe)
+ | and sp, CARG1, #CFRAME_RAWMASK
+ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
+ | ldr L, SAVE_L
+ | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
+ | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+ | movn TISNIL, #0
+ | mov RC, #16 // 2 results: false + error message.
+ | ldr BASE, L->base
+ | ldr GL, L->glref // Setup pointer to global state.
+ | mov_false TMP0
+ | sub RA, BASE, #8 // Results start at BASE-8.
+ | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame.
+ | str TMP0, [BASE, #-8] // Prepend false to error message.
+ | st_vmstate ST_INTERP
+ | b ->vm_returnc
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Grow stack for calls -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_growstack_c: // Grow stack for C function.
+ | // CARG1 = L
+ | mov CARG2, #LUA_MINSTACK
+ | b >2
+ |
+ |->vm_growstack_l: // Grow stack for Lua function.
+ | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
+ | add RC, BASE, RC
+ | sub RA, RA, BASE
+ | mov CARG1, L
+ | stp BASE, RC, L->base
+ | add PC, PC, #4 // Must point after first instruction.
+ | lsr CARG2, RA, #3
+ |2:
+ | // L->base = new base, L->top = top
+ | str PC, SAVE_PC
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | ldp BASE, RC, L->base
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+ | sub NARGS8:RC, RC, BASE
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+ | ins_callt // Just retry the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Entry points into the assembler VM ---------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_resume: // Setup C frame and resume thread.
+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+ | saveregs
+ | mov L, CARG1
+ | ldr GL, L->glref // Setup pointer to global state.
+ | mov BASE, CARG2
+ | str L, SAVE_L
+ | mov PC, #FRAME_CP
+ | str wzr, SAVE_NRES
+ | add TMP0, sp, #CFRAME_RESUME
+ | ldrb TMP1w, L->status
+ | str wzr, SAVE_ERRF
+ | str L, SAVE_PC // Any value outside of bytecode is ok.
+ | str xzr, SAVE_CFRAME
+ | str TMP0, L->cframe
+ | cbz TMP1w, >3
+ |
+ | // Resume after yield (like a return).
+ | str L, GL->cur_L
+ | mov RA, BASE
+ | ldp BASE, CARG1, L->base
+ | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
+ | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+ | ldr PC, [BASE, FRAME_PC]
+ | strb wzr, L->status
+ | movn TISNIL, #0
+ | sub RC, CARG1, BASE
+ | ands CARG1, PC, #FRAME_TYPE
+ | add RC, RC, #8
+ | st_vmstate ST_INTERP
+ | str RCw, SAVE_MULTRES
+ | beq ->BC_RET_Z
+ | b ->vm_return
+ |
+ |->vm_pcall: // Setup protected C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
+ | saveregs
+ | mov PC, #FRAME_CP
+ | str CARG4w, SAVE_ERRF
+ | b >1
+ |
+ |->vm_call: // Setup C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1)
+ | saveregs
+ | mov PC, #FRAME_C
+ |
+ |1: // Entry point for vm_pcall above (PC = ftype).
+ | ldr RC, L:CARG1->cframe
+ | str CARG3w, SAVE_NRES
+ | mov L, CARG1
+ | str CARG1, SAVE_L
+ | ldr GL, L->glref // Setup pointer to global state.
+ | mov BASE, CARG2
+ | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
+ | str RC, SAVE_CFRAME
+ | str fp, L->cframe // Add our C frame to cframe chain.
+ |
+ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+ | str L, GL->cur_L
+ | ldp RB, CARG1, L->base // RB = old base (for vmeta_call).
+ | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
+ | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+ | add PC, PC, BASE
+ | movn TISNIL, #0
+ | sub PC, PC, RB // PC = frame delta + frame type
+ | sub NARGS8:RC, CARG1, BASE
+ | st_vmstate ST_INTERP
+ |
+ |->vm_call_dispatch:
+ | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC
+ | ldr CARG3, [BASE, FRAME_FUNC]
+ | checkfunc CARG3, ->vmeta_call
+ |
+ |->vm_call_dispatch_f:
+ | ins_call
+ | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC
+ |
+ |->vm_cpcall: // Setup protected C frame, call C.
+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+ | saveregs
+ | mov L, CARG1
+ | ldr RA, L:CARG1->stack
+ | str CARG1, SAVE_L
+ | ldr GL, L->glref // Setup pointer to global state.
+ | ldr RB, L->top
+ | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
+ | ldr RC, L->cframe
+ | sub RA, RA, RB // Compute -savestack(L, L->top).
+ | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame.
+ | str wzr, SAVE_ERRF // No error function.
+ | str RC, SAVE_CFRAME
+ | str fp, L->cframe // Add our C frame to cframe chain.
+ | str L, GL->cur_L
+ | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
+ | mov BASE, CRET1
+ | mov PC, #FRAME_CP
+ | cbnz BASE, <3 // Else continue with the call.
+ | b ->vm_leave_cp // No base? Just remove C frame.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Metamethod handling ------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |//-- Continuation dispatch ----------------------------------------------
+ |
+ |->cont_dispatch:
+ | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8
+ | ldr LFUNC:CARG3, [RB, FRAME_FUNC]
+ | ldr CARG1, [BASE, #-32] // Get continuation.
+ | mov CARG4, BASE
+ | mov BASE, RB // Restore caller BASE.
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ |.if FFI
+ | cmp CARG1, #1
+ |.endif
+ | ldr PC, [CARG4, #-24] // Restore PC from [cont|PC].
+ | ldr CARG3, LFUNC:CARG3->pc
+ | add TMP0, RA, RC
+ | str TISNIL, [TMP0, #-8] // Ensure one valid arg.
+ |.if FFI
+ | bls >1
+ |.endif
+ | ldr KBASE, [CARG3, #PC2PROTO(k)]
+ | // BASE = base, RA = resultptr, CARG4 = meta base
+ | br CARG1
+ |
+ |.if FFI
+ |1:
+ | beq ->cont_ffi_callback // cont = 1: return from FFI callback.
+ | // cont = 0: tailcall from C function.
+ | sub CARG4, CARG4, #32
+ | sub RC, CARG4, BASE
+ | b ->vm_call_tail
+ |.endif
+ |
+ |->cont_cat: // RA = resultptr, CARG4 = meta base
+ | ldr INSw, [PC, #-4]
+ | sub CARG2, CARG4, #32
+ | ldr TMP0, [RA]
+ | str BASE, L->base
+ | decode_RB RB, INS
+ | decode_RA RA, INS
+ | add TMP1, BASE, RB, lsl #3
+ | subs TMP1, CARG2, TMP1
+ | beq >1
+ | str TMP0, [CARG2]
+ | lsr CARG3, TMP1, #3
+ | b ->BC_CAT_Z
+ |
+ |1:
+ | str TMP0, [BASE, RA, lsl #3]
+ | b ->cont_nop
+ |
+ |//-- Table indexing metamethods -----------------------------------------
+ |
+ |->vmeta_tgets1:
+ | movn CARG4, #~LJ_TSTR
+ | add CARG2, BASE, RB, lsl #3
+ | add CARG4, STR:RC, CARG4, lsl #47
+ | b >2
+ |
+ |->vmeta_tgets:
+ | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48
+ | str CARG2, GL->tmptv
+ | add CARG2, GL, #offsetof(global_State, tmptv)
+ |2:
+ | add CARG3, sp, TMPDofs
+ | str CARG4, TMPD
+ | b >1
+ |
+ |->vmeta_tgetb: // RB = table, RC = index
+ | add RC, RC, TISNUM
+ | add CARG2, BASE, RB, lsl #3
+ | add CARG3, sp, TMPDofs
+ | str RC, TMPD
+ | b >1
+ |
+ |->vmeta_tgetv: // RB = table, RC = key
+ | add CARG2, BASE, RB, lsl #3
+ | add CARG3, BASE, RC, lsl #3
+ |1:
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
+ | // Returns TValue * (finished) or NULL (metamethod).
+ | cbz CRET1, >3
+ | ldr TMP0, [CRET1]
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ |
+ |3: // Call __index metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k
+ | sub TMP1, BASE, #FRAME_CONT
+ | ldr BASE, L->top
+ | mov NARGS8:RC, #16 // 2 args for func(t, k).
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
+ | str PC, [BASE, #-24] // [cont|PC]
+ | sub PC, BASE, TMP1
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | b ->vm_call_dispatch_f
+ |
+ |->vmeta_tgetr:
+ | // Out-of-line lookup via lj_tab_getinth. The key arrives in TMP1w and is
+ | // sign-extended into CARG2; the table argument (CARG1) is not set here.
+ | // Leaves the loaded value, or TISNIL if the lookup returned NULL, in TMP0
+ | // and rejoins the bytecode handler at BC_TGETR_Z.
+ | sxtw CARG2, TMP1w
+ | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // Returns cTValue * or NULL.
+ | mov TMP0, TISNIL
+ | cbz CRET1, ->BC_TGETR_Z
+ | ldr TMP0, [CRET1]
+ | b ->BC_TGETR_Z
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->vmeta_tsets1:
+ | movn CARG4, #~LJ_TSTR
+ | add CARG2, BASE, RB, lsl #3
+ | add CARG4, STR:RC, CARG4, lsl #47
+ | b >2
+ |
+ |->vmeta_tsets:
+ | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48
+ | str CARG2, GL->tmptv
+ | add CARG2, GL, #offsetof(global_State, tmptv)
+ |2:
+ | add CARG3, sp, TMPDofs
+ | str CARG4, TMPD
+ | b >1
+ |
+ |->vmeta_tsetb: // RB = table, RC = index
+ | add RC, RC, TISNUM
+ | add CARG2, BASE, RB, lsl #3
+ | add CARG3, sp, TMPDofs
+ | str RC, TMPD
+ | b >1
+ |
+ |->vmeta_tsetv:
+ | add CARG2, BASE, RB, lsl #3
+ | add CARG3, BASE, RC, lsl #3
+ |1:
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
+ | // Returns TValue * (finished) or NULL (metamethod).
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | cbz CRET1, >3
+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
+ | str TMP0, [CRET1]
+ | ins_next
+ |
+ |3: // Call __newindex metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+ | sub TMP1, BASE, #FRAME_CONT
+ | ldr BASE, L->top
+ | mov NARGS8:RC, #24 // 3 args for func(t, k, v).
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
+ | str TMP0, [BASE, #16] // Copy value to third argument.
+ | str PC, [BASE, #-24] // [cont|PC]
+ | sub PC, BASE, TMP1
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | b ->vm_call_dispatch_f
+ |
+ |->vmeta_tsetr:
+ | // Out-of-line store path via lj_tab_setinth. The key arrives in TMP1w and
+ | // is sign-extended into CARG3. The table argument (CARG2) is not set here;
+ | // NOTE(review): presumably preloaded by the code branching here -- confirm.
+ | sxtw CARG3, TMP1w
+ | str BASE, L->base
+ | // First C argument. Loaded explicitly, like every other call in this
+ | // section that takes a lua_State *, instead of relying on CARG1 being
+ | // preset by the code that branches here.
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ | // Returns TValue *.
+ | b ->BC_TSETR_Z
+ |
+ |//-- Comparison metamethods ---------------------------------------------
+ |
+ |->vmeta_comp:
+ | // Comparison metamethod path: passes the addresses of slots RA and RC and
+ | // the low byte of INS (the opcode) to lj_meta_comp.
+ | add CARG2, BASE, RA, lsl #3
+ | // Step PC back by 4 bytes before saving it; label 4 below re-advances it.
+ | sub PC, PC, #4
+ | add CARG3, BASE, RC, lsl #3
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | uxtb CARG4w, INSw
+ | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
+ | // Returns 0/1 or TValue * (metamethod).
+ |3:
+ | // Shared tail (also reached via "b <3"): a result above 1 is a pointer,
+ | // so the metamethod must be called.
+ | cmp CRET1, #1
+ | bhi ->vmeta_binop
+ |4:
+ | // Shared tail (also reached via "b <4") with flags set by the preceding
+ | // cmp: "lo" keeps the advanced PC, otherwise PC becomes the jump target
+ | // computed from the 16 bit field at [PC, #2].
+ | ldrh RBw, [PC, #2]
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ | sub RB, RB, #0x20000
+ | csel PC, PC, RB, lo
+ |->cont_nop:
+ | ins_next
+ |
+ |->cont_ra: // RA = resultptr
+ | ldr INSw, [PC, #-4]
+ | ldr TMP0, [RA]
+ | decode_RA TMP1, INS
+ | str TMP0, [BASE, TMP1, lsl #3]
+ | b ->cont_nop
+ |
+ |->cont_condt: // RA = resultptr
+ | ldr TMP0, [RA]
+ | mov_true TMP1
+ | cmp TMP1, TMP0 // Branch if result is true.
+ | b <4
+ |
+ |->cont_condf: // RA = resultptr
+ | ldr TMP0, [RA]
+ | mov_false TMP1
+ | cmp TMP0, TMP1 // Branch if result is false.
+ | b <4
+ |
+ |->vmeta_equal:
+ | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
+ | and TAB:CARG3, CARG3, #LJ_GCVMASK
+ | sub PC, PC, #4
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
+ | // Returns 0/1 or TValue * (metamethod).
+ | b <3
+ |
+ |->vmeta_equal_cd:
+ |.if FFI
+ | sub PC, PC, #4
+ | str BASE, L->base
+ | mov CARG1, L
+ | mov CARG2, INS
+ | str PC, SAVE_PC
+ | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op)
+ | // Returns 0/1 or TValue * (metamethod).
+ | b <3
+ |.endif
+ |
+ |->vmeta_istype:
+ | sub PC, PC, #4
+ | str BASE, L->base
+ | mov CARG1, L
+ | mov CARG2, RA
+ | mov CARG3, RC
+ | str PC, SAVE_PC
+ | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
+ | b ->cont_nop
+ |
+ |//-- Arithmetic metamethods ---------------------------------------------
+ |
+ |// Entry points for arithmetic metamethods. Each one loads the two operand
+ |// addresses into CARG3/CARG4 and joins the shared call at label 1.
+ |->vmeta_arith_vn:
+ | // First operand from stack slot RB, second from constant slot RC.
+ | add CARG3, BASE, RB, lsl #3
+ | add CARG4, KBASE, RC, lsl #3
+ | b >1
+ |
+ |->vmeta_arith_nv:
+ | // Operand order swapped: constant slot RC first, stack slot RB second.
+ | add CARG4, BASE, RB, lsl #3
+ | add CARG3, KBASE, RC, lsl #3
+ | b >1
+ |
+ |->vmeta_unm:
+ | // Unary minus: the same stack slot is passed for both operands.
+ | add CARG3, BASE, RC, lsl #3
+ | mov CARG4, CARG3
+ | b >1
+ |
+ |->vmeta_arith_vv:
+ | // Both operands from stack slots.
+ | add CARG3, BASE, RB, lsl #3
+ | add CARG4, BASE, RC, lsl #3
+ |1:
+ | // CARG5 = low byte of the instruction, CARG2 = address of slot RA.
+ | uxtb CARG5w, INSw
+ | add CARG2, BASE, RA, lsl #3
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+ | // Returns NULL (finished) or TValue * (metamethod).
+ | cbz CRET1, ->cont_nop
+ |
+ | // Call metamethod for binary op.
+ |->vmeta_binop:
+ | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
+ | // Saves the current PC in the new frame at [CRET1, #-24], then encodes
+ | // the frame delta plus FRAME_CONT into PC and switches BASE.
+ | sub TMP1, CRET1, BASE
+ | str PC, [CRET1, #-24] // [cont|PC]
+ | add PC, TMP1, #FRAME_CONT
+ | mov BASE, CRET1
+ | mov NARGS8:RC, #16 // 2 args for func(o1, o2).
+ | b ->vm_call_dispatch
+ |
+ |->vmeta_len:
+ | add CARG2, BASE, RC, lsl #3
+#if LJ_52
+ | mov TAB:RC, TAB:CARG1 // Save table (ignored for other types).
+#endif
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_len // (lua_State *L, TValue *o)
+ | // Returns NULL (retry) or TValue * (metamethod base).
+#if LJ_52
+ | cbnz CRET1, ->vmeta_binop // Binop call for compatibility.
+ | mov TAB:CARG1, TAB:RC
+ | b ->BC_LEN_Z
+#else
+ | b ->vmeta_binop // Binop call for compatibility.
+#endif
+ |
+ |//-- Call metamethod ----------------------------------------------------
+ |
+ |->vmeta_call: // Resolve and call __call metamethod.
+ | // RB = old base, BASE = new base, RC = nargs*8
+ | mov CARG1, L
+ | str RB, L->base // This is the callers base!
+ | sub CARG2, BASE, #16
+ | str PC, SAVE_PC
+ | add CARG3, BASE, NARGS8:RC
+ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
+ | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now.
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | ins_call
+ |
+ |->vmeta_callt: // Resolve __call for BC_CALLT.
+ | // BASE = old base, RA = new base, RC = nargs*8
+ | mov CARG1, L
+ | str BASE, L->base
+ | sub CARG2, RA, #16
+ | str PC, SAVE_PC
+ | add CARG3, RA, NARGS8:RC
+ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+ | ldr LFUNC:CARG3, [RA, FRAME_FUNC] // Guaranteed to be a function here.
+ | ldr PC, [BASE, FRAME_PC]
+ | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now.
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | b ->BC_CALLT2_Z
+ |
+ |//-- Argument coercion for 'for' statement ------------------------------
+ |
+ |->vmeta_for:
+ | // Calls lj_meta_for on the slots at RA, then reloads the instruction at
+ | // [PC, #-4], re-decodes its operands and re-dispatches to the loop init
+ | // handler (BC_JFORI when JIT is enabled and the opcode matches, else
+ | // BC_FORI).
+ | mov CARG1, L
+ | str BASE, L->base
+ | mov CARG2, RA
+ | str PC, SAVE_PC
+ | bl extern lj_meta_for // (lua_State *L, TValue *base)
+ | ldr INSw, [PC, #-4]
+ |.if JIT
+ | // Extract the opcode byte. UXTB only accepts 32 bit operands on A64;
+ | // writing TMP0w also zeroes the upper half used by the cmp below.
+ | uxtb TMP0w, INSw
+ |.endif
+ | decode_RA RA, INS
+ | decode_RD RC, INS
+ |.if JIT
+ | cmp TMP0, #BC_JFORI
+ | beq =>BC_JFORI
+ |.endif
+ | b =>BC_FORI
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Fast functions -----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |.macro .ffunc, name
+ |->ff_ .. name:
+ |.endmacro
+ |
+ |.macro .ffunc_1, name
+ |->ff_ .. name:
+ | ldr CARG1, [BASE]
+ | cmp NARGS8:RC, #8
+ | blo ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_2, name
+ |->ff_ .. name:
+ | ldp CARG1, CARG2, [BASE]
+ | cmp NARGS8:RC, #16
+ | blo ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_n, name
+ | .ffunc name
+ | ldr CARG1, [BASE]
+ | cmp NARGS8:RC, #8
+ | ldr FARG1, [BASE]
+ | blo ->fff_fallback
+ | checknum CARG1, ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_nn, name
+ | .ffunc name
+ | ldp CARG1, CARG2, [BASE]
+ | cmp NARGS8:RC, #16
+ | ldp FARG1, FARG2, [BASE]
+ | blo ->fff_fallback
+ | checknum CARG1, ->fff_fallback
+ | checknum CARG2, ->fff_fallback
+ |.endmacro
+ |
+ |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2.
+ |// Loads the pair starting at GL->gc.total and calls ->fff_gcstep unless
+ |// the first value is (signed) less than the second.
+ |.macro ffgccheck
+ | ldp CARG1, CARG2, GL->gc.total // Assumes threshold follows total.
+ | cmp CARG1, CARG2
+ | blt >1
+ | bl ->fff_gcstep
+ |1:
+ |.endmacro
+ |
+ |//-- Base library: checks -----------------------------------------------
+ |
+ |.ffunc_1 assert
+ | // Fast path for assert(): if the first argument does not compare below
+ | // the 'false' pattern the fallback handles it. Otherwise all arguments
+ | // are returned as results, copied down into the result area at BASE-16.
+ | ldr PC, [BASE, FRAME_PC]
+ | mov_false TMP1
+ | cmp CARG1, TMP1
+ | bhs ->fff_fallback
+ | str CARG1, [BASE, #-16]
+ | sub RB, BASE, #8
+ | // RA = number of bytes of arguments still to be copied.
+ | subs RA, NARGS8:RC, #8
+ | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8.
+ | cbz RA, ->fff_res // Done if exactly 1 argument.
+ |1:
+ | // Move each remaining argument 16 bytes down; RB post-increments by 8.
+ | ldr CARG1, [RB, #16]
+ | sub RA, RA, #8
+ | str CARG1, [RB], #8
+ | cbnz RA, <1
+ | b ->fff_res
+ |
+ |.ffunc_1 type
+ | mov TMP0, #~LJ_TISNUM
+ | asr ITYPE, CARG1, #47
+ | cmn ITYPE, #~LJ_TISNUM
+ | csinv TMP1, TMP0, ITYPE, lo
+ | add TMP1, TMP1, #offsetof(GCfuncC, upvalue)/8
+ | ldr CARG1, [CFUNC:CARG3, TMP1, lsl #3]
+ | b ->fff_restv
+ |
+ |//-- Base library: getters and setters ---------------------------------
+ |
+ |.ffunc_1 getmetatable
+ | // Fast path for getmetatable(): fetches the metatable (the object's own
+ | // for tables/userdata, otherwise a base metatable from gcroot) and looks
+ | // up the __metatable string key in its hash part. Returns nil when there
+ | // is no metatable, the __metatable field when it is present and non-nil,
+ | // and the metatable itself otherwise.
+ | asr ITYPE, CARG1, #47
+ | cmn ITYPE, #-LJ_TTAB
+ | ccmn ITYPE, #-LJ_TUDATA, #4, ne
+ | and TAB:CARG1, CARG1, #LJ_GCVMASK
+ | bne >6
+ |1: // Field metatable must be at same offset for GCtab and GCudata!
+ | ldr TAB:RB, TAB:CARG1->metatable
+ |2:
+ | mov CARG1, TISNIL
+ | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
+ | cbz TAB:RB, ->fff_restv
+ | ldr TMP1w, TAB:RB->hmask
+ | ldr TMP2w, STR:RC->hash
+ | ldr NODE:CARG3, TAB:RB->node
+ | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask
+ | add TMP1, TMP1, TMP1, lsl #1
+ | movn CARG4, #~LJ_TSTR
+ | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
+ | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
+ |3: // Rearranged logic, because we expect _not_ to find the key.
+ | // CARG1 = node value, TMP0 = node key.
+ | ldp CARG1, TMP0, NODE:CARG3->val
+ | ldr NODE:CARG3, NODE:CARG3->next
+ | cmp TMP0, CARG4
+ | beq >5
+ | cbnz NODE:CARG3, <3
+ |4:
+ | mov CARG1, RB // Use metatable as default result.
+ | movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48
+ | b ->fff_restv
+ |5:
+ | // Key found: test the node's value (not the key, which can never be nil
+ | // here) and fall back to the metatable itself if the value is nil.
+ | cmp CARG1, TISNIL
+ | bne ->fff_restv
+ | b <4
+ |
+ |6:
+ | // Not a table or userdata: clamp ITYPE to ~LJ_TISNUM (unsigned lower
+ | // bound) and load the per-type base metatable from gcroot.
+ | movn TMP0, #~LJ_TISNUM
+ | cmp ITYPE, TMP0
+ | csel ITYPE, ITYPE, TMP0, hs
+ | sub TMP1, GL, ITYPE, lsl #3
+ | ldr TAB:RB, [TMP1, #offsetof(global_State, gcroot[GCROOT_BASEMT])-8]
+ | b <2
+ |
+ |.ffunc_2 setmetatable
+ | // Fast path: no mt for table yet and not clearing the mt.
+ | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
+ | ldr TAB:TMP0, TAB:TMP1->metatable
+ | asr ITYPE, CARG2, #47
+ | ldrb TMP2w, TAB:TMP1->marked
+ | cmn ITYPE, #-LJ_TTAB
+ | and TAB:CARG2, CARG2, #LJ_GCVMASK
+ | ccmp TAB:TMP0, #0, #0, eq
+ | bne ->fff_fallback
+ | str TAB:CARG2, TAB:TMP1->metatable
+ | tbz TMP2w, #2, ->fff_restv // isblack(table)
+ | barrierback TAB:TMP1, TMP2w, TMP0
+ | b ->fff_restv
+ |
+ |.ffunc rawget
+ | ldr CARG2, [BASE]
+ | cmp NARGS8:RC, #16
+ | blo ->fff_fallback
+ | checktab CARG2, ->fff_fallback
+ | mov CARG1, L
+ | add CARG3, BASE, #8
+ | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
+ | // Returns cTValue *.
+ | ldr CARG1, [CRET1]
+ | b ->fff_restv
+ |
+ |//-- Base library: conversions ------------------------------------------
+ |
+ |.ffunc tonumber
+ | // Only handles the number case inline (without a base argument).
+ | ldr CARG1, [BASE]
+ | cmp NARGS8:RC, #8
+ | bne ->fff_fallback
+ | checknumber CARG1, ->fff_fallback
+ | b ->fff_restv
+ |
+ |.ffunc_1 tostring
+ | // Only handles the string or number case inline.
+ | asr ITYPE, CARG1, #47
+ | cmn ITYPE, #-LJ_TSTR
+ | // A __tostring method in the string base metatable is ignored.
+ | beq ->fff_restv
+ | // Handle numbers inline, unless a number base metatable is present.
+ | ldr TMP1, GL->gcroot[GCROOT_BASEMT_NUM]
+ | str BASE, L->base
+ | cmn ITYPE, #-LJ_TISNUM
+ | ccmp TMP1, #0, #0, ls
+ | str PC, SAVE_PC // Redundant (but a defined value).
+ | bne ->fff_fallback
+ | ffgccheck
+ | mov CARG1, L
+ | mov CARG2, BASE
+ | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
+ | // Returns GCstr *.
+ | movn TMP1, #~LJ_TSTR
+ | ldr BASE, L->base
+ | add CARG1, CARG1, TMP1, lsl #47
+ | b ->fff_restv
+ |
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |.ffunc_1 next
+ | checktp CARG2, CARG1, LJ_TTAB, ->fff_fallback
+ | str TISNIL, [BASE, NARGS8:RC] // Set missing 2nd arg to nil.
+ | ldr PC, [BASE, FRAME_PC]
+ | stp BASE, BASE, L->base // Add frame since C call can throw.
+ | mov CARG1, L
+ | add CARG3, BASE, #8
+ | str PC, SAVE_PC
+ | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
+ | // Returns 0 at end of traversal.
+ | str TISNIL, [BASE, #-16]
+ | cbz CRET1, ->fff_res1 // End of traversal: return nil.
+ | ldp CARG1, CARG2, [BASE, #8] // Copy key and value to results.
+ | mov RC, #(2+1)*8
+ | stp CARG1, CARG2, [BASE, #-16]
+ | b ->fff_res
+ |
+ |.ffunc_1 pairs
+ | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
+#if LJ_52
+ | ldr TAB:CARG2, TAB:TMP1->metatable
+#endif
+ | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
+ | ldr PC, [BASE, FRAME_PC]
+#if LJ_52
+ | cbnz TAB:CARG2, ->fff_fallback
+#endif
+ | mov RC, #(3+1)*8
+ | stp CARG1, TISNIL, [BASE, #-8]
+ | str CFUNC:CARG4, [BASE, #-16]
+ | b ->fff_res
+ |
+ |.ffunc_2 ipairs_aux
+ | checktab CARG1, ->fff_fallback
+ | checkint CARG2, ->fff_fallback
+ | ldr TMP1w, TAB:CARG1->asize
+ | ldr CARG3, TAB:CARG1->array
+ | ldr TMP0w, TAB:CARG1->hmask
+ | add CARG2w, CARG2w, #1
+ | cmp CARG2w, TMP1w
+ | ldr PC, [BASE, FRAME_PC]
+ | add TMP2, CARG2, TISNUM
+ | mov RC, #(0+1)*8
+ | str TMP2, [BASE, #-16]
+ | bhs >2 // Not in array part?
+ | ldr TMP0, [CARG3, CARG2, lsl #3]
+ |1:
+ | mov TMP1, #(2+1)*8
+ | cmp TMP0, TISNIL
+ | str TMP0, [BASE, #-8]
+ | csel RC, RC, TMP1, eq
+ | b ->fff_res
+ |2: // Check for empty hash part first. Otherwise call C function.
+ | cbz TMP0w, ->fff_res
+ | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // Returns cTValue * or NULL.
+ | cbz CRET1, ->fff_res
+ | ldr TMP0, [CRET1]
+ | b <1
+ |
+ |.ffunc_1 ipairs
+ | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
+#if LJ_52
+ | ldr TAB:CARG2, TAB:TMP1->metatable
+#endif
+ | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
+ | ldr PC, [BASE, FRAME_PC]
+#if LJ_52
+ | cbnz TAB:CARG2, ->fff_fallback
+#endif
+ | mov RC, #(3+1)*8
+ | stp CARG1, TISNUM, [BASE, #-8]
+ | str CFUNC:CARG4, [BASE, #-16]
+ | b ->fff_res
+ |
+ |//-- Base library: catch errors ----------------------------------------
+ |
+ |.ffunc pcall
+ | // Fast path for pcall(f, ...): turns the frame into a protected-call
+ | // frame (delta 16 + FRAME_PCALL, plus the hook-active bit) and dispatches
+ | // to f with the remaining arguments shifted up by one slot.
+ | ldrb TMP0w, GL->hookmask
+ | // Compare first and subtract only after the fallback branch: the
+ | // fallback handler derives L->top from the unmodified NARGS8:RC.
+ | cmp NARGS8:RC, #8
+ | blo ->fff_fallback
+ | sub NARGS8:RC, NARGS8:RC, #8
+ | mov RB, BASE
+ | add BASE, BASE, #16
+ | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1
+ | add PC, TMP0, #16+FRAME_PCALL
+ | // Flags are still those of the cmp above: eq means exactly one argument,
+ | // i.e. nothing to move.
+ | beq ->vm_call_dispatch
+ |1:
+ | // Shared with xpcall (via "b <1"): move the arguments up by one slot,
+ | // working downwards from the top.
+ | add TMP2, BASE, NARGS8:RC
+ |2:
+ | ldr TMP0, [TMP2, #-16]
+ | str TMP0, [TMP2, #-8]!
+ | cmp TMP2, BASE
+ | bne <2
+ | b ->vm_call_dispatch
+ |
+ |.ffunc xpcall
+ | // Fast path for xpcall(f, traceback, ...): requires at least two
+ | // arguments and a function as the second one. Swaps the two, sets up a
+ | // protected-call frame (delta 24 + FRAME_PCALL, plus the hook-active bit)
+ | // and dispatches, reusing pcall's argument move loop.
+ | ldp CARG1, CARG2, [BASE]
+ | ldrb TMP0w, GL->hookmask
+ | // Keep NARGS8:RC and BASE untouched until both fallback checks have
+ | // passed: the fallback handler derives L->base/L->top from them.
+ | subs NARGS8:TMP1, NARGS8:RC, #16
+ | blo ->fff_fallback
+ | mov RB, BASE
+ | asr ITYPE, CARG2, #47
+ | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1
+ | cmn ITYPE, #-LJ_TFUNC
+ | add PC, TMP0, #24+FRAME_PCALL
+ | bne ->fff_fallback // Traceback must be a function.
+ | // Commit: adjusted argument count and new base.
+ | mov NARGS8:RC, NARGS8:TMP1
+ | add BASE, BASE, #24
+ | stp CARG2, CARG1, [RB] // Swap function and traceback.
+ | cbz NARGS8:RC, ->vm_call_dispatch
+ | b <1
+ |
+ |//-- Coroutine library --------------------------------------------------
+ |
+ |.macro coroutine_resume_wrap, resume
+ |.if resume
+ |.ffunc_1 coroutine_resume
+ | checktp CARG1, LJ_TTHREAD, ->fff_fallback
+ |.else
+ |.ffunc coroutine_wrap_aux
+ | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr
+ | and L:CARG1, CARG1, #LJ_GCVMASK
+ |.endif
+ | ldr PC, [BASE, FRAME_PC]
+ | str BASE, L->base
+ | ldp RB, CARG2, L:CARG1->base
+ | ldrb TMP1w, L:CARG1->status
+ | add TMP0, CARG2, TMP1
+ | str PC, SAVE_PC
+ | cmp TMP0, RB
+ | beq ->fff_fallback
+ | cmp TMP1, #LUA_YIELD
+ | add TMP0, CARG2, #8
+ | csel CARG2, CARG2, TMP0, hs
+ | ldr CARG4, L:CARG1->maxstack
+ | add CARG3, CARG2, NARGS8:RC
+ | ldr RB, L:CARG1->cframe
+ | ccmp CARG3, CARG4, #2, ls
+ | ccmp RB, #0, #2, ls
+ | bhi ->fff_fallback
+ |.if resume
+ | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC.
+ | add BASE, BASE, #8
+ | sub NARGS8:RC, NARGS8:RC, #8
+ |.endif
+ | str CARG3, L:CARG1->top
+ | str BASE, L->top
+ | cbz NARGS8:RC, >3
+ |2: // Move args to coroutine.
+ | ldr TMP0, [BASE, RB]
+ | cmp RB, NARGS8:RC
+ | str TMP0, [CARG2, RB]
+ | add RB, RB, #8
+ | bne <2
+ |3:
+ | mov CARG3, #0
+ | mov L:RA, L:CARG1
+ | mov CARG4, #0
+ | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0)
+ | // Returns thread status.
+ |4:
+ | ldp CARG3, CARG4, L:RA->base
+ | cmp CRET1, #LUA_YIELD
+ | ldr BASE, L->base
+ | str L, GL->cur_L
+ | st_vmstate ST_INTERP
+ | bhi >8
+ | sub RC, CARG4, CARG3
+ | ldr CARG1, L->maxstack
+ | add CARG2, BASE, RC
+ | cbz RC, >6 // No results?
+ | cmp CARG2, CARG1
+ | mov RB, #0
+ | bhi >9 // Need to grow stack?
+ |
+ | sub CARG4, RC, #8
+ | str CARG3, L:RA->top // Clear coroutine stack.
+ |5: // Move results from coroutine.
+ | ldr TMP0, [CARG3, RB]
+ | cmp RB, CARG4
+ | str TMP0, [BASE, RB]
+ | add RB, RB, #8
+ | bne <5
+ |6:
+ |.if resume
+ | mov_true TMP1
+ | add RC, RC, #16
+ |7:
+ | str TMP1, [BASE, #-8] // Prepend true/false to results.
+ | sub RA, BASE, #8
+ |.else
+ | mov RA, BASE
+ | add RC, RC, #8
+ |.endif
+ | ands CARG1, PC, #FRAME_TYPE
+ | str PC, SAVE_PC
+ | str RCw, SAVE_MULTRES
+ | beq ->BC_RET_Z
+ | b ->vm_return
+ |
+ |8: // Coroutine returned with error (at co->top-1).
+ |.if resume
+ | ldr TMP0, [CARG4, #-8]!
+ | mov_false TMP1
+ | mov RC, #(2+1)*8
+ | str CARG4, L:RA->top // Remove error from coroutine stack.
+ | str TMP0, [BASE] // Copy error message.
+ | b <7
+ |.else
+ | mov CARG1, L
+ | mov CARG2, L:RA
+ | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
+ | // Never returns.
+ |.endif
+ |
+ |9: // Handle stack expansion on return from yield.
+ | mov CARG1, L
+ | lsr CARG2, RC, #3
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | mov CRET1, #0
+ | b <4
+ |.endmacro
+ |
+ | coroutine_resume_wrap 1 // coroutine.resume
+ | coroutine_resume_wrap 0 // coroutine.wrap
+ |
+ |.ffunc coroutine_yield
+ | // Fast path for coroutine.yield(): stores BASE and BASE+nargs*8 into the
+ | // pair starting at L->base. If bit 0 of L->cframe is clear the fallback
+ | // handles the call; otherwise clears L->cframe, sets L->status to
+ | // LUA_YIELD and leaves the VM with CRET1 = LUA_YIELD.
+ | ldr TMP0, L->cframe
+ | add TMP1, BASE, NARGS8:RC
+ | mov CRET1, #LUA_YIELD
+ | stp BASE, TMP1, L->base
+ | tbz TMP0, #0, ->fff_fallback
+ | str xzr, L->cframe
+ | strb CRET1w, L->status
+ | b ->vm_leave_unw
+ |
+ |//-- Math library -------------------------------------------------------
+ |
+ |// Defines the fast function math_<func> which rounds its first argument
+ |// with the FP instruction <round>. Integers are returned unchanged,
+ |// doubles are rounded inline, anything else goes to the fallback.
+ |.macro math_round, func, round
+ | .ffunc math_ .. func
+ | ldr CARG1, [BASE]
+ | cmp NARGS8:RC, #8
+ | ldr d0, [BASE]
+ | blo ->fff_fallback
+ | cmp TISNUMhi, CARG1, lsr #32
+ | beq ->fff_restv
+ | // Upper word above the integer tag: not a number, so don't round the
+ | // raw bits as a double.
+ | blo ->fff_fallback
+ | round d0, d0
+ | b ->fff_resn
+ |.endmacro
+ |
+ | math_round floor, frintm
+ | math_round ceil, frintp
+ |
+ |.ffunc_1 math_abs
+ | checknumber CARG1, ->fff_fallback
+ | and CARG1, CARG1, #U64x(7fffffff,ffffffff)
+ | bne ->fff_restv
+ | eor CARG2w, CARG1w, CARG1w, asr #31
+ | movz CARG3, #0x41e0, lsl #48 // 2^31.
+ | subs CARG1w, CARG2w, CARG1w, asr #31
+ | add CARG1, CARG1, TISNUM
+ | csel CARG1, CARG1, CARG3, pl
+ | // Fallthrough.
+ |
+ |->fff_restv:
+ | // CARG1 = TValue result.
+ | ldr PC, [BASE, FRAME_PC]
+ | str CARG1, [BASE, #-16]
+ |->fff_res1:
+ | // PC = return.
+ | mov RC, #(1+1)*8
+ |->fff_res:
+ | // RC = (nresults+1)*8, PC = return.
+ | ands CARG1, PC, #FRAME_TYPE
+ | str RCw, SAVE_MULTRES
+ | sub RA, BASE, #16
+ | bne ->vm_return
+ | ldr INSw, [PC, #-4]
+ | decode_RB RB, INS
+ |5:
+ | cmp RC, RB, lsl #3 // More results expected?
+ | blo >6
+ | decode_RA TMP1, INS
+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
+ | sub BASE, RA, TMP1, lsl #3
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ | add TMP1, RA, RC
+ | add RC, RC, #8
+ | str TISNIL, [TMP1, #-8]
+ | b <5
+ |
+ |.macro math_extern, func
+ | .ffunc_n math_ .. func
+ | bl extern func
+ | b ->fff_resn
+ |.endmacro
+ |
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
+ | bl extern func
+ | b ->fff_resn
+ |.endmacro
+ |
+ |.ffunc_n math_sqrt
+ | fsqrt d0, d0
+ |->fff_resn:
+ | ldr PC, [BASE, FRAME_PC]
+ | str d0, [BASE, #-16]
+ | b ->fff_res1
+ |
+ |.ffunc math_log
+ | ldr CARG1, [BASE]
+ | cmp NARGS8:RC, #8
+ | ldr FARG1, [BASE]
+ | bne ->fff_fallback // Need exactly 1 argument.
+ | checknum CARG1, ->fff_fallback
+ | bl extern log
+ | b ->fff_resn
+ |
+ | math_extern log10
+ | math_extern exp
+ | math_extern sin
+ | math_extern cos
+ | math_extern tan
+ | math_extern asin
+ | math_extern acos
+ | math_extern atan
+ | math_extern sinh
+ | math_extern cosh
+ | math_extern tanh
+ | math_extern2 pow
+ | math_extern2 atan2
+ | math_extern2 fmod
+ |
+ |.ffunc_2 math_ldexp
+ | ldr FARG1, [BASE]
+ | checknum CARG1, ->fff_fallback
+ | checkint CARG2, ->fff_fallback
+ | sxtw CARG1, CARG2w
+ | bl extern ldexp // (double x, int exp)
+ | b ->fff_resn
+ |
+ |.ffunc_n math_frexp
+ | add CARG1, sp, TMPDofs
+ | bl extern frexp
+ | ldr CARG2w, TMPD
+ | ldr PC, [BASE, FRAME_PC]
+ | str d0, [BASE, #-16]
+ | mov RC, #(2+1)*8
+ | add CARG2, CARG2, TISNUM
+ | str CARG2, [BASE, #-8]
+ | b ->fff_res
+ |
+ |.ffunc_n math_modf
+ | sub CARG1, BASE, #16
+ | ldr PC, [BASE, FRAME_PC]
+ | bl extern modf
+ | mov RC, #(2+1)*8
+ | str d0, [BASE, #-8]
+ | b ->fff_res
+ |
+ |.macro math_minmax, name, cond, fcond
+ | .ffunc_1 name
+ | add RB, BASE, RC
+ | add RA, BASE, #8
+ | checkint CARG1, >4
+ |1: // Handle integers.
+ | ldr CARG2, [RA]
+ | cmp RA, RB
+ | bhs ->fff_restv
+ | checkint CARG2, >3
+ | cmp CARG1w, CARG2w
+ | add RA, RA, #8
+ | csel CARG1, CARG2, CARG1, cond
+ | b <1
+ |3: // Convert intermediate result to number and continue below.
+ | scvtf d0, CARG1w
+ | blo ->fff_fallback
+ | ldr d1, [RA]
+ | b >6
+ |
+ |4:
+ | ldr d0, [BASE]
+ | blo ->fff_fallback
+ |5: // Handle numbers.
+ | ldr CARG2, [RA]
+ | ldr d1, [RA]
+ | cmp RA, RB
+ | bhs ->fff_resn
+ | checknum CARG2, >7
+ |6:
+ | fcmp d0, d1
+ | add RA, RA, #8
+ | fcsel d0, d1, d0, fcond
+ | b <5
+ |7: // Convert integer to number and continue above.
+ | scvtf d1, CARG2w
+ | blo ->fff_fallback
+ | b <6
+ |.endmacro
+ |
+ | math_minmax math_min, gt, hi
+ | math_minmax math_max, lt, lo
+ |
+ |//-- String library -----------------------------------------------------
+ |
+ |.ffunc string_byte // Only handle the 1-arg case here.
+ | ldp PC, CARG1, [BASE, FRAME_PC]
+ | cmp NARGS8:RC, #8
+ | asr ITYPE, CARG1, #47
+ | ccmn ITYPE, #-LJ_TSTR, #0, eq
+ | and STR:CARG1, CARG1, #LJ_GCVMASK
+ | bne ->fff_fallback
+ | ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end).
+ | ldr CARG3w, STR:CARG1->len
+ | add TMP0, TMP0, TISNUM
+ | str TMP0, [BASE, #-16]
+ | mov RC, #(0+1)*8
+ | cbz CARG3, ->fff_res
+ | b ->fff_res1
+ |
+ |.ffunc string_char // Only handle the 1-arg case here.
+ | ffgccheck
+ | ldp PC, CARG1, [BASE, FRAME_PC]
+ | cmp CARG1w, #255
+ | ccmp NARGS8:RC, #8, #0, ls // Need exactly 1 argument.
+ | bne ->fff_fallback
+ | checkint CARG1, ->fff_fallback
+ | mov CARG3, #1
+ | mov CARG2, BASE // Points to stack. Little-endian.
+ |->fff_newstr:
+ | // CARG2 = str, CARG3 = len.
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
+ |->fff_resstr:
+ | // Returns GCstr *.
+ | ldr BASE, L->base
+ | movn TMP1, #~LJ_TSTR
+ | add CARG1, CARG1, TMP1, lsl #47
+ | b ->fff_restv
+ |
+ |.ffunc string_sub
+ | ffgccheck
+ | ldr CARG1, [BASE]
+ | ldr CARG3, [BASE, #16]
+ | cmp NARGS8:RC, #16
+ | movn RB, #0
+ | beq >1
+ | blo ->fff_fallback
+ | checkint CARG3, ->fff_fallback
+ | sxtw RB, CARG3w
+ |1:
+ | ldr CARG2, [BASE, #8]
+ | checkstr CARG1, ->fff_fallback
+ | ldr TMP1w, STR:CARG1->len
+ | checkint CARG2, ->fff_fallback
+ | sxtw CARG2, CARG2w
+ | // CARG1 = str, TMP1 = str->len, CARG2 = start, RB = end
+ | add TMP2, RB, TMP1
+ | cmp RB, #0
+ | add TMP0, CARG2, TMP1
+ | csinc RB, RB, TMP2, ge // if (end < 0) end += len+1
+ | cmp CARG2, #0
+ | csinc CARG2, CARG2, TMP0, ge // if (start < 0) start += len+1
+ | cmp RB, #0
+ | csel RB, RB, xzr, ge // if (end < 0) end = 0
+ | cmp CARG2, #1
+ | csinc CARG2, CARG2, xzr, ge // if (start < 1) start = 1
+ | cmp RB, TMP1
+ | csel RB, RB, TMP1, le // if (end > len) end = len
+ | add CARG1, STR:CARG1, #sizeof(GCstr)-1
+ | subs CARG3, RB, CARG2 // len = end - start
+ | add CARG2, CARG1, CARG2
+ | add CARG3, CARG3, #1 // len += 1
+ | bge ->fff_newstr
+ | add STR:CARG1, GL, #offsetof(global_State, strempty)
+ | movn TMP1, #~LJ_TSTR
+ | add CARG1, CARG1, TMP1, lsl #47
+ | b ->fff_restv
+ |
+ |.macro ffstring_op, name
+ | .ffunc string_ .. name
+ | ffgccheck
+ | ldr CARG2, [BASE]
+ | cmp NARGS8:RC, #8
+ | asr ITYPE, CARG2, #47
+ | ccmn ITYPE, #-LJ_TSTR, #0, hs
+ | and STR:CARG2, CARG2, #LJ_GCVMASK
+ | bne ->fff_fallback
+ | ldr TMP0, GL->tmpbuf.b
+ | add SBUF:CARG1, GL, #offsetof(global_State, tmpbuf)
+ | str BASE, L->base
+ | str PC, SAVE_PC
+ | str L, GL->tmpbuf.L
+ | str TMP0, GL->tmpbuf.p
+ | bl extern lj_buf_putstr_ .. name
+ | bl extern lj_buf_tostr
+ | b ->fff_resstr
+ |.endmacro
+ |
+ |ffstring_op reverse
+ |ffstring_op lower
+ |ffstring_op upper
+ |
+ |//-- Bit library --------------------------------------------------------
+ |
+ |// FP number to bit conversion for soft-float. Clobbers CARG1-CARG3
+ |// Entered by branch with the return address in lr and with flags from the
+ |// caller's preceding type comparison; "ls" goes to the fallback.
+ |// Input: CARG1 = raw double bits. Output: CARG1w = 32 bit result.
+ |->vm_tobit_fb:
+ | bls ->fff_fallback
+ | // CARG2 = value shifted left by one (drops the sign bit).
+ | add CARG2, CARG1, CARG1
+ | // CARG3 = 1076 - biased exponent = right shift count for the mantissa.
+ | mov CARG3, #1076
+ | sub CARG3, CARG3, CARG2, lsr #53
+ | cmp CARG3, #53
+ | bhi >1
+ | // Keep the (shifted) mantissa bits and set the implicit leading one.
+ | and CARG2, CARG2, #U64x(001fffff,ffffffff)
+ | orr CARG2, CARG2, #U64x(00200000,00000000)
+ | cmp CARG1, #0
+ | lsr CARG2, CARG2, CARG3
+ | // Negate the low word if the input was negative.
+ | cneg CARG1w, CARG2w, mi
+ | br lr
+ |1:
+ | // Shift count out of range: result is 0.
+ | mov CARG1w, #0
+ | br lr
+ |
+ |.macro .ffunc_bit, name
+ | .ffunc_1 bit_..name
+ | adr lr, >1
+ | checkint CARG1, ->vm_tobit_fb
+ |1:
+ |.endmacro
+ |
+ |.macro .ffunc_bit_op, name, ins
+ | .ffunc_bit name
+ | mov RA, #8
+ | mov TMP0w, CARG1w
+ | adr lr, >2
+ |1:
+ | ldr CARG1, [BASE, RA]
+ | cmp RA, NARGS8:RC
+ | add RA, RA, #8
+ | bge >9
+ | checkint CARG1, ->vm_tobit_fb
+ |2:
+ | ins TMP0w, TMP0w, CARG1w
+ | b <1
+ |.endmacro
+ |
+ |.ffunc_bit_op band, and
+ |.ffunc_bit_op bor, orr
+ |.ffunc_bit_op bxor, eor
+ |
+ |.ffunc_bit tobit
+ | mov TMP0w, CARG1w
+ |9: // Label reused by .ffunc_bit_op users.
+ | add CARG1, TMP0, TISNUM
+ | b ->fff_restv
+ |
+ |.ffunc_bit bswap
+ | rev TMP0w, CARG1w
+ | add CARG1, TMP0, TISNUM
+ | b ->fff_restv
+ |
+ |.ffunc_bit bnot
+ | mvn TMP0w, CARG1w
+ | add CARG1, TMP0, TISNUM
+ | b ->fff_restv
+ |
+ |.macro .ffunc_bit_sh, name, ins, shmod
+ | .ffunc bit_..name
+ | ldp TMP0, CARG1, [BASE]
+ | cmp NARGS8:RC, #16
+ | blo ->fff_fallback
+ | adr lr, >1
+ | checkint CARG1, ->vm_tobit_fb
+ |1:
+ |.if shmod == 0
+ | mov TMP1, CARG1
+ |.else
+ | neg TMP1, CARG1
+ |.endif
+ | mov CARG1, TMP0
+ | adr lr, >2
+ | checkint CARG1, ->vm_tobit_fb
+ |2:
+ | ins TMP0w, CARG1w, TMP1w
+ | add CARG1, TMP0, TISNUM
+ | b ->fff_restv
+ |.endmacro
+ |
+ |.ffunc_bit_sh lshift, lsl, 0
+ |.ffunc_bit_sh rshift, lsr, 0
+ |.ffunc_bit_sh arshift, asr, 0
+ |.ffunc_bit_sh rol, ror, 1
+ |.ffunc_bit_sh ror, ror, 0
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->fff_fallback: // Call fast function fallback handler.
+ | // BASE = new base, RC = nargs*8
+ | ldp CFUNC:CARG3, PC, [BASE, FRAME_FUNC] // Fallback may overwrite PC.
+ | ldr TMP2, L->maxstack
+ | add TMP1, BASE, NARGS8:RC
+ | stp BASE, TMP1, L->base
+ | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | add TMP1, TMP1, #8*LUA_MINSTACK
+ | ldr CARG3, CFUNC:CARG3->f
+ | str PC, SAVE_PC // Redundant (but a defined value).
+ | cmp TMP1, TMP2
+ | mov CARG1, L
+ | bhi >5 // Need to grow stack.
+ | blr CARG3 // (lua_State *L)
+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
+ | ldr BASE, L->base
+ | cmp CRET1w, #0
+ | lsl RC, CRET1, #3
+ | sub RA, BASE, #16
+ | bgt ->fff_res // Returned nresults+1?
+ |1: // Returned 0 or -1: retry fast path.
+ | ldr CARG1, L->top
+ | ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
+ | sub NARGS8:RC, CARG1, BASE
+ | bne ->vm_call_tail // Returned -1?
+ | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | ins_callt // Returned 0: retry fast path.
+ |
+ |// Reconstruct previous base for vmeta_call during tailcall.
+ |->vm_call_tail:
+ | ands TMP0, PC, #FRAME_TYPE
+ | and TMP1, PC, #~FRAME_TYPEP
+ | bne >3
+ | ldrb RAw, [PC, #-3]
+ | lsl RA, RA, #3
+ | add TMP1, RA, #16
+ |3:
+ | sub RB, BASE, TMP1
+ | b ->vm_call_dispatch // Resolve again for tailcall.
+ |
+ |5: // Grow stack for fallback handler.
+ | mov CARG2, #LUA_MINSTACK
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | ldr BASE, L->base
+ | cmp CARG1, CARG1 // Set zero-flag to force retry.
+ | b <1
+ |
+ |->fff_gcstep: // Call GC step function.
+ | // BASE = new base, RC = nargs*8
+ | // Called with bl from fast functions. Saves lr in RA across the C call,
+ | // publishes BASE and the computed top to L, and afterwards reloads BASE,
+ | // recomputes NARGS8:RC and reloads the masked function object in CARG3.
+ | add CARG2, BASE, NARGS8:RC // Calculate L->top.
+ | mov RA, lr
+ | stp BASE, CARG2, L->base
+ | str PC, SAVE_PC // Redundant (but a defined value).
+ | mov CARG1, L
+ | bl extern lj_gc_step // (lua_State *L)
+ | ldp BASE, CARG2, L->base
+ | ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
+ | mov lr, RA // Help return address predictor.
+ | sub NARGS8:RC, CARG2, BASE // Calculate nargs*8.
+ | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | ret
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Special dispatch targets -------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_record: // Dispatch target for recording phase.
+ | NYI
+ |
+ |->vm_rethook: // Dispatch target for return hooks.
+ | ldrb TMP2w, GL->hookmask
+ | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active?
+ |5: // Re-dispatch to static ins.
+ | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
+ | br TMP0
+ |
+ |->vm_inshook: // Dispatch target for instr/line hooks.
+ | ldrb TMP2w, GL->hookmask
+ | ldr TMP3w, GL->hookcount
+ | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active?
+ | tst TMP2w, #LUA_MASKLINE|LUA_MASKCOUNT
+ | beq <5
+ | sub TMP3w, TMP3w, #1
+ | str TMP3w, GL->hookcount
+ | cbz TMP3w, >1
+ | tbz TMP2w, #LUA_HOOKLINE, <5
+ |1:
+ | mov CARG1, L
+ | str BASE, L->base
+ | mov CARG2, PC
+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+ | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
+ |3:
+ | ldr BASE, L->base
+ |4: // Re-dispatch to static ins.
+ | ldr INSw, [PC, #-4]
+ | add TMP1, GL, INS, uxtb #3
+ | decode_RA RA, INS
+ | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
+ | decode_RD RC, INS
+ | br TMP0
+ |
+ |->cont_hook: // Continue from hook yield.
+ | ldr CARG1, [CARG4, #-40]
+ | add PC, PC, #4
+ | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins.
+ | b <4
+ |
+ |->vm_hotloop: // Hot loop counter underflow.
+ | NYI
+ |
+ |->vm_callhook: // Dispatch target for call hooks.
+ | mov CARG2, PC
+ |.if JIT
+ | b >1
+ |.endif
+ |
+ |->vm_hotcall: // Hot call counter underflow.
+ |.if JIT
+ | orr CARG2, PC, #1
+ |1:
+ |.endif
+ | add TMP1, BASE, NARGS8:RC
+ | str PC, SAVE_PC
+ | mov CARG1, L
+ | sub RA, RA, BASE
+ | stp BASE, TMP1, L->base
+ | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
+ | // Returns ASMFunction.
+ | ldp BASE, TMP1, L->base
+ | str xzr, SAVE_PC // Invalidate for subsequent line hook.
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+ | add RA, BASE, RA
+ | sub NARGS8:RC, TMP1, BASE
+ | ldr INSw, [PC, #-4]
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | br CRET1
+ |
+ |->cont_stitch: // Trace stitching.
+ | NYI
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+ | mov CARG1, L
+ | str BASE, L->base
+ | mov CARG2, PC
+ | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+ | ldr BASE, L->base
+ | sub PC, PC, #4
+ | b ->cont_nop
+#endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Trace exit handler -------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_exit_handler:
+ | NYI
+ |->vm_exit_interp:
+ | NYI
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Math helper functions ----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ | // int lj_vm_modi(int dividend, int divisor);
+ | // Integer modulo whose non-zero result takes the sign of the divisor:
+ | // the remainder of the absolute values is computed with an unsigned
+ | // divide and then adjusted. Inputs are read from CARG1w (dividend) and
+ | // CARG2w (divisor); the result is left in CARG1w. CARG3w/CARG4w are
+ | // used as scratch. There is no zero-divisor check in this routine; the
+ | // ins_arithdn macro tests the divisor (ins_arithcheck_nzdiv) before
+ | // branching here.
+ |->vm_modi:
+ | eor CARG4w, CARG1w, CARG2w
+ | cmp CARG4w, #0 // N flag set (mi) iff the operand signs differ.
+ | eor CARG3w, CARG1w, CARG1w, asr #31
+ | eor CARG4w, CARG2w, CARG2w, asr #31
+ | sub CARG3w, CARG3w, CARG1w, asr #31 // CARG3w = abs(dividend).
+ | sub CARG4w, CARG4w, CARG2w, asr #31 // CARG4w = abs(divisor).
+ | udiv CARG1w, CARG3w, CARG4w
+ | msub CARG1w, CARG1w, CARG4w, CARG3w // r = abs(dividend) mod abs(divisor).
+ | // Signs differ: compare r with 0. Signs equal: force Z (nzcv = #4).
+ | // None of the instructions since the cmp above modify the flags.
+ | ccmp CARG1w, #0, #4, mi
+ | sub CARG3w, CARG1w, CARG4w
+ | csel CARG1w, CARG1w, CARG3w, eq // Keep r, or use r - abs(divisor).
+ | eor CARG3w, CARG1w, CARG2w
+ | cmp CARG3w, #0
+ | cneg CARG1w, CARG1w, mi // Negate if the sign differs from the divisor's.
+ | ret
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Miscellaneous functions --------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |//-----------------------------------------------------------------------
+ |//-- FFI helper functions -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Handler for callback functions.
+ |// Saveregs is performed below. Callback slot number in w9, g in x10.
+ |->vm_ffi_callback:
+ |.if FFI
+ |.type CTSTATE, CTState, PC
+ | saveregs
+ | ldr CTSTATE, GL:x10->ctype_state
+ | mov GL, x10
+ | add x10, sp, # CFRAME_SPACE
+ | str w9, CTSTATE->cb.slot
+ | stp x0, x1, CTSTATE->cb.gpr[0]
+ | stp d0, d1, CTSTATE->cb.fpr[0]
+ | stp x2, x3, CTSTATE->cb.gpr[2]
+ | stp d2, d3, CTSTATE->cb.fpr[2]
+ | stp x4, x5, CTSTATE->cb.gpr[4]
+ | stp d4, d5, CTSTATE->cb.fpr[4]
+ | stp x6, x7, CTSTATE->cb.gpr[6]
+ | stp d6, d7, CTSTATE->cb.fpr[6]
+ | str x10, CTSTATE->cb.stack
+ | mov CARG1, CTSTATE
+ | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok.
+ | mov CARG2, sp
+ | bl extern lj_ccallback_enter // (CTState *cts, void *cf)
+ | // Returns lua_State *.
+ | ldp BASE, RC, L:CRET1->base
+ | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
+ | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+ | movn TISNIL, #0
+ | mov L, CRET1
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+ | sub RC, RC, BASE
+ | st_vmstate ST_INTERP
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | ins_callt
+ |.endif
+ |
+ |->cont_ffi_callback: // Return from FFI callback.
+ |.if FFI
+ | ldr CTSTATE, GL->ctype_state
+ | stp BASE, CARG4, L->base
+ | str L, CTSTATE->L
+ | mov CARG1, CTSTATE
+ | mov CARG2, RA
+ | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
+ | ldp x0, x1, CTSTATE->cb.gpr[0]
+ | ldp d0, d1, CTSTATE->cb.fpr[0]
+ | b ->vm_leave_unw
+ |.endif
+ |
+ |->vm_ffi_call: // Call C function via FFI.
+ | // Caveat: needs special frame unwinding, see below.
+ |.if FFI
+ | .type CCSTATE, CCallState, x19
+ | stp fp, lr, [sp, #-32]!
+ | add fp, sp, #0
+ | str CCSTATE, [sp, #16]
+ | mov CCSTATE, x0
+ | ldr TMP0w, CCSTATE:x0->spadj
+ | ldrb TMP1w, CCSTATE->nsp
+ | add TMP2, CCSTATE, #offsetof(CCallState, stack)
+ | subs TMP1, TMP1, #1
+ | ldr TMP3, CCSTATE->func
+ | sub sp, fp, TMP0
+ | bmi >2
+ |1: // Copy stack slots
+ | ldr TMP0, [TMP2, TMP1, lsl #3]
+ | str TMP0, [sp, TMP1, lsl #3]
+ | subs TMP1, TMP1, #1
+ | bpl <1
+ |2:
+ | ldp x0, x1, CCSTATE->gpr[0]
+ | ldp d0, d1, CCSTATE->fpr[0]
+ | ldp x2, x3, CCSTATE->gpr[2]
+ | ldp d2, d3, CCSTATE->fpr[2]
+ | ldp x4, x5, CCSTATE->gpr[4]
+ | ldp d4, d5, CCSTATE->fpr[4]
+ | ldp x6, x7, CCSTATE->gpr[6]
+ | ldp d6, d7, CCSTATE->fpr[6]
+ | ldr x8, CCSTATE->retp
+ | blr TMP3
+ | mov sp, fp
+ | stp x0, x1, CCSTATE->gpr[0]
+ | stp d0, d1, CCSTATE->fpr[0]
+ | stp d2, d3, CCSTATE->fpr[2]
+ | ldr CCSTATE, [sp, #16]
+ | ldp fp, lr, [sp], #32
+ | ret
+ |.endif
+ |// Note: vm_ffi_call must be the last function in this object file!
+ |
+ |//-----------------------------------------------------------------------
+}
+
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+{
+ int vk = 0;
+ |=>defop:
+
+ switch (op) {
+
+ /* -- Comparison ops ---------------------------------------------------- */
+
+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
+
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+ | // RA = src1, RC = src2, JMP with RC = target
+ | ldr CARG1, [BASE, RA, lsl #3]
+ | ldrh RBw, [PC, #2]
+ | ldr CARG2, [BASE, RC, lsl #3]
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ | sub RB, RB, #0x20000
+ | checkint CARG1, >3
+ | checkint CARG2, >4
+ | cmp CARG1w, CARG2w
+ if (op == BC_ISLT) {
+ | csel PC, RB, PC, lt
+ } else if (op == BC_ISGE) {
+ | csel PC, RB, PC, ge
+ } else if (op == BC_ISLE) {
+ | csel PC, RB, PC, le
+ } else {
+ | csel PC, RB, PC, gt
+ }
+ |1:
+ | ins_next
+ |
+ |3: // RA not int.
+ | ldr FARG1, [BASE, RA, lsl #3]
+ | blo ->vmeta_comp
+ | ldr FARG2, [BASE, RC, lsl #3]
+ | cmp TISNUMhi, CARG2, lsr #32
+ | bhi >5
+ | bne ->vmeta_comp
+ | // RA number, RC int.
+ | scvtf FARG2, CARG2w
+ | b >5
+ |
+ |4: // RA int, RC not int
+ | ldr FARG2, [BASE, RC, lsl #3]
+ | blo ->vmeta_comp
+ | // RA int, RC number.
+ | scvtf FARG1, CARG1w
+ |
+ |5: // RA number, RC number
+ | fcmp FARG1, FARG2
+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+ if (op == BC_ISLT) {
+ | csel PC, RB, PC, lo
+ } else if (op == BC_ISGE) {
+ | csel PC, RB, PC, hs
+ } else if (op == BC_ISLE) {
+ | csel PC, RB, PC, ls
+ } else {
+ | csel PC, RB, PC, hi
+ }
+ | b <1
+ break;
+
+ case BC_ISEQV: case BC_ISNEV:
+ vk = op == BC_ISEQV;
+ | // RA = src1, RC = src2, JMP with RC = target
+ | ldr CARG1, [BASE, RA, lsl #3]
+ | add RC, BASE, RC, lsl #3
+ | ldrh RBw, [PC, #2]
+ | ldr CARG3, [RC]
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ | sub RB, RB, #0x20000
+ | asr ITYPE, CARG3, #47
+ | cmn ITYPE, #-LJ_TISNUM
+ if (vk) {
+ | bls ->BC_ISEQN_Z
+ } else {
+ | bls ->BC_ISNEN_Z
+ }
+ | // RC is not a number.
+ | asr TMP0, CARG1, #47
+ |.if FFI
+ | // Check if RC or RA is a cdata.
+ | cmn ITYPE, #-LJ_TCDATA
+ | ccmn TMP0, #-LJ_TCDATA, #4, ne
+ | beq ->vmeta_equal_cd
+ |.endif
+ | cmp CARG1, CARG3
+ | bne >2
+ | // Tag and value are equal.
+ if (vk) {
+ |->BC_ISEQV_Z:
+ | mov PC, RB // Perform branch.
+ }
+ |1:
+ | ins_next
+ |
+ |2: // Check if the tags are the same and it's a table or userdata.
+ | cmp ITYPE, TMP0
+ | ccmn ITYPE, #-LJ_TISTABUD, #2, eq
+ if (vk) {
+ | bhi <1
+ } else {
+ | bhi ->BC_ISEQV_Z // Reuse code from opposite instruction.
+ }
+ | // Different tables or userdatas. Need to check __eq metamethod.
+ | // Field metatable must be at same offset for GCtab and GCudata!
+ | and TAB:CARG2, CARG1, #LJ_GCVMASK
+ | ldr TAB:TMP2, TAB:CARG2->metatable
+ if (vk) {
+ | cbz TAB:TMP2, <1 // No metatable?
+ | ldrb TMP1w, TAB:TMP2->nomm
+ | mov CARG4, #0 // ne = 0
+ | tbnz TMP1w, #MM_eq, <1 // 'no __eq' flag set: done.
+ } else {
+ | cbz TAB:TMP2, ->BC_ISEQV_Z // No metatable?
+ | ldrb TMP1w, TAB:TMP2->nomm
+ | mov CARG4, #1 // ne = 1.
+ | tbnz TMP1w, #MM_eq, ->BC_ISEQV_Z // 'no __eq' flag set: done.
+ }
+ | b ->vmeta_equal
+ break;
+
+ case BC_ISEQS: case BC_ISNES:
+ vk = op == BC_ISEQS;
+ | // RA = src, RC = str_const (~), JMP with RC = target
+ | ldr CARG1, [BASE, RA, lsl #3]
+ | mvn RC, RC
+ | ldrh RBw, [PC, #2]
+ | ldr CARG2, [KBASE, RC, lsl #3]
+ | add PC, PC, #4
+ | movn TMP0, #~LJ_TSTR
+ |.if FFI
+ | asr ITYPE, CARG1, #47
+ |.endif
+ | add RB, PC, RB, lsl #2
+ | add CARG2, CARG2, TMP0, lsl #47
+ | sub RB, RB, #0x20000
+ |.if FFI
+ | cmn ITYPE, #-LJ_TCDATA
+ | beq ->vmeta_equal_cd
+ |.endif
+ | cmp CARG1, CARG2
+ if (vk) {
+ | csel PC, RB, PC, eq
+ } else {
+ | csel PC, RB, PC, ne
+ }
+ | ins_next
+ break;
+
+ case BC_ISEQN: case BC_ISNEN:
+ vk = op == BC_ISEQN;
+ | // RA = src, RC = num_const (~), JMP with RC = target
+ | ldr CARG1, [BASE, RA, lsl #3]
+ | add RC, KBASE, RC, lsl #3
+ | ldrh RBw, [PC, #2]
+ | ldr CARG3, [RC]
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ | sub RB, RB, #0x20000
+ if (vk) {
+ |->BC_ISEQN_Z:
+ } else {
+ |->BC_ISNEN_Z:
+ }
+ | checkint CARG1, >4
+ | checkint CARG3, >6
+ | cmp CARG1w, CARG3w
+ |1:
+ if (vk) {
+ | csel PC, RB, PC, eq
+ |2:
+ } else {
+ |2:
+ | csel PC, RB, PC, ne
+ }
+ |3:
+ | ins_next
+ |
+ |4: // RA not int.
+ |.if FFI
+ | blo >7
+ |.else
+ | blo <2
+ |.endif
+ | ldr FARG1, [BASE, RA, lsl #3]
+ | ldr FARG2, [RC]
+ | cmp TISNUMhi, CARG3, lsr #32
+ | bne >5
+ | // RA number, RC int.
+ | scvtf FARG2, CARG3w
+ |5:
+ | // RA number, RC number.
+ | fcmp FARG1, FARG2
+ | b <1
+ |
+ |6: // RA int, RC number
+ | ldr FARG2, [RC]
+ | scvtf FARG1, CARG1w
+ | fcmp FARG1, FARG2
+ | b <1
+ |
+ |.if FFI
+ |7:
+ | asr ITYPE, CARG1, #47
+ | cmn ITYPE, #-LJ_TCDATA
+ | bne <2
+ | b ->vmeta_equal_cd
+ |.endif
+ break;
+
+ case BC_ISEQP: case BC_ISNEP:
+ vk = op == BC_ISEQP;
+ | // RA = src, RC = primitive_type (~), JMP with RC = target
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | ldrh RBw, [PC, #2]
+ | add PC, PC, #4
+ | add RC, RC, #1
+ | add RB, PC, RB, lsl #2
+ |.if FFI
+ | asr ITYPE, TMP0, #47
+ | cmn ITYPE, #-LJ_TCDATA
+ | beq ->vmeta_equal_cd
+ | cmn RC, ITYPE
+ |.else
+ | cmn RC, TMP0, asr #47
+ |.endif
+ | sub RB, RB, #0x20000
+ if (vk) {
+ | csel PC, RB, PC, eq
+ } else {
+ | csel PC, RB, PC, ne
+ }
+ | ins_next
+ break;
+
+ /* -- Unary test and copy ops ------------------------------------------- */
+
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+ | // RA = dst or unused, RC = src, JMP with RC = target
+ | ldrh RBw, [PC, #2]
+ | ldr TMP0, [BASE, RC, lsl #3]
+ | add PC, PC, #4
+ | mov_false TMP1
+ | add RB, PC, RB, lsl #2
+ | cmp TMP0, TMP1
+ | sub RB, RB, #0x20000
+ if (op == BC_ISTC || op == BC_IST) {
+ if (op == BC_ISTC) {
+ | csel RA, RA, RC, lo
+ }
+ | csel PC, RB, PC, lo
+ } else {
+ if (op == BC_ISFC) {
+ | csel RA, RA, RC, hs
+ }
+ | csel PC, RB, PC, hs
+ }
+ if (op == BC_ISTC || op == BC_ISFC) {
+ | str TMP0, [BASE, RA, lsl #3]
+ }
+ | ins_next
+ break;
+
+ case BC_ISTYPE:
+ | // RA = src, RC = -type
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | cmn RC, TMP0, asr #47
+ | bne ->vmeta_istype
+ | ins_next
+ break;
+ case BC_ISNUM:
+ | // RA = src, RC = -(TISNUM-1)
+ | // Type assertion on stack slot RA: falls through to the next
+ | // instruction, unless checknum branches to ->vmeta_istype.
+ | // RA is an unscaled slot index (cf. BC_ISTYPE), so it must be scaled
+ | // by the 8 byte slot size when addressing the stack.
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | checknum TMP0, ->vmeta_istype
+ | ins_next
+ break;
+
+ /* -- Unary ops --------------------------------------------------------- */
+
+ case BC_MOV:
+ | // RA = dst, RC = src
+ | ldr TMP0, [BASE, RC, lsl #3]
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_NOT:
+ | // RA = dst, RC = src
+ | ldr TMP0, [BASE, RC, lsl #3]
+ | mov_false TMP1
+ | mov_true TMP2
+ | cmp TMP0, TMP1
+ | csel TMP0, TMP1, TMP2, lo
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_UNM:
+ | // RA = dst, RC = src
+ | ldr TMP0, [BASE, RC, lsl #3]
+ | asr ITYPE, TMP0, #47
+ | cmn ITYPE, #-LJ_TISNUM
+ | bhi ->vmeta_unm
+ | eor TMP0, TMP0, #U64x(80000000,00000000)
+ | bne >5
+ | negs TMP0w, TMP0w
+ | movz CARG3, #0x41e0, lsl #48 // 2^31.
+ | add TMP0, TMP0, TISNUM
+ | csel TMP0, TMP0, CARG3, vc
+ |5:
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_LEN:
+ | // RA = dst, RC = src
+ | ldr CARG1, [BASE, RC, lsl #3]
+ | asr ITYPE, CARG1, #47
+ | cmn ITYPE, #-LJ_TSTR
+ | and CARG1, CARG1, #LJ_GCVMASK
+ | bne >2
+ | ldr CARG1w, STR:CARG1->len
+ |1:
+ | add CARG1, CARG1, TISNUM
+ | str CARG1, [BASE, RA, lsl #3]
+ | ins_next
+ |
+ |2:
+ | cmn ITYPE, #-LJ_TTAB
+ | bne ->vmeta_len
+#if LJ_52
+ | ldr TAB:CARG2, TAB:CARG1->metatable
+ | cbnz TAB:CARG2, >9
+ |3:
+#endif
+ |->BC_LEN_Z:
+ | bl extern lj_tab_len // (GCtab *t)
+ | // Returns uint32_t (but less than 2^31).
+ | b <1
+ |
+#if LJ_52
+ |9:
+ | ldrb TMP1w, TAB:CARG2->nomm
+ | tbnz TMP1w, #MM_len, <3 // 'no __len' flag set: done.
+ | b ->vmeta_len
+#endif
+ break;
+
+ /* -- Binary ops -------------------------------------------------------- */
+
+ |.macro ins_arithcheck_int, target
+ | checkint CARG1, target
+ | checkint CARG2, target
+ |.endmacro
+ |
+ |.macro ins_arithcheck_num, target
+ | checknum CARG1, target
+ | checknum CARG2, target
+ |.endmacro
+ |
+ |.macro ins_arithcheck_nzdiv, target
+ | cbz CARG2w, target
+ |.endmacro
+ |
+ |.macro ins_arithhead
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ ||if (vk == 1) {
+ | and RC, RC, #255
+ | decode_RB RB, INS
+ ||} else {
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ ||}
+ |.endmacro
+ |
+ |.macro ins_arithload, reg1, reg2
+ | // RA = dst, RB = src1, RC = src2 | num_const
+ ||switch (vk) {
+ ||case 0:
+ | ldr reg1, [BASE, RB, lsl #3]
+ | ldr reg2, [KBASE, RC, lsl #3]
+ || break;
+ ||case 1:
+ | ldr reg1, [KBASE, RC, lsl #3]
+ | ldr reg2, [BASE, RB, lsl #3]
+ || break;
+ ||default:
+ | ldr reg1, [BASE, RB, lsl #3]
+ | ldr reg2, [BASE, RC, lsl #3]
+ || break;
+ ||}
+ |.endmacro
+ |
+ |.macro ins_arithfallback, ins
+ ||switch (vk) {
+ ||case 0:
+ | ins ->vmeta_arith_vn
+ || break;
+ ||case 1:
+ | ins ->vmeta_arith_nv
+ || break;
+ ||default:
+ | ins ->vmeta_arith_vv
+ || break;
+ ||}
+ |.endmacro
+ |
+ |.macro ins_arithmod, res, reg1, reg2
+ | fdiv d2, reg1, reg2
+ | frintm d2, d2
+ | fmsub res, d2, reg2, reg1
+ |.endmacro
+ |
+ |.macro ins_arithdn, intins, fpins
+ | ins_arithhead
+ | ins_arithload CARG1, CARG2
+ | ins_arithcheck_int >5
+ |.if "intins" == "smull"
+ | smull CARG1, CARG1w, CARG2w
+ | cmp CARG1, CARG1, sxtw
+ | mov CARG1w, CARG1w
+ | ins_arithfallback bne
+ |.elif "intins" == "ins_arithmodi"
+ | ins_arithfallback ins_arithcheck_nzdiv
+ | bl ->vm_modi
+ |.else
+ | intins CARG1w, CARG1w, CARG2w
+ | ins_arithfallback bvs
+ |.endif
+ | add CARG1, CARG1, TISNUM
+ | str CARG1, [BASE, RA, lsl #3]
+ |4:
+ | ins_next
+ |
+ |5: // FP variant.
+ | ins_arithload FARG1, FARG2
+ | ins_arithfallback ins_arithcheck_num
+ | fpins FARG1, FARG1, FARG2
+ | str FARG1, [BASE, RA, lsl #3]
+ | b <4
+ |.endmacro
+ |
+ |.macro ins_arithfp, fpins
+ | ins_arithhead
+ | ins_arithload CARG1, CARG2
+ | ins_arithload FARG1, FARG2
+ | ins_arithfallback ins_arithcheck_num
+ |.if "fpins" == "fpow"
+ | bl extern pow
+ |.else
+ | fpins FARG1, FARG1, FARG2
+ |.endif
+ | str FARG1, [BASE, RA, lsl #3]
+ | ins_next
+ |.endmacro
+
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
+ | ins_arithdn adds, fadd
+ break;
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+ | ins_arithdn subs, fsub
+ break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
+ | ins_arithdn smull, fmul
+ break;
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+ | ins_arithfp fdiv
+ break;
+ case BC_MODVN: case BC_MODNV: case BC_MODVV:
+ | ins_arithdn ins_arithmodi, ins_arithmod
+ break;
+ case BC_POW:
+ | // NYI: (partial) integer arithmetic.
+ | ins_arithfp fpow
+ break;
+
+ case BC_CAT:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = dst, RB = src_start, RC = src_end
+ | str BASE, L->base
+ | sub CARG3, RC, RB
+ | add CARG2, BASE, RC, lsl #3
+ |->BC_CAT_Z:
+ | // RA = dst, CARG2 = top-1, CARG3 = left
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
+ | // Returns NULL (finished) or TValue * (metamethod).
+ | ldrb RBw, [PC, #-1]
+ | ldr BASE, L->base
+ | cbnz CRET1, ->vmeta_binop
+ | ldr TMP0, [BASE, RB, lsl #3]
+ | str TMP0, [BASE, RA, lsl #3] // Copy result to RA.
+ | ins_next
+ break;
+
+ /* -- Constant ops ------------------------------------------------------ */
+
+ case BC_KSTR:
+ | // RA = dst, RC = str_const (~)
+ | mvn RC, RC
+ | ldr TMP0, [KBASE, RC, lsl #3]
+ | movn TMP1, #~LJ_TSTR
+ | add TMP0, TMP0, TMP1, lsl #47
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_KCDATA:
+ |.if FFI
+ | // RA = dst, RC = cdata_const (~)
+ | mvn RC, RC
+ | ldr TMP0, [KBASE, RC, lsl #3]
+ | movn TMP1, #~LJ_TCDATA
+ | add TMP0, TMP0, TMP1, lsl #47
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ |.endif
+ break;
+ case BC_KSHORT:
+ | // RA = dst, RC = int16_literal
+ | sxth RCw, RCw
+ | add TMP0, RC, TISNUM
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_KNUM:
+ | // RA = dst, RC = num_const
+ | ldr TMP0, [KBASE, RC, lsl #3]
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_KPRI:
+ | // RA = dst, RC = primitive_type (~)
+ | mvn TMP0, RC, lsl #47
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_KNIL:
+ | // RA = base, RC = end
+ | add RA, BASE, RA, lsl #3
+ | add RC, BASE, RC, lsl #3
+ | str TISNIL, [RA], #8
+ |1:
+ | cmp RA, RC
+ | str TISNIL, [RA], #8
+ | blt <1
+ | ins_next_
+ break;
+
+ /* -- Upvalue and function ops ------------------------------------------ */
+
+ case BC_UGET:
+ | // RA = dst, RC = uvnum
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | add RC, RC, #offsetof(GCfuncL, uvptr)/8
+ | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RC, lsl #3]
+ | ldr CARG2, UPVAL:CARG2->v
+ | ldr TMP0, [CARG2]
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_USETV:
+ | // RA = uvnum, RC = src
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | add RA, RA, #offsetof(GCfuncL, uvptr)/8
+ | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+ | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3]
+ | ldr CARG3, [BASE, RC, lsl #3]
+ | ldr CARG2, UPVAL:CARG1->v
+ | ldrb TMP2w, UPVAL:CARG1->marked
+ | ldrb TMP0w, UPVAL:CARG1->closed
+ | asr ITYPE, CARG3, #47
+ | str CARG3, [CARG2]
+ | add ITYPE, ITYPE, #-LJ_TISGCV
+ | tst TMP2w, #LJ_GC_BLACK // isblack(uv)
+ | ccmp TMP0w, #0, #4, ne // && uv->closed
+ | ccmn ITYPE, #-(LJ_TNUMX - LJ_TISGCV), #0, ne // && tvisgcv(v)
+ | bhi >2
+ |1:
+ | ins_next
+ |
+ |2: // Check if new value is white.
+ | and GCOBJ:CARG3, CARG3, #LJ_GCVMASK
+ | ldrb TMP1w, GCOBJ:CARG3->gch.marked
+ | tst TMP1w, #LJ_GC_WHITES // iswhite(v)
+ | beq <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | mov CARG1, GL
+ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | b <1
+ break;
+ case BC_USETS:
+ | // RA = uvnum, RC = str_const (~)
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | add RA, RA, #offsetof(GCfuncL, uvptr)/8
+ | mvn RC, RC
+ | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+ | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3]
+ | ldr STR:CARG3, [KBASE, RC, lsl #3]
+ | movn TMP0, #~LJ_TSTR
+ | ldr CARG2, UPVAL:CARG1->v
+ | ldrb TMP2w, UPVAL:CARG1->marked
+ | add TMP0, STR:CARG3, TMP0, lsl #47
+ | ldrb TMP1w, STR:CARG3->marked
+ | str TMP0, [CARG2]
+ | tbnz TMP2w, #2, >2 // isblack(uv)
+ |1:
+ | ins_next
+ |
+ |2: // Check if string is white and ensure upvalue is closed.
+ | ldrb TMP0w, UPVAL:CARG1->closed
+ | tst TMP1w, #LJ_GC_WHITES // iswhite(str)
+ | ccmp TMP0w, #0, #0, ne
+ | beq <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | mov CARG1, GL
+ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | b <1
+ break;
+ case BC_USETN:
+ | // RA = uvnum, RC = num_const
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | add RA, RA, #offsetof(GCfuncL, uvptr)/8
+ | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3]
+ | ldr TMP0, [KBASE, RC, lsl #3]
+ | ldr CARG2, UPVAL:CARG2->v
+ | str TMP0, [CARG2]
+ | ins_next
+ break;
+ case BC_USETP:
+ | // RA = uvnum, RC = primitive_type (~)
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | add RA, RA, #offsetof(GCfuncL, uvptr)/8
+ | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3]
+ | mvn TMP0, RC, lsl #47
+ | ldr CARG2, UPVAL:CARG2->v
+ | str TMP0, [CARG2]
+ | ins_next
+ break;
+
+ case BC_UCLO:
+ | // RA = level, RC = target
+ | ldr CARG3, L->openupval
+ | add RC, PC, RC, lsl #2
+ | str BASE, L->base
+ | sub PC, RC, #0x20000
+ | cbz CARG3, >1
+ | mov CARG1, L
+ | add CARG2, BASE, RA, lsl #3
+ | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
+ | ldr BASE, L->base
+ |1:
+ | ins_next
+ break;
+
+ case BC_FNEW:
+ | // RA = dst, RC = proto_const (~) (holding function prototype)
+ | mvn RC, RC
+ | str BASE, L->base
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+ | str PC, SAVE_PC
+ | ldr CARG2, [KBASE, RC, lsl #3]
+ | mov CARG1, L
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
+ | bl extern lj_func_newL_gc
+ | // Returns GCfuncL *.
+ | ldr BASE, L->base
+ | movn TMP0, #~LJ_TFUNC
+ | add CRET1, CRET1, TMP0, lsl #47
+ | str CRET1, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+
+ /* -- Table ops --------------------------------------------------------- */
+
+ case BC_TNEW:
+ case BC_TDUP:
+ | // RA = dst, RC = (hbits|asize) | tab_const (~)
+ | ldp CARG3, CARG4, GL->gc.total // Assumes threshold follows total.
+ | str BASE, L->base
+ | str PC, SAVE_PC
+ | mov CARG1, L
+ | cmp CARG3, CARG4
+ | bhs >5
+ |1:
+ if (op == BC_TNEW) {
+ | and CARG2, RC, #0x7ff
+ | lsr CARG3, RC, #11
+ | cmp CARG2, #0x7ff
+ | mov TMP0, #0x801
+ | csel CARG2, CARG2, TMP0, ne
+ | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
+ | // Returns GCtab *.
+ } else {
+ | mvn RC, RC
+ | ldr CARG2, [KBASE, RC, lsl #3]
+ | bl extern lj_tab_dup // (lua_State *L, Table *kt)
+ | // Returns GCtab *.
+ }
+ | ldr BASE, L->base
+ | movk CRET1, #(LJ_TTAB>>1)&0xffff, lsl #48
+ | str CRET1, [BASE, RA, lsl #3]
+ | ins_next
+ |
+ |5:
+ | bl extern lj_gc_step_fixtop // (lua_State *L)
+ | mov CARG1, L
+ | b <1
+ break;
+
+ case BC_GGET:
+ | // RA = dst, RC = str_const (~)
+ case BC_GSET:
+ | // RA = src, RC = str_const (~)
+ | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
+ | mvn RC, RC
+ | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
+ | ldr TAB:CARG2, LFUNC:CARG1->env
+ | ldr STR:RC, [KBASE, RC, lsl #3]
+ if (op == BC_GGET) {
+ | b ->BC_TGETS_Z
+ } else {
+ | b ->BC_TSETS_Z
+ }
+ break;
+
+ case BC_TGETV:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = dst, RB = table, RC = key
+ | ldr CARG2, [BASE, RB, lsl #3]
+ | ldr TMP1, [BASE, RC, lsl #3]
+ | checktab CARG2, ->vmeta_tgetv
+ | checkint TMP1, >9 // Integer key?
+ | ldr CARG3, TAB:CARG2->array
+ | ldr CARG1w, TAB:CARG2->asize
+ | add CARG3, CARG3, TMP1, uxtw #3
+ | cmp TMP1w, CARG1w // In array part?
+ | bhs ->vmeta_tgetv
+ | ldr TMP0, [CARG3]
+ | cmp TMP0, TISNIL
+ | beq >5
+ |1:
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ |
+ |5: // Check for __index if table value is nil.
+ | ldr TAB:CARG1, TAB:CARG2->metatable
+ | cbz TAB:CARG1, <1 // No metatable: done.
+ | ldrb TMP1w, TAB:CARG1->nomm
+ | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done.
+ | b ->vmeta_tgetv
+ |
+ |9:
+ | asr ITYPE, TMP1, #47
+ | cmn ITYPE, #-LJ_TSTR // String key?
+ | bne ->vmeta_tgetv
+ | and STR:RC, TMP1, #LJ_GCVMASK
+ | b ->BC_TGETS_Z
+ break;
+ case BC_TGETS:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = dst, RB = table, RC = str_const (~)
+ | ldr CARG2, [BASE, RB, lsl #3]
+ | mvn RC, RC
+ | ldr STR:RC, [KBASE, RC, lsl #3]
+ | checktab CARG2, ->vmeta_tgets1
+ |->BC_TGETS_Z:
+ | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst
+ | ldr TMP1w, TAB:CARG2->hmask
+ | ldr TMP2w, STR:RC->hash
+ | ldr NODE:CARG3, TAB:CARG2->node
+ | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask
+ | add TMP1, TMP1, TMP1, lsl #1
+ | movn CARG4, #~LJ_TSTR
+ | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
+ | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
+ |1:
+ | ldp TMP0, CARG1, NODE:CARG3->val
+ | ldr NODE:CARG3, NODE:CARG3->next
+ | cmp CARG1, CARG4
+ | bne >4
+ | cmp TMP0, TISNIL
+ | beq >5
+ |3:
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ |
+ |4: // Follow hash chain.
+ | cbnz NODE:CARG3, <1
+ | // End of hash chain: key not found, nil result.
+ | mov TMP0, TISNIL
+ |
+ |5: // Check for __index if table value is nil.
+ | ldr TAB:CARG1, TAB:CARG2->metatable
+ | cbz TAB:CARG1, <3 // No metatable: done.
+ | ldrb TMP1w, TAB:CARG1->nomm
+ | tbnz TMP1w, #MM_index, <3 // 'no __index' flag set: done.
+ | b ->vmeta_tgets
+ break;
+ case BC_TGETB:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = dst, RB = table, RC = index
+ | ldr CARG2, [BASE, RB, lsl #3]
+ | checktab CARG2, ->vmeta_tgetb
+ | ldr CARG3, TAB:CARG2->array
+ | ldr CARG1w, TAB:CARG2->asize
+ | add CARG3, CARG3, RC, lsl #3
+ | cmp RCw, CARG1w // In array part?
+ | bhs ->vmeta_tgetb
+ | ldr TMP0, [CARG3]
+ | cmp TMP0, TISNIL
+ | beq >5
+ |1:
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ |
+ |5: // Check for __index if table value is nil.
+ | ldr TAB:CARG1, TAB:CARG2->metatable
+ | cbz TAB:CARG1, <1 // No metatable: done.
+ | ldrb TMP1w, TAB:CARG1->nomm
+ | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done.
+ | b ->vmeta_tgetb
+ break;
+ case BC_TGETR:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = dst, RB = table, RC = key
+ | ldr CARG1, [BASE, RB, lsl #3]
+ | ldr TMP1, [BASE, RC, lsl #3]
+ | and TAB:CARG1, CARG1, #LJ_GCVMASK
+ | ldr CARG3, TAB:CARG1->array
+ | ldr TMP2w, TAB:CARG1->asize
+ | add CARG3, CARG3, TMP1w, uxtw #3
+ | cmp TMP1w, TMP2w // In array part?
+ | bhs ->vmeta_tgetr
+ | ldr TMP0, [CARG3]
+ |->BC_TGETR_Z:
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+
+ case BC_TSETV:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = src, RB = table, RC = key
+ | ldr CARG2, [BASE, RB, lsl #3]
+ | ldr TMP1, [BASE, RC, lsl #3]
+ | checktab CARG2, ->vmeta_tsetv
+ | checkint TMP1, >9 // Integer key?
+ | ldr CARG3, TAB:CARG2->array
+ | ldr CARG1w, TAB:CARG2->asize
+ | add CARG3, CARG3, TMP1, uxtw #3
+ | cmp TMP1w, CARG1w // In array part?
+ | bhs ->vmeta_tsetv
+ | ldr TMP1, [CARG3]
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | ldrb TMP2w, TAB:CARG2->marked
+ | cmp TMP1, TISNIL // Previous value is nil?
+ | beq >5
+ |1:
+ | str TMP0, [CARG3]
+ | tbnz TMP2w, #2, >7 // isblack(table)
+ |2:
+ | ins_next
+ |
+ |5: // Check for __newindex if previous value is nil.
+ | ldr TAB:CARG1, TAB:CARG2->metatable
+ | cbz TAB:CARG1, <1 // No metatable: done.
+ | ldrb TMP1w, TAB:CARG1->nomm
+ | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done.
+ | b ->vmeta_tsetv
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP2w, TMP1
+ | b <2
+ |
+ |9:
+ | asr ITYPE, TMP1, #47
+ | cmn ITYPE, #-LJ_TSTR // String key?
+ | bne ->vmeta_tsetv
+ | and STR:RC, TMP1, #LJ_GCVMASK
+ | b ->BC_TSETS_Z
+ break;
+ case BC_TSETS:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = src, RB = table, RC = str_const (~)
+ | ldr CARG2, [BASE, RB, lsl #3]
+ | mvn RC, RC
+ | ldr STR:RC, [KBASE, RC, lsl #3]
+ | checktab CARG2, ->vmeta_tsets1
+ |->BC_TSETS_Z:
+ | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src
+ | ldr TMP1w, TAB:CARG2->hmask
+ | ldr TMP2w, STR:RC->hash
+ | ldr NODE:CARG3, TAB:CARG2->node
+ | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask
+ | add TMP1, TMP1, TMP1, lsl #1
+ | movn CARG4, #~LJ_TSTR
+ | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
+ | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
+ | strb wzr, TAB:CARG2->nomm // Clear metamethod cache.
+ |1:
+ | ldp TMP1, CARG1, NODE:CARG3->val
+ | ldr NODE:TMP3, NODE:CARG3->next
+ | ldrb TMP2w, TAB:CARG2->marked
+ | cmp CARG1, CARG4
+ | bne >5
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | cmp TMP1, TISNIL // Previous value is nil?
+ | beq >4
+ |2:
+ | str TMP0, NODE:CARG3->val
+ | tbnz TMP2w, #2, >7 // isblack(table)
+ |3:
+ | ins_next
+ |
+ |4: // Check for __newindex if previous value is nil.
+ | ldr TAB:CARG1, TAB:CARG2->metatable
+ | cbz TAB:CARG1, <2 // No metatable: done.
+ | ldrb TMP1w, TAB:CARG1->nomm
+ | tbnz TMP1w, #MM_newindex, <2 // 'no __newindex' flag set: done.
+ | b ->vmeta_tsets
+ |
+ |5: // Follow hash chain.
+ | mov NODE:CARG3, NODE:TMP3
+ | cbnz NODE:TMP3, <1
+ | // End of hash chain: key not found, add a new one.
+ |
+ | // But check for __newindex first.
+ | ldr TAB:CARG1, TAB:CARG2->metatable
+ | cbz TAB:CARG1, >6 // No metatable: continue.
+ | ldrb TMP1w, TAB:CARG1->nomm
+ | // 'no __newindex' flag NOT set: check.
+ | tbz TMP1w, #MM_newindex, ->vmeta_tsets
+ |6:
+ | movn TMP1, #~LJ_TSTR
+ | str PC, SAVE_PC
+ | add TMP0, STR:RC, TMP1, lsl #47
+ | str BASE, L->base
+ | mov CARG1, L
+ | str TMP0, TMPD
+ | add CARG3, sp, TMPDofs
+ | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+ | // Returns TValue *.
+ | ldr BASE, L->base
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | str TMP0, [CRET1]
+ | b <3 // No 2nd write barrier needed.
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP2w, TMP1
+ | b <3
+ break;
+ case BC_TSETB:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = src, RB = table, RC = index
+ | ldr CARG2, [BASE, RB, lsl #3]
+ | checktab CARG2, ->vmeta_tsetb
+ | ldr CARG3, TAB:CARG2->array
+ | ldr CARG1w, TAB:CARG2->asize
+ | add CARG3, CARG3, RC, lsl #3
+ | cmp RCw, CARG1w // In array part?
+ | bhs ->vmeta_tsetb
+ | ldr TMP1, [CARG3]
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | ldrb TMP2w, TAB:CARG2->marked
+ | cmp TMP1, TISNIL // Previous value is nil?
+ | beq >5
+ |1:
+ | str TMP0, [CARG3]
+ | tbnz TMP2w, #2, >7 // isblack(table)
+ |2:
+ | ins_next
+ |
+ |5: // Check for __newindex if previous value is nil.
+ | ldr TAB:CARG1, TAB:CARG2->metatable
+ | cbz TAB:CARG1, <1 // No metatable: done.
+ | ldrb TMP1w, TAB:CARG1->nomm
+ | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done.
+ | b ->vmeta_tsetb
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP2w, TMP1
+ | b <2
+ break;
+ case BC_TSETR:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = src, RB = table, RC = key
+ | ldr CARG2, [BASE, RB, lsl #3]
+ | ldr TMP1, [BASE, RC, lsl #3]
+ | and TAB:CARG2, CARG2, #LJ_GCVMASK
+ | ldr CARG1, TAB:CARG2->array
+ | ldrb TMP2w, TAB:CARG2->marked
+ | ldr CARG4w, TAB:CARG2->asize
+ | add CARG1, CARG1, TMP1, uxtw #3
+ | tbnz TMP2w, #2, >7 // isblack(table)
+ |2:
+ | cmp TMP1w, CARG4w // In array part?
+ | bhs ->vmeta_tsetr
+ |->BC_TSETR_Z:
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | str TMP0, [CARG1]
+ | ins_next
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP2w, TMP0
+ | b <2
+ break;
+
+ case BC_TSETM:
+ | // RA = base (table at base-1), RC = num_const (start index)
+ | add RA, BASE, RA, lsl #3
+ |1:
+ | ldr RBw, SAVE_MULTRES
+ | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table.
+ | ldr TMP1, [KBASE, RC, lsl #3] // Integer constant is in lo-word.
+ | sub RB, RB, #8
+ | cbz RB, >4 // Nothing to copy?
+ | and TAB:CARG2, CARG2, #LJ_GCVMASK
+ | ldr CARG1w, TAB:CARG2->asize
+ | add CARG3w, TMP1w, RBw, lsr #3
+ | ldr CARG4, TAB:CARG2->array
+ | cmp CARG3, CARG1
+ | add RB, RA, RB
+ | bhi >5
+ | add TMP1, CARG4, TMP1w, uxtw #3
+ | ldrb TMP2w, TAB:CARG2->marked
+ |3: // Copy result slots to table.
+ | ldr TMP0, [RA], #8
+ | str TMP0, [TMP1], #8
+ | cmp RA, RB
+ | blo <3
+ | tbnz TMP2w, #2, >7 // isblack(table)
+ |4:
+ | ins_next
+ |
+ |5: // Need to resize array part.
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
+ | // Must not reallocate the stack.
+ | b <1
+ |
+ |7: // Possible table write barrier for any value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP2w, TMP1
+ | b <4
+ break;
+
+ /* -- Calls and vararg handling ----------------------------------------- */
+
+ case BC_CALLM:
+ | // RA = base, (RB = nresults+1,) RC = extra_nargs
+ | ldr TMP0w, SAVE_MULTRES
+ | decode_RC8RD NARGS8:RC, RC
+ | add NARGS8:RC, NARGS8:RC, TMP0
+ | b ->BC_CALL_Z
+ break;
+ case BC_CALL:
+ | decode_RC8RD NARGS8:RC, RC
+ | // RA = base, (RB = nresults+1,) RC = (nargs+1)*8
+ |->BC_CALL_Z:
+ | mov RB, BASE // Save old BASE for vmeta_call.
+ | add BASE, BASE, RA, lsl #3
+ | ldr CARG3, [BASE]
+ | sub NARGS8:RC, NARGS8:RC, #8
+ | add BASE, BASE, #16
+ | checkfunc CARG3, ->vmeta_call
+ | ins_call
+ break;
+
+ case BC_CALLMT:
+ | // RA = base, (RB = 0,) RC = extra_nargs
+ | ldr TMP0w, SAVE_MULTRES
+ | add NARGS8:RC, TMP0, RC, lsl #3
+ | b ->BC_CALLT1_Z
+ break;
+ case BC_CALLT:
+ | lsl NARGS8:RC, RC, #3
+ | // RA = base, (RB = 0,) RC = (nargs+1)*8
+ |->BC_CALLT1_Z:
+ | add RA, BASE, RA, lsl #3
+ | ldr TMP1, [RA]
+ | sub NARGS8:RC, NARGS8:RC, #8
+ | add RA, RA, #16
+ | checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt
+ | ldr PC, [BASE, FRAME_PC]
+ |->BC_CALLT2_Z:
+ | mov RB, #0
+ | ldrb TMP2w, LFUNC:CARG3->ffid
+ | tst PC, #FRAME_TYPE
+ | bne >7
+ |1:
+ | str TMP1, [BASE, FRAME_FUNC] // Copy function down, but keep PC.
+ | cbz NARGS8:RC, >3
+ |2:
+ | ldr TMP0, [RA, RB]
+ | add TMP1, RB, #8
+ | cmp TMP1, NARGS8:RC
+ | str TMP0, [BASE, RB]
+ | mov RB, TMP1
+ | bne <2
+ |3:
+ | cmp TMP2, #1 // (> FF_C) Calling a fast function?
+ | bhi >5
+ |4:
+ | ins_callt
+ |
+ |5: // Tailcall to a fast function with a Lua frame below.
+ | ldrb RAw, [PC, #-3]
+ | sub CARG1, BASE, RA, lsl #3
+ | ldr LFUNC:CARG1, [CARG1, #-32]
+ | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
+ | ldr CARG1, LFUNC:CARG1->pc
+ | ldr KBASE, [CARG1, #PC2PROTO(k)]
+ | b <4
+ |
+ |7: // Tailcall from a vararg function.
+ | eor PC, PC, #FRAME_VARG
+ | tst PC, #FRAME_TYPEP // Vararg frame below?
+ | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below.
+ | bne <1
+ | sub BASE, BASE, PC
+ | ldr PC, [BASE, FRAME_PC]
+ | tst PC, #FRAME_TYPE
+ | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below.
+ | b <1
+ break;
+
+ case BC_ITERC:
+ | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+ | add RA, BASE, RA, lsl #3
+ | ldr CARG3, [RA, #-24]
+ | mov RB, BASE // Save old BASE for vmeta_call.
+ | ldp CARG1, CARG2, [RA, #-16]
+ | add BASE, RA, #16
+ | mov NARGS8:RC, #16 // Iterators get 2 arguments.
+ | str CARG3, [RA] // Copy callable.
+ | stp CARG1, CARG2, [RA, #16] // Copy state and control var.
+ | checkfunc CARG3, ->vmeta_call
+ | ins_call
+ break;
+
+ case BC_ITERN:
+ | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+ |.if JIT
+ | // NYI: add hotloop, record BC_ITERN.
+ |.endif
+ | add RA, BASE, RA, lsl #3
+ | ldr TAB:RB, [RA, #-16]
+ | ldrh TMP3w, [PC, #2]
+ | ldr CARG1w, [RA, #-8] // Get index from control var.
+ | add PC, PC, #4
+ | add TMP3, PC, TMP3, lsl #2
+ | and TAB:RB, RB, #LJ_GCVMASK
+ | sub TMP3, TMP3, #0x20000
+ | ldr TMP1w, TAB:RB->asize
+ | ldr CARG2, TAB:RB->array
+ |1: // Traverse array part.
+ | subs RC, CARG1, TMP1
+ | add CARG3, CARG2, CARG1, lsl #3
+ | bhs >5 // Index points after array part?
+ | ldr TMP0, [CARG3]
+ | cmp TMP0, TISNIL
+ | cinc CARG1, CARG1, eq // Skip holes in array part.
+ | beq <1
+ | add CARG1, CARG1, TISNUM
+ | stp CARG1, TMP0, [RA]
+ | add CARG1, CARG1, #1
+ |3:
+ | str CARG1w, [RA, #-8] // Update control var.
+ | mov PC, TMP3
+ |4:
+ | ins_next
+ |
+ |5: // Traverse hash part.
+ | ldr TMP2w, TAB:RB->hmask
+ | ldr NODE:RB, TAB:RB->node
+ |6:
+ | add CARG1, RC, RC, lsl #1
+ | cmp RC, TMP2 // End of iteration? Branch to ITERN+1.
+ | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8
+ | bhi <4
+ | ldp TMP0, CARG1, NODE:CARG3->val
+ | cmp TMP0, TISNIL
+ | add RC, RC, #1
+ | beq <6 // Skip holes in hash part.
+ | stp CARG1, TMP0, [RA]
+ | add CARG1, RC, TMP1
+ | b <3
+ break;
+
+ case BC_ISNEXT:
+ | // RA = base, RC = target (points to ITERN)
+ | add RA, BASE, RA, lsl #3
+ | ldr CFUNC:CARG1, [RA, #-24]
+ | add RC, PC, RC, lsl #2
+ | ldp TAB:CARG3, CARG4, [RA, #-16]
+ | sub RC, RC, #0x20000
+ | checkfunc CFUNC:CARG1, >5
+ | asr TMP0, TAB:CARG3, #47
+ | ldrb TMP1w, CFUNC:CARG1->ffid
+ | cmn TMP0, #-LJ_TTAB
+ | ccmp CARG4, TISNIL, #0, eq
+ | ccmp TMP1w, #FF_next_N, #0, eq
+ | bne >5
+ | mov TMP0w, #0xfffe7fff
+ | lsl TMP0, TMP0, #32
+ | str TMP0, [RA, #-8] // Initialize control var.
+ |1:
+ | mov PC, RC
+ | ins_next
+ |
+ |5: // Despecialize bytecode if any of the checks fail.
+ | mov TMP0, #BC_JMP
+ | mov TMP1, #BC_ITERC
+ | strb TMP0w, [PC, #-4]
+ | strb TMP1w, [RC]
+ | b <1
+ break;
+
+ case BC_VARG:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = base, RB = (nresults+1), RC = numparams
+ | ldr TMP1, [BASE, FRAME_PC]
+ | add RC, BASE, RC, lsl #3
+ | add RA, BASE, RA, lsl #3
+ | add RC, RC, #FRAME_VARG
+ | add TMP2, RA, RB, lsl #3
+ | sub RC, RC, TMP1 // RC = vbase
+ | // Note: RC may now be even _above_ BASE if nargs was < numparams.
+ | sub TMP3, BASE, #16 // TMP3 = vtop
+ | cbz RB, >5
+ | sub TMP2, TMP2, #16
+ |1: // Copy vararg slots to destination slots.
+ | cmp RC, TMP3
+ | ldr TMP0, [RC], #8
+ | csel TMP0, TMP0, TISNIL, lo
+ | cmp RA, TMP2
+ | str TMP0, [RA], #8
+ | blo <1
+ |2:
+ | ins_next
+ |
+ |5: // Copy all varargs.
+ | ldr TMP0, L->maxstack
+ | subs TMP2, TMP3, RC
+ | csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8
+ | add RB, RB, #8
+ | add TMP1, RA, TMP2
+ | str RBw, SAVE_MULTRES
+ | ble <2 // Nothing to copy.
+ | cmp TMP1, TMP0
+ | bhi >7
+ |6:
+ | ldr TMP0, [RC], #8
+ | str TMP0, [RA], #8
+ | cmp RC, TMP3
+ | blo <6
+ | b <2
+ |
+ |7: // Grow stack for varargs.
+ | lsr CARG2, TMP2, #3
+ | stp BASE, RA, L->base
+ | mov CARG1, L
+ | sub RC, RC, BASE // Need delta, because BASE may change.
+ | str PC, SAVE_PC
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | ldp BASE, RA, L->base
+ | add RC, BASE, RC
+ | sub TMP3, BASE, #16
+ | b <6
+ break;
+
+ /* -- Returns ----------------------------------------------------------- */
+
+ case BC_RETM:
+ | // RA = results, RC = extra results
+ | ldr TMP0w, SAVE_MULTRES
+ | ldr PC, [BASE, FRAME_PC]
+ | add RA, BASE, RA, lsl #3
+ | add RC, TMP0, RC, lsl #3
+ | b ->BC_RETM_Z
+ break;
+
+ case BC_RET:
+ | // RA = results, RC = nresults+1
+ | ldr PC, [BASE, FRAME_PC]
+ | lsl RC, RC, #3
+ | add RA, BASE, RA, lsl #3
+ |->BC_RETM_Z:
+ | str RCw, SAVE_MULTRES
+ |1:
+ | ands CARG1, PC, #FRAME_TYPE
+ | eor CARG2, PC, #FRAME_VARG
+ | bne ->BC_RETV2_Z
+ |
+ |->BC_RET_Z:
+ | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return
+ | ldr INSw, [PC, #-4]
+ | subs TMP1, RC, #8
+ | sub CARG3, BASE, #16
+ | beq >3
+ |2:
+ | ldr TMP0, [RA], #8
+ | add BASE, BASE, #8
+ | sub TMP1, TMP1, #8
+ | str TMP0, [BASE, #-24]
+ | cbnz TMP1, <2
+ |3:
+ | decode_RA RA, INS
+ | sub CARG4, CARG3, RA, lsl #3
+ | decode_RB RB, INS
+ | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC]
+ |5:
+ | cmp RC, RB, lsl #3 // More results expected?
+ | blo >6
+ | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
+ | mov BASE, CARG4
+ | ldr CARG2, LFUNC:CARG1->pc
+ | ldr KBASE, [CARG2, #PC2PROTO(k)]
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ | add BASE, BASE, #8
+ | add RC, RC, #8
+ | str TISNIL, [BASE, #-24]
+ | b <5
+ |
+ |->BC_RETV1_Z: // Non-standard return case.
+ | add RA, BASE, RA, lsl #3
+ |->BC_RETV2_Z:
+ | tst CARG2, #FRAME_TYPEP
+ | bne ->vm_return
+ | // Return from vararg function: relocate BASE down.
+ | sub BASE, BASE, CARG2
+ | ldr PC, [BASE, FRAME_PC]
+ | b <1
+ break;
+
+ case BC_RET0: case BC_RET1:
+ | // RA = results, RC = nresults+1
+ | ldr PC, [BASE, FRAME_PC]
+ | lsl RC, RC, #3
+ | str RCw, SAVE_MULTRES
+ | ands CARG1, PC, #FRAME_TYPE
+ | eor CARG2, PC, #FRAME_VARG
+ | bne ->BC_RETV1_Z
+ | ldr INSw, [PC, #-4]
+ if (op == BC_RET1) {
+ | ldr TMP0, [BASE, RA, lsl #3]
+ }
+ | sub CARG4, BASE, #16
+ | decode_RA RA, INS
+ | sub BASE, CARG4, RA, lsl #3
+ if (op == BC_RET1) {
+ | str TMP0, [CARG4], #8
+ }
+ | decode_RB RB, INS
+ | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
+ |5:
+ | cmp RC, RB, lsl #3
+ | blo >6
+ | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
+ | ldr CARG2, LFUNC:CARG1->pc
+ | ldr KBASE, [CARG2, #PC2PROTO(k)]
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ | add RC, RC, #8
+ | str TISNIL, [CARG4], #8
+ | b <5
+ break;
+
+ /* -- Loops and branches ------------------------------------------------ */
+
+ |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4]
+ |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12]
+ |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20]
+ |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28]
+
+ case BC_FORL:
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_IFORL follows.
+ break;
+
+ case BC_JFORI:
+ case BC_JFORL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_FORI:
+ case BC_IFORL:
+ | // RA = base, RC = target (after end of loop or start of loop)
+ vk = (op == BC_IFORL || op == BC_JFORL);
+ | add RA, BASE, RA, lsl #3
+ | ldp CARG1, CARG2, FOR_IDX // CARG1 = IDX, CARG2 = STOP
+ | ldr CARG3, FOR_STEP // CARG3 = STEP
+ if (op != BC_JFORL) {
+ | add RC, PC, RC, lsl #2
+ | sub RC, RC, #0x20000
+ }
+ | checkint CARG1, >5
+ if (!vk) {
+ | checkint CARG2, ->vmeta_for
+ | checkint CARG3, ->vmeta_for
+ | tbnz CARG3w, #31, >4
+ | cmp CARG1w, CARG2w
+ } else {
+ | adds CARG1w, CARG1w, CARG3w
+ | bvs >2
+ | add TMP0, CARG1, TISNUM
+ | tbnz CARG3w, #31, >4
+ | cmp CARG1w, CARG2w
+ }
+ |1:
+ if (op == BC_FORI) {
+ | csel PC, RC, PC, gt
+ } else if (op == BC_JFORI) {
+ | ldrh RCw, [RC, #-2]
+ } else if (op == BC_IFORL) {
+ | csel PC, RC, PC, le
+ }
+ if (vk) {
+ | str TMP0, FOR_IDX
+ | str TMP0, FOR_EXT
+ } else {
+ | str CARG1, FOR_EXT
+ }
+ if (op == BC_JFORI || op == BC_JFORL) {
+ | ble =>BC_JLOOP
+ }
+ |2:
+ | ins_next
+ |
+ |4: // Invert check for negative step.
+ | cmp CARG2w, CARG1w
+ | b <1
+ |
+ |5: // FP loop.
+ | ldp d0, d1, FOR_IDX
+ | blo ->vmeta_for
+ if (!vk) {
+ | checknum CARG2, ->vmeta_for
+ | checknum CARG3, ->vmeta_for
+ | str d0, FOR_EXT
+ } else {
+ | ldr d2, FOR_STEP
+ | fadd d0, d0, d2
+ }
+ | tbnz CARG3, #63, >7
+ | fcmp d0, d1
+ |6:
+ if (vk) {
+ | str d0, FOR_IDX
+ | str d0, FOR_EXT
+ }
+ if (op == BC_FORI) {
+ | csel PC, RC, PC, hi
+ } else if (op == BC_JFORI) {
+ | ldrh RCw, [RC, #-2]
+ | bls =>BC_JLOOP
+ } else if (op == BC_IFORL) {
+ | csel PC, RC, PC, ls
+ } else {
+ | bls =>BC_JLOOP
+ }
+ | b <2
+ |
+ |7: // Invert check for negative step.
+ | fcmp d1, d0
+ | b <6
+ break;
+
+ case BC_ITERL:
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_IITERL follows.
+ break;
+
+ case BC_JITERL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IITERL:
+ | // RA = base, RC = target
+ | ldr CARG1, [BASE, RA, lsl #3]
+ | add TMP1, BASE, RA, lsl #3
+ | cmp CARG1, TISNIL
+ | beq >1 // Stop if iterator returned nil.
+ if (op == BC_JITERL) {
+ | str CARG1, [TMP1, #-8]
+ | b =>BC_JLOOP
+ } else {
+ | add TMP0, PC, RC, lsl #2 // Otherwise save control var + branch.
+ | sub PC, TMP0, #0x20000
+ | str CARG1, [TMP1, #-8]
+ }
+ |1:
+ | ins_next
+ break;
+
+ case BC_LOOP:
+ | // RA = base, RC = target (loop extent)
+ | // Note: RA/RC is only used by trace recorder to determine scope/extent
+ | // This opcode does NOT jump, its only purpose is to detect a hot loop.
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_ILOOP follows.
+ break;
+
+ case BC_ILOOP:
+ | // RA = base, RC = target (loop extent)
+ | ins_next
+ break;
+
+ case BC_JLOOP:
+ |.if JIT
+ | NYI
+ |.endif
+ break;
+
+ case BC_JMP:
+ | // RA = base (only used by trace recorder), RC = target
+ | add RC, PC, RC, lsl #2
+ | sub PC, RC, #0x20000
+ | ins_next
+ break;
+
+ /* -- Function headers -------------------------------------------------- */
+
+ case BC_FUNCF:
+ |.if JIT
+ | hotcall
+ |.endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
+ break;
+
+ case BC_JFUNCF:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IFUNCF:
+ | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
+ | ldr CARG1, L->maxstack
+ | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
+ | ldr KBASE, [PC, #-4+PC2PROTO(k)]
+ | cmp RA, CARG1
+ | bhi ->vm_growstack_l
+ |2:
+ | cmp NARGS8:RC, TMP1, lsl #3 // Check for missing parameters.
+ | blo >3
+ if (op == BC_JFUNCF) {
+ | decode_RD RC, INS
+ | b =>BC_JLOOP
+ } else {
+ | ins_next
+ }
+ |
+ |3: // Clear missing parameters.
+ | str TISNIL, [BASE, NARGS8:RC]
+ | add NARGS8:RC, NARGS8:RC, #8
+ | b <2
+ break;
+
+ case BC_JFUNCV:
+#if !LJ_HASJIT
+ break;
+#endif
+ | NYI // NYI: compiled vararg functions
+ break; /* NYI: compiled vararg functions. */
+
+ case BC_IFUNCV:
+ | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
+ | ldr CARG1, L->maxstack
+ | add TMP2, BASE, RC
+ | add RA, RA, RC
+ | add TMP0, RC, #16+FRAME_VARG
+ | str LFUNC:CARG3, [TMP2], #8 // Store (untagged) copy of LFUNC.
+ | ldr KBASE, [PC, #-4+PC2PROTO(k)]
+ | cmp RA, CARG1
+ | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG.
+ | bhs ->vm_growstack_l
+ | sub RC, TMP2, #16
+ | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
+ | mov RA, BASE
+ | mov BASE, TMP2
+ | cbz TMP1, >2
+ |1:
+ | cmp RA, RC // Less args than parameters?
+ | bhs >3
+ | ldr TMP0, [RA]
+ | sub TMP1, TMP1, #1
+ | str TISNIL, [RA], #8 // Clear old fixarg slot (help the GC).
+ | str TMP0, [TMP2], #8
+ | cbnz TMP1, <1
+ |2:
+ | ins_next
+ |
+ |3:
+ | sub TMP1, TMP1, #1
+ | str TISNIL, [TMP2], #8
+ | cbz TMP1, <2
+ | b <3
+ break;
+
+ case BC_FUNCC:
+ case BC_FUNCCW:
+ | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8
+ if (op == BC_FUNCC) {
+ | ldr CARG4, CFUNC:CARG3->f
+ } else {
+ | ldr CARG4, GL->wrapf
+ }
+ | add CARG2, RA, NARGS8:RC
+ | ldr CARG1, L->maxstack
+ | add RC, BASE, NARGS8:RC
+ | cmp CARG2, CARG1
+ | stp BASE, RC, L->base
+ if (op == BC_FUNCCW) {
+ | ldr CARG2, CFUNC:CARG3->f
+ }
+ | mv_vmstate TMP0w, C
+ | mov CARG1, L
+ | bhi ->vm_growstack_c // Need to grow stack.
+ | st_vmstate TMP0w
+ | blr CARG4 // (lua_State *L [, lua_CFunction f])
+ | // Returns nresults.
+ | ldp BASE, TMP1, L->base
+ | str L, GL->cur_L
+ | sbfiz RC, CRET1, #3, #32
+ | st_vmstate ST_INTERP
+ | ldr PC, [BASE, FRAME_PC]
+ | sub RA, TMP1, RC // RA = L->top - nresults*8
+ | b ->vm_returnc
+ break;
+
+ /* ---------------------------------------------------------------------- */
+
+ default:
+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+ exit(2);
+ break;
+ }
+}
+
+static int build_backend(BuildCtx *ctx)
+{ /* Generates the subroutines and one handler per bytecode op. Returns BC__MAX. */
+ int op;
+ /* Make room for BC__MAX DynASM PC labels (cf. the =>BC_JLOOP branch targets). */
+ dasm_growpc(Dst, BC__MAX);
+ /* The shared subroutines are generated first, before any opcode handler. */
+ build_subroutines(ctx);
+ /* Switch to the code_op section, then generate ops in ascending opcode order. */
+ |.code_op
+ for (op = 0; op < BC__MAX; op++)
+ build_ins(ctx, (BCOp)op, op); /* NOTE(review): 3rd arg is presumably the PC label for this op -- build_ins header not visible here. */
+
+ return BC__MAX;
+}
+
+/* Emit pseudo frame-info for all assembler functions.
+**
+** Only BUILD_elfasm mode produces output; every other mode is a no-op.
+** The frame-info is written as assembler text to ctx->fp:
+** - .debug_frame: CIE .Lframe0, an FDE for the fcofs bytes of code starting
+**   at .Lbegin and (LJ_HASFFI only) an FDE for lj_vm_ffi_call.
+** - .eh_frame: CIE .Lframe1 (augmentation "zPR", personality routine
+**   lj_err_unwind_dwarf) with an FDE for the code starting at .Lbegin, and
+**   (LJ_HASFFI only) CIE .Lframe2 (augmentation "zR", no personality) with
+**   an FDE for lj_vm_ffi_call.
+** All register save offsets are factored by the data alignment of -8 given
+** in the CIEs, i.e. they are counted in 8 byte slots below the CFA.
+*/
+static void emit_asm_debug(BuildCtx *ctx)
+{
+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Byte offset of vm_ffi_call from the start of the code. */
+ int i, cf = CFRAME_SIZE >> 3; /* CFRAME_SIZE in units of 8 bytes. */
+ switch (ctx->mode) {
+ case BUILD_elfasm:
+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
+ fprintf(ctx->fp,
+ ".Lframe0:\n" /* CIE shared by the .debug_frame FDEs. */
+ "\t.long .LECIE0-.LSCIE0\n" /* CIE length */
+ ".LSCIE0:\n"
+ "\t.long 0xffffffff\n" /* CIE id for .debug_frame */
+ "\t.byte 0x1\n" /* CIE version */
+ "\t.string \"\"\n" /* no augmentation */
+ "\t.uleb128 0x1\n" /* code alignment factor */
+ "\t.sleb128 -8\n" /* data alignment factor */
+ "\t.byte 30\n" /* Return address is in lr. */
+ "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
+ "\t.align 3\n"
+ ".LECIE0:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE0:\n" /* FDE for the code from .Lbegin up to vm_ffi_call. */
+ "\t.long .LEFDE0-.LASFDE0\n" /* FDE length */
+ ".LASFDE0:\n"
+ "\t.long .Lframe0\n" /* CIE pointer */
+ "\t.quad .Lbegin\n" /* initial location */
+ "\t.quad %d\n" /* address range (fcofs bytes) */
+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
+ "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */
+ "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */
+ fcofs, CFRAME_SIZE, cf, cf-1);
+ for (i = 19; i <= 28; i++) /* offset x19-x28 */
+ fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); /* 0x80+reg is the DW_CFA_offset opcode. */
+ for (i = 8; i <= 15; i++) /* offset d8-d15 */
+ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", /* 5 is DW_CFA_offset_extended. */
+ 64+i, cf-i-4); /* 64+i is the register number used for d<i>. */
+ fprintf(ctx->fp,
+ "\t.align 3\n"
+ ".LEFDE0:\n\n");
+#if LJ_HASFFI
+ fprintf(ctx->fp,
+ ".LSFDE1:\n" /* FDE for lj_vm_ffi_call (rest of the code). */
+ "\t.long .LEFDE1-.LASFDE1\n" /* FDE length */
+ ".LASFDE1:\n"
+ "\t.long .Lframe0\n" /* CIE pointer */
+ "\t.quad lj_vm_ffi_call\n" /* initial location */
+ "\t.quad %d\n" /* address range (codesz - fcofs bytes) */
+ "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
+ "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */
+ "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */
+ "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */
+ "\t.align 3\n"
+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n");
+ fprintf(ctx->fp,
+ ".Lframe1:\n" /* .eh_frame CIE with personality routine. */
+ "\t.long .LECIE1-.LSCIE1\n" /* CIE length */
+ ".LSCIE1:\n"
+ "\t.long 0\n" /* CIE id for .eh_frame */
+ "\t.byte 0x1\n" /* CIE version */
+ "\t.string \"zPR\"\n" /* augmentation: personality + FDE encoding */
+ "\t.uleb128 0x1\n" /* code alignment factor */
+ "\t.sleb128 -8\n" /* data alignment factor */
+ "\t.byte 30\n" /* Return address is in lr. */
+ "\t.uleb128 6\n" /* augmentation length */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.long lj_err_unwind_dwarf-.\n" /* personality routine */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
+ "\t.align 3\n"
+ ".LECIE1:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE2:\n" /* FDE for the code from .Lbegin up to vm_ffi_call. */
+ "\t.long .LEFDE2-.LASFDE2\n" /* FDE length */
+ ".LASFDE2:\n"
+ "\t.long .LASFDE2-.Lframe1\n" /* CIE pointer (relative) */
+ "\t.long .Lbegin-.\n" /* initial location (pcrel) */
+ "\t.long %d\n" /* address range (fcofs bytes) */
+ "\t.uleb128 0\n" /* augmentation length */
+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
+ "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */
+ "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */
+ fcofs, CFRAME_SIZE, cf, cf-1);
+ for (i = 19; i <= 28; i++) /* offset x19-x28 */
+ fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); /* 0x80+reg is the DW_CFA_offset opcode. */
+ for (i = 8; i <= 15; i++) /* offset d8-d15 */
+ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", /* 5 is DW_CFA_offset_extended. */
+ 64+i, cf-i-4); /* 64+i is the register number used for d<i>. */
+ fprintf(ctx->fp,
+ "\t.align 3\n"
+ ".LEFDE2:\n\n");
+#if LJ_HASFFI
+ fprintf(ctx->fp,
+ ".Lframe2:\n" /* .eh_frame CIE without personality routine. */
+ "\t.long .LECIE2-.LSCIE2\n" /* CIE length */
+ ".LSCIE2:\n"
+ "\t.long 0\n" /* CIE id for .eh_frame */
+ "\t.byte 0x1\n" /* CIE version */
+ "\t.string \"zR\"\n" /* augmentation: FDE encoding only */
+ "\t.uleb128 0x1\n" /* code alignment factor */
+ "\t.sleb128 -8\n" /* data alignment factor */
+ "\t.byte 30\n" /* Return address is in lr. */
+ "\t.uleb128 1\n" /* augmentation length */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
+ "\t.align 3\n"
+ ".LECIE2:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE3:\n" /* FDE for lj_vm_ffi_call (rest of the code). */
+ "\t.long .LEFDE3-.LASFDE3\n" /* FDE length */
+ ".LASFDE3:\n"
+ "\t.long .LASFDE3-.Lframe2\n" /* CIE pointer (relative) */
+ "\t.long lj_vm_ffi_call-.\n" /* initial location (pcrel) */
+ "\t.long %d\n" /* address range (codesz - fcofs bytes) */
+ "\t.uleb128 0\n" /* augmentation length */
+ "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
+ "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */
+ "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */
+ "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */
+ "\t.align 3\n"
+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif
+ break;
+ default: /* No frame-info is emitted for any other build mode. */
+ break;
+ }
+}
+
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 3bf5a99..7cfdf4b 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -1,6 +1,6 @@
|// Low-level VM code for MIPS CPUs.
|// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
|
|.arch mips
|.section code_op, code_sub
@@ -399,7 +399,7 @@ static void build_subroutines(BuildCtx *ctx)
| // - A return back from a lua_call() with (high) nresults adjustment.
| load_got lj_state_growstack
| move MULTRES, RD
- | move CARG2, TMP2
+ | srl CARG2, TMP2, 3
| call_intern lj_state_growstack // (lua_State *L, int n)
|. move CARG1, L
| lw TMP2, SAVE_NRES
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index d7809f1..2a7a745 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -1,6 +1,6 @@
-|// Low-level VM code for PowerPC CPUs.
+|// Low-level VM code for PowerPC 32 bit or 32on64 bit mode.
|// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
|
|.arch ppc
|.section code_op, code_sub
@@ -18,7 +18,7 @@
|// DynASM defines used by the PPC port:
|//
|// P64 64 bit pointers (only for GPR64 testing).
-|// Note: a full PPC64 _LP64 port is not planned.
+|// Note: see vm_ppc64.dasc for a full PPC64 _LP64 port.
|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3).
|// Affects reg saves, stack layout, carry/overflow/dot flags etc.
|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
@@ -587,7 +587,7 @@ static void build_subroutines(BuildCtx *ctx)
| // - A return back from a lua_call() with (high) nresults adjustment.
| stp BASE, L->top // Save current top held in BASE (yes).
| mr SAVE0, RD
- | mr CARG2, TMP2
+ | srwi CARG2, TMP2, 3
| mr CARG1, L
| bl extern lj_state_growstack // (lua_State *L, int n)
| lwz TMP2, SAVE_NRES
diff --git a/src/vm_ppcspe.dasc b/src/vm_ppcspe.dasc
deleted file mode 100644
index ee39403..0000000
--- a/src/vm_ppcspe.dasc
+++ /dev/null
@@ -1,3685 +0,0 @@
-|// Low-level VM code for PowerPC/e500 CPUs.
-|// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
-|
-|.arch ppc
-|.section code_op, code_sub
-|
-|.actionlist build_actionlist
-|.globals GLOB_
-|.globalnames globnames
-|.externnames extnames
-|
-|// Note: The ragged indentation of the instructions is intentional.
-|// The starting columns indicate data dependencies.
-|
-|//-----------------------------------------------------------------------
-|
-|// Fixed register assignments for the interpreter.
-|// Don't use: r1 = sp, r2 and r13 = reserved and/or small data area ptr
-|
-|// The following must be C callee-save (but BASE is often refetched).
-|.define BASE, r14 // Base of current Lua stack frame.
-|.define KBASE, r15 // Constants of current Lua function.
-|.define PC, r16 // Next PC.
-|.define DISPATCH, r17 // Opcode dispatch table.
-|.define LREG, r18 // Register holding lua_State (also in SAVE_L).
-|.define MULTRES, r19 // Size of multi-result: (nresults+1)*8.
-|
-|// Constants for vectorized type-comparisons (hi+low GPR). C callee-save.
-|.define TISNUM, r22
-|.define TISSTR, r23
-|.define TISTAB, r24
-|.define TISFUNC, r25
-|.define TISNIL, r26
-|.define TOBIT, r27
-|.define ZERO, TOBIT // Zero in lo word.
-|
-|// The following temporaries are not saved across C calls, except for RA.
-|.define RA, r20 // Callee-save.
-|.define RB, r10
-|.define RC, r11
-|.define RD, r12
-|.define INS, r7 // Overlaps CARG5.
-|
-|.define TMP0, r0
-|.define TMP1, r8
-|.define TMP2, r9
-|.define TMP3, r6 // Overlaps CARG4.
-|
-|// Saved temporaries.
-|.define SAVE0, r21
-|
-|// Calling conventions.
-|.define CARG1, r3
-|.define CARG2, r4
-|.define CARG3, r5
-|.define CARG4, r6 // Overlaps TMP3.
-|.define CARG5, r7 // Overlaps INS.
-|
-|.define CRET1, r3
-|.define CRET2, r4
-|
-|// Stack layout while in interpreter. Must match with lj_frame.h.
-|.define SAVE_LR, 188(sp)
-|.define CFRAME_SPACE, 184 // Delta for sp.
-|// Back chain for sp: 184(sp) <-- sp entering interpreter
-|.define SAVE_r31, 176(sp) // 64 bit register saves.
-|.define SAVE_r30, 168(sp)
-|.define SAVE_r29, 160(sp)
-|.define SAVE_r28, 152(sp)
-|.define SAVE_r27, 144(sp)
-|.define SAVE_r26, 136(sp)
-|.define SAVE_r25, 128(sp)
-|.define SAVE_r24, 120(sp)
-|.define SAVE_r23, 112(sp)
-|.define SAVE_r22, 104(sp)
-|.define SAVE_r21, 96(sp)
-|.define SAVE_r20, 88(sp)
-|.define SAVE_r19, 80(sp)
-|.define SAVE_r18, 72(sp)
-|.define SAVE_r17, 64(sp)
-|.define SAVE_r16, 56(sp)
-|.define SAVE_r15, 48(sp)
-|.define SAVE_r14, 40(sp)
-|.define SAVE_CR, 36(sp)
-|.define UNUSED1, 32(sp)
-|.define SAVE_ERRF, 28(sp) // 32 bit C frame info.
-|.define SAVE_NRES, 24(sp)
-|.define SAVE_CFRAME, 20(sp)
-|.define SAVE_L, 16(sp)
-|.define SAVE_PC, 12(sp)
-|.define SAVE_MULTRES, 8(sp)
-|// Next frame lr: 4(sp)
-|// Back chain for sp: 0(sp) <-- sp while in interpreter
-|
-|.macro save_, reg; evstdd reg, SAVE_..reg; .endmacro
-|.macro rest_, reg; evldd reg, SAVE_..reg; .endmacro
-|
-|.macro saveregs
-| stwu sp, -CFRAME_SPACE(sp)
-| save_ r14; save_ r15; save_ r16; save_ r17; save_ r18; save_ r19
-| mflr r0; mfcr r12
-| save_ r20; save_ r21; save_ r22; save_ r23; save_ r24; save_ r25
-| stw r0, SAVE_LR; stw r12, SAVE_CR
-| save_ r26; save_ r27; save_ r28; save_ r29; save_ r30; save_ r31
-|.endmacro
-|
-|.macro restoreregs
-| lwz r0, SAVE_LR; lwz r12, SAVE_CR
-| rest_ r14; rest_ r15; rest_ r16; rest_ r17; rest_ r18; rest_ r19
-| mtlr r0; mtcrf 0x38, r12
-| rest_ r20; rest_ r21; rest_ r22; rest_ r23; rest_ r24; rest_ r25
-| rest_ r26; rest_ r27; rest_ r28; rest_ r29; rest_ r30; rest_ r31
-| addi sp, sp, CFRAME_SPACE
-|.endmacro
-|
-|// Type definitions. Some of these are only used for documentation.
-|.type L, lua_State, LREG
-|.type GL, global_State
-|.type TVALUE, TValue
-|.type GCOBJ, GCobj
-|.type STR, GCstr
-|.type TAB, GCtab
-|.type LFUNC, GCfuncL
-|.type CFUNC, GCfuncC
-|.type PROTO, GCproto
-|.type UPVAL, GCupval
-|.type NODE, Node
-|.type NARGS8, int
-|.type TRACE, GCtrace
-|
-|//-----------------------------------------------------------------------
-|
-|// These basic macros should really be part of DynASM.
-|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro
-|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro
-|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro
-|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro
-|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro
-|
-|// Trap for not-yet-implemented parts.
-|.macro NYI; tw 4, sp, sp; .endmacro
-|
-|//-----------------------------------------------------------------------
-|
-|// Access to frame relative to BASE.
-|.define FRAME_PC, -8
-|.define FRAME_FUNC, -4
-|
-|// Instruction decode.
-|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro
-|.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro
-|.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro
-|.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro
-|.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro
-|
-|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro
-|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro
-|
-|// Instruction fetch.
-|.macro ins_NEXT1
-| lwz INS, 0(PC)
-| addi PC, PC, 4
-|.endmacro
-|// Instruction decode+dispatch.
-|.macro ins_NEXT2
-| decode_OP4 TMP1, INS
-| decode_RB8 RB, INS
-| decode_RD8 RD, INS
-| lwzx TMP0, DISPATCH, TMP1
-| decode_RA8 RA, INS
-| decode_RC8 RC, INS
-| mtctr TMP0
-| bctr
-|.endmacro
-|.macro ins_NEXT
-| ins_NEXT1
-| ins_NEXT2
-|.endmacro
-|
-|// Instruction footer.
-|.if 1
-| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
-| .define ins_next, ins_NEXT
-| .define ins_next_, ins_NEXT
-| .define ins_next1, ins_NEXT1
-| .define ins_next2, ins_NEXT2
-|.else
-| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
-| // Affects only certain kinds of benchmarks (and only with -j off).
-| .macro ins_next
-| b ->ins_next
-| .endmacro
-| .macro ins_next1
-| .endmacro
-| .macro ins_next2
-| b ->ins_next
-| .endmacro
-| .macro ins_next_
-| ->ins_next:
-| ins_NEXT
-| .endmacro
-|.endif
-|
-|// Call decode and dispatch.
-|.macro ins_callt
-| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
-| lwz PC, LFUNC:RB->pc
-| lwz INS, 0(PC)
-| addi PC, PC, 4
-| decode_OP4 TMP1, INS
-| decode_RA8 RA, INS
-| lwzx TMP0, DISPATCH, TMP1
-| add RA, RA, BASE
-| mtctr TMP0
-| bctr
-|.endmacro
-|
-|.macro ins_call
-| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
-| stw PC, FRAME_PC(BASE)
-| ins_callt
-|.endmacro
-|
-|//-----------------------------------------------------------------------
-|
-|// Macros to test operand types.
-|.macro checknum, reg; evcmpltu reg, TISNUM; .endmacro
-|.macro checkstr, reg; evcmpeq reg, TISSTR; .endmacro
-|.macro checktab, reg; evcmpeq reg, TISTAB; .endmacro
-|.macro checkfunc, reg; evcmpeq reg, TISFUNC; .endmacro
-|.macro checknil, reg; evcmpeq reg, TISNIL; .endmacro
-|.macro checkok, label; blt label; .endmacro
-|.macro checkfail, label; bge label; .endmacro
-|.macro checkanyfail, label; bns label; .endmacro
-|.macro checkallok, label; bso label; .endmacro
-|
-|.macro branch_RD
-| srwi TMP0, RD, 1
-| add PC, PC, TMP0
-| addis PC, PC, -(BCBIAS_J*4 >> 16)
-|.endmacro
-|
-|// Assumes DISPATCH is relative to GL.
-#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
-#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
-|
-#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
-|
-|.macro hotloop
-| NYI
-|.endmacro
-|
-|.macro hotcall
-| NYI
-|.endmacro
-|
-|// Set current VM state. Uses TMP0.
-|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro
-|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro
-|
-|// Move table write barrier back. Overwrites mark and tmp.
-|.macro barrierback, tab, mark, tmp
-| lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH)
-| // Assumes LJ_GC_BLACK is 0x04.
-| rlwinm mark, mark, 0, 30, 28 // black2gray(tab)
-| stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH)
-| stb mark, tab->marked
-| stw tmp, tab->gclist
-|.endmacro
-|
-|//-----------------------------------------------------------------------
-
-/* Generate subroutines used by opcodes and other parts of the VM. */
-/* The .code_sub section should be last to help static branch prediction. */
-static void build_subroutines(BuildCtx *ctx)
-{
- |.code_sub
- |
- |//-----------------------------------------------------------------------
- |//-- Return handling ----------------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_returnp:
- | // See vm_return. Also: TMP2 = previous base.
- | andi. TMP0, PC, FRAME_P
- | evsplati TMP1, LJ_TTRUE
- | beq ->cont_dispatch
- |
- | // Return from pcall or xpcall fast func.
- | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
- | mr BASE, TMP2 // Restore caller base.
- | // Prepending may overwrite the pcall frame, so do it at the end.
- | stwu TMP1, FRAME_PC(RA) // Prepend true to results.
- |
- |->vm_returnc:
- | addi RD, RD, 8 // RD = (nresults+1)*8.
- | andi. TMP0, PC, FRAME_TYPE
- | cmpwi cr1, RD, 0
- | li CRET1, LUA_YIELD
- | beq cr1, ->vm_unwind_c_eh
- | mr MULTRES, RD
- | beq ->BC_RET_Z // Handle regular return to Lua.
- |
- |->vm_return:
- | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
- | // TMP0 = PC & FRAME_TYPE
- | cmpwi TMP0, FRAME_C
- | rlwinm TMP2, PC, 0, 0, 28
- | li_vmstate C
- | sub TMP2, BASE, TMP2 // TMP2 = previous base.
- | bne ->vm_returnp
- |
- | addic. TMP1, RD, -8
- | stw TMP2, L->base
- | lwz TMP2, SAVE_NRES
- | subi BASE, BASE, 8
- | st_vmstate
- | slwi TMP2, TMP2, 3
- | beq >2
- |1:
- | addic. TMP1, TMP1, -8
- | evldd TMP0, 0(RA)
- | addi RA, RA, 8
- | evstdd TMP0, 0(BASE)
- | addi BASE, BASE, 8
- | bne <1
- |
- |2:
- | cmpw TMP2, RD // More/less results wanted?
- | bne >6
- |3:
- | stw BASE, L->top // Store new top.
- |
- |->vm_leave_cp:
- | lwz TMP0, SAVE_CFRAME // Restore previous C frame.
- | li CRET1, 0 // Ok return status for vm_pcall.
- | stw TMP0, L->cframe
- |
- |->vm_leave_unw:
- | restoreregs
- | blr
- |
- |6:
- | ble >7 // Less results wanted?
- | // More results wanted. Check stack size and fill up results with nil.
- | lwz TMP1, L->maxstack
- | cmplw BASE, TMP1
- | bge >8
- | evstdd TISNIL, 0(BASE)
- | addi RD, RD, 8
- | addi BASE, BASE, 8
- | b <2
- |
- |7: // Less results wanted.
- | sub TMP0, RD, TMP2
- | cmpwi TMP2, 0 // LUA_MULTRET+1 case?
- | sub TMP0, BASE, TMP0 // Subtract the difference.
- | iseleq BASE, BASE, TMP0 // Either keep top or shrink it.
- | b <3
- |
- |8: // Corner case: need to grow stack for filling up results.
- | // This can happen if:
- | // - A C function grows the stack (a lot).
- | // - The GC shrinks the stack in between.
- | // - A return back from a lua_call() with (high) nresults adjustment.
- | stw BASE, L->top // Save current top held in BASE (yes).
- | mr SAVE0, RD
- | mr CARG2, TMP2
- | mr CARG1, L
- | bl extern lj_state_growstack // (lua_State *L, int n)
- | lwz TMP2, SAVE_NRES
- | mr RD, SAVE0
- | slwi TMP2, TMP2, 3
- | lwz BASE, L->top // Need the (realloced) L->top in BASE.
- | b <2
- |
- |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
- | // (void *cframe, int errcode)
- | mr sp, CARG1
- | mr CRET1, CARG2
- |->vm_unwind_c_eh: // Landing pad for external unwinder.
- | lwz L, SAVE_L
- | li TMP0, ~LJ_VMST_C
- | lwz GL:TMP1, L->glref
- | stw TMP0, GL:TMP1->vmstate
- | b ->vm_leave_unw
- |
- |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
- | // (void *cframe)
- | rlwinm sp, CARG1, 0, 0, 29
- |->vm_unwind_ff_eh: // Landing pad for external unwinder.
- | lwz L, SAVE_L
- | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants.
- | evsplati TISFUNC, LJ_TFUNC
- | lus TOBIT, 0x4338
- | evsplati TISTAB, LJ_TTAB
- | li TMP0, 0
- | lwz BASE, L->base
- | evmergelo TOBIT, TOBIT, TMP0
- | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
- | evsplati TISSTR, LJ_TSTR
- | li TMP1, LJ_TFALSE
- | evsplati TISNIL, LJ_TNIL
- | li_vmstate INTERP
- | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
- | la RA, -8(BASE) // Results start at BASE-8.
- | addi DISPATCH, DISPATCH, GG_G2DISP
- | stw TMP1, 0(RA) // Prepend false to error message.
- | li RD, 16 // 2 results: false + error message.
- | st_vmstate
- | b ->vm_returnc
- |
- |//-----------------------------------------------------------------------
- |//-- Grow stack for calls -----------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_growstack_c: // Grow stack for C function.
- | li CARG2, LUA_MINSTACK // n = LUA_MINSTACK.
- | b >2 // Join the common grow path.
- |
- |->vm_growstack_l: // Grow stack for Lua function.
- | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
- | add RC, BASE, RC // RC = BASE + nargs*8, stored as L->top below.
- | sub RA, RA, BASE // RA = framesize*8.
- | stw BASE, L->base
- | addi PC, PC, 4 // Must point after first instruction.
- | stw RC, L->top
- | srwi CARG2, RA, 3 // n = framesize (byte size / 8).
- |2:
- | // L->base = new base, L->top = top
- | stw PC, SAVE_PC
- | mr CARG1, L
- | bl extern lj_state_growstack // (lua_State *L, int n)
- | lwz BASE, L->base // Reload BASE and top from L after the call.
- | lwz RC, L->top
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | sub RC, RC, BASE // RC = L->top - L->base.
- | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
- | ins_callt // Just retry the call.
- |
- |//-----------------------------------------------------------------------
- |//-- Entry points into the assembler VM ---------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_resume: // Setup C frame and resume thread.
- | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
- | saveregs
- | mr L, CARG1
- | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
- | mr BASE, CARG2
- | lbz TMP1, L->status // TMP1 = L->status, tested against 0 below.
- | stw L, SAVE_L
- | li PC, FRAME_CP
- | addi TMP0, sp, CFRAME_RESUME // TMP0 = sp + CFRAME_RESUME, stored as L->cframe below.
- | addi DISPATCH, DISPATCH, GG_G2DISP
- | stw CARG3, SAVE_NRES // CARG3 is nres1 (0 per the signature above).
- | cmplwi TMP1, 0
- | stw CARG3, SAVE_ERRF
- | stw TMP0, L->cframe
- | stw CARG3, SAVE_CFRAME
- | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
- | beq >3 // L->status == 0: continue at the shared entry point 3.
- |
- | // Resume after yield (like a return).
- | mr RA, BASE
- | lwz BASE, L->base
- | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants.
- | lwz TMP1, L->top
- | evsplati TISFUNC, LJ_TFUNC
- | lus TOBIT, 0x4338
- | evsplati TISTAB, LJ_TTAB
- | lwz PC, FRAME_PC(BASE)
- | li TMP2, 0
- | evsplati TISSTR, LJ_TSTR
- | sub RD, TMP1, BASE // RD = L->top - L->base.
- | evmergelo TOBIT, TOBIT, TMP2 // Pair the TOBIT word with a zero low word (TMP2 = 0).
- | stb CARG3, L->status // Overwrite L->status with CARG3.
- | andi. TMP0, PC, FRAME_TYPE // Test the frame type bits of PC (sets cr0).
- | li_vmstate INTERP
- | addi RD, RD, 8
- | evsplati TISNIL, LJ_TNIL
- | mr MULTRES, RD
- | st_vmstate
- | beq ->BC_RET_Z // No frame type bits set.
- | b ->vm_return
- |
- |->vm_pcall: // Setup protected C frame and enter VM.
- | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
- | saveregs
- | li PC, FRAME_CP
- | stw CARG4, SAVE_ERRF // Save ef (fourth argument).
- | b >1
- |
- |->vm_call: // Setup C frame and enter VM.
- | // (lua_State *L, TValue *base, int nres1)
- | saveregs
- | li PC, FRAME_C
- |
- |1: // Entry point for vm_pcall above (PC = ftype).
- | lwz TMP1, L:CARG1->cframe // TMP1 = previous L->cframe, saved to SAVE_CFRAME below.
- | stw CARG3, SAVE_NRES
- | mr L, CARG1
- | stw CARG1, SAVE_L
- | mr BASE, CARG2
- | stw sp, L->cframe // Add our C frame to cframe chain.
- | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
- | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
- | stw TMP1, SAVE_CFRAME
- | addi DISPATCH, DISPATCH, GG_G2DISP
- |
- |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
- | lwz TMP2, L->base // TMP2 = old base (used in vmeta_call).
- | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants.
- | lwz TMP1, L->top
- | evsplati TISFUNC, LJ_TFUNC
- | add PC, PC, BASE
- | evsplati TISTAB, LJ_TTAB
- | lus TOBIT, 0x4338
- | li TMP0, 0
- | sub PC, PC, TMP2 // PC = frame delta + frame type
- | evsplati TISSTR, LJ_TSTR
- | sub NARGS8:RC, TMP1, BASE // RC = L->top - BASE.
- | evmergelo TOBIT, TOBIT, TMP0 // Pair the TOBIT word with a zero low word (TMP0 = 0).
- | li_vmstate INTERP
- | evsplati TISNIL, LJ_TNIL
- | st_vmstate
- |
- |->vm_call_dispatch:
- | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
- | li TMP0, -8
- | evlddx LFUNC:RB, BASE, TMP0 // Load the 64 bit slot at BASE-8.
- | checkfunc LFUNC:RB
- | checkfail ->vmeta_call // Not a function: go to vmeta_call.
- |
- |->vm_call_dispatch_f:
- | ins_call
- | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
- | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
- |
- |->vm_cpcall: // Setup protected C frame, call C.
- | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
- | saveregs
- | mr L, CARG1
- | lwz TMP0, L:CARG1->stack
- | stw CARG1, SAVE_L
- | lwz TMP1, L->top
- | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
- | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
- | lwz TMP1, L->cframe // TMP1 = previous L->cframe, saved to SAVE_CFRAME below.
- | stw sp, L->cframe // Add our C frame to cframe chain.
- | li TMP2, 0
- | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
- | stw TMP2, SAVE_ERRF // No error function.
- | stw TMP1, SAVE_CFRAME
- | mtctr CARG4 // Call target is cp (fourth argument).
- | bctrl // (lua_State *L, lua_CFunction func, void *ud)
- | mr. BASE, CRET1 // BASE = returned value; cr0 records whether it is zero.
- | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
- | li PC, FRAME_CP
- | addi DISPATCH, DISPATCH, GG_G2DISP
- | bne <3 // Else continue with the call.
- | b ->vm_leave_cp // No base? Just remove C frame.
- |
- |//-----------------------------------------------------------------------
- |//-- Metamethod handling ------------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the
- |// stack, so BASE doesn't need to be reloaded across these calls.
- |
- |//-- Continuation dispatch ----------------------------------------------
- |
- |->cont_dispatch:
- | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
- | lwz TMP0, -12(BASE) // Continuation.
- | mr RB, BASE // Keep the meta base in RB.
- | mr BASE, TMP2 // Restore caller BASE.
- | lwz LFUNC:TMP1, FRAME_FUNC(TMP2)
- | cmplwi TMP0, 0 // Continuation word == 0 selects the tail call path at 1.
- | lwz PC, -16(RB) // Restore PC from [cont|PC].
- | beq >1
- | subi TMP2, RD, 8 // TMP2 = nresults*8, offset of the slot after the results.
- | lwz TMP1, LFUNC:TMP1->pc
- | evstddx TISNIL, RA, TMP2 // Ensure one valid arg.
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | // BASE = base, RA = resultptr, RB = meta base
- | mtctr TMP0
- | bctr // Jump to continuation.
- |
- |1: // Tail call from C function.
- | subi TMP1, RB, 16
- | sub RC, TMP1, BASE // RC = (meta base - 16) - BASE.
- | b ->vm_call_tail
- |
- |->cont_cat: // RA = resultptr, RB = meta base
- | lwz INS, -4(PC) // Reload the instruction word preceding PC.
- | subi CARG2, RB, 16 // CARG2 = meta base - 16.
- | decode_RB8 SAVE0, INS
- | evldd TMP0, 0(RA) // TMP0 = result value.
- | add TMP1, BASE, SAVE0 // TMP1 = BASE + decoded RB operand.
- | stw BASE, L->base
- | cmplw TMP1, CARG2
- | sub CARG3, CARG2, TMP1 // CARG3 = byte distance between the two slots.
- | decode_RA8 RA, INS
- | evstdd TMP0, 0(CARG2) // Store the result at CARG2.
- | bne ->BC_CAT_Z // Slots differ: continue in BC_CAT_Z.
- | evstddx TMP0, BASE, RA // Otherwise store the result into the RA slot.
- | b ->cont_nop
- |
- |//-- Table indexing metamethods -----------------------------------------
- |
- |->vmeta_tgets1: // String key; table operand decoded from the RB field of INS.
- | evmergelo STR:RC, TISSTR, STR:RC // Build a tagged string value from TISSTR and STR:RC.
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | decode_RB8 RB, INS
- | evstdd STR:RC, 0(CARG3) // k = &tmptv (holds the tagged string).
- | add CARG2, BASE, RB // o = BASE + RB.
- | b >1
- |
- |->vmeta_tgets: // Table in TAB:RB and string key in STR:RC, both untagged.
- | evmergelo TAB:RB, TISTAB, TAB:RB // Build a tagged table value from TISTAB and TAB:RB.
- | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
- | evmergelo STR:RC, TISSTR, STR:RC
- | evstdd TAB:RB, 0(CARG2) // o = &tmptv (holds the tagged table).
- | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
- | evstdd STR:RC, 0(CARG3) // k = &tmptv2 (holds the tagged string).
- | b >1
- |
- |->vmeta_tgetb: // TMP0 = index
- | efdcfsi TMP0, TMP0 // Convert the signed integer index to a double.
- | decode_RB8 RB, INS
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | add CARG2, BASE, RB // o = BASE + RB.
- | evstdd TMP0, 0(CARG3) // k = &tmptv (holds the index as a double).
- | b >1
- |
- |->vmeta_tgetv: // Table and key operands decoded from the RB and RC fields of INS.
- | decode_RB8 RB, INS
- | decode_RC8 RC, INS
- | add CARG2, BASE, RB // o = BASE + RB.
- | add CARG3, BASE, RC // k = BASE + RC.
- |1:
- | stw BASE, L->base
- | mr CARG1, L
- | stw PC, SAVE_PC
- | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
- | // Returns TValue * (finished) or NULL (metamethod).
- | cmplwi CRET1, 0
- | beq >3
- | evldd TMP0, 0(CRET1) // Copy the returned value into the RA slot.
- | evstddx TMP0, BASE, RA
- | ins_next
- |
- |3: // Call __index metamethod.
- | // BASE = base, L->top = new base, stack = cont/func/t/k
- | subfic TMP1, BASE, FRAME_CONT // TMP1 = FRAME_CONT - old base.
- | lwz BASE, L->top // BASE = new base.
- | stw PC, -16(BASE) // [cont|PC]
- | add PC, TMP1, BASE // PC = (new base - old base) + FRAME_CONT.
- | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | li NARGS8:RC, 16 // 2 args for func(t, k).
- | b ->vm_call_dispatch_f
- |
- |//-----------------------------------------------------------------------
- |
- |->vmeta_tsets1: // String key; table operand decoded from the RB field of INS.
- | evmergelo STR:RC, TISSTR, STR:RC // Build a tagged string value from TISSTR and STR:RC.
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | decode_RB8 RB, INS
- | evstdd STR:RC, 0(CARG3) // k = &tmptv (holds the tagged string).
- | add CARG2, BASE, RB // o = BASE + RB.
- | b >1
- |
- |->vmeta_tsets: // Table in TAB:RB and string key in STR:RC, both untagged.
- | evmergelo TAB:RB, TISTAB, TAB:RB // Build a tagged table value from TISTAB and TAB:RB.
- | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
- | evmergelo STR:RC, TISSTR, STR:RC
- | evstdd TAB:RB, 0(CARG2) // o = &tmptv (holds the tagged table).
- | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
- | evstdd STR:RC, 0(CARG3) // k = &tmptv2 (holds the tagged string).
- | b >1
- |
- |->vmeta_tsetb: // TMP0 = index
- | efdcfsi TMP0, TMP0 // Convert the signed integer index to a double.
- | decode_RB8 RB, INS
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | add CARG2, BASE, RB // o = BASE + RB.
- | evstdd TMP0, 0(CARG3) // k = &tmptv (holds the index as a double).
- | b >1
- |
- |->vmeta_tsetv: // Table and key operands decoded from the RB and RC fields of INS.
- | decode_RB8 RB, INS
- | decode_RC8 RC, INS
- | add CARG2, BASE, RB // o = BASE + RB.
- | add CARG3, BASE, RC // k = BASE + RC.
- |1:
- | stw BASE, L->base
- | mr CARG1, L
- | stw PC, SAVE_PC
- | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
- | // Returns TValue * (finished) or NULL (metamethod).
- | cmplwi CRET1, 0
- | evlddx TMP0, BASE, RA // TMP0 = value to store, loaded from the RA slot.
- | beq >3
- | // NOBARRIER: lj_meta_tset ensures the table is not black.
- | evstdd TMP0, 0(CRET1) // Store the value through the returned pointer.
- | ins_next
- |
- |3: // Call __newindex metamethod.
- | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
- | subfic TMP1, BASE, FRAME_CONT // TMP1 = FRAME_CONT - old base.
- | lwz BASE, L->top // BASE = new base.
- | stw PC, -16(BASE) // [cont|PC]
- | add PC, TMP1, BASE // PC = (new base - old base) + FRAME_CONT.
- | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | li NARGS8:RC, 24 // 3 args for func(t, k, v)
- | evstdd TMP0, 16(BASE) // Copy value to third argument.
- | b ->vm_call_dispatch_f
- |
- |//-- Comparison metamethods ---------------------------------------------
- |
- |->vmeta_comp: // Comparison fallback: operands are the RA and RD slots.
- | mr CARG1, L
- | subi PC, PC, 4 // Step PC back by one instruction word.
- | add CARG2, BASE, RA // o1 = BASE + RA.
- | stw PC, SAVE_PC
- | add CARG3, BASE, RD // o2 = BASE + RD.
- | stw BASE, L->base
- | decode_OP1 CARG4, INS // op = opcode field of INS.
- | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
- | // Returns 0/1 or TValue * (metamethod).
- |3:
- | cmplwi CRET1, 1
- | bgt ->vmeta_binop // Result > 1 is a TValue *: call the metamethod.
- |4:
- | lwz INS, 0(PC) // Fetch the instruction word at PC and step past it.
- | addi PC, PC, 4
- | decode_RD4 TMP2, INS
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16) // TMP3 = PC minus the jump bias.
- | add TMP2, TMP2, TMP3 // TMP2 = jump target.
- | isellt PC, PC, TMP2 // Keep PC if cr0.lt is set, else take the jump target.
- |->cont_nop:
- | ins_next
- |
- |->cont_ra: // RA = resultptr
- | lwz INS, -4(PC) // Reload the instruction word preceding PC.
- | evldd TMP0, 0(RA)
- | decode_RA8 TMP1, INS
- | evstddx TMP0, BASE, TMP1 // Copy the result into the slot named by the RA field.
- | b ->cont_nop
- |
- |->cont_condt: // RA = resultptr
- | lwz TMP0, 0(RA) // TMP0 = first word of the result.
- | li TMP1, LJ_TTRUE
- | cmplw TMP1, TMP0 // Branch if result is true.
- | b <4
- |
- |->cont_condf: // RA = resultptr
- | lwz TMP0, 0(RA) // TMP0 = first word of the result.
- | li TMP1, LJ_TFALSE
- | cmplw TMP0, TMP1 // Branch if result is false.
- | b <4
- |
- |->vmeta_equal:
- | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
- | subi PC, PC, 4 // Step PC back by one instruction word.
- | stw BASE, L->base
- | mr CARG1, L
- | stw PC, SAVE_PC
- | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
- | // Returns 0/1 or TValue * (metamethod).
- | b <3 // Share the result handling of vmeta_comp.
- |
- |//-- Arithmetic metamethods ---------------------------------------------
- |
- |->vmeta_arith_vn: // rb = BASE slot, rc = KBASE slot.
- | add CARG3, BASE, RB
- | add CARG4, KBASE, RC
- | b >1
- |
- |->vmeta_arith_nv: // rb = KBASE slot, rc = BASE slot.
- | add CARG3, KBASE, RC
- | add CARG4, BASE, RB
- | b >1
- |
- |->vmeta_unm: // Unary case: the same operand is passed twice.
- | add CARG3, BASE, RD
- | mr CARG4, CARG3
- | b >1
- |
- |->vmeta_arith_vv: // rb and rc are both BASE slots.
- | add CARG3, BASE, RB
- | add CARG4, BASE, RC
- |1:
- | add CARG2, BASE, RA // ra = BASE + RA.
- | stw BASE, L->base
- | mr CARG1, L
- | stw PC, SAVE_PC
- | decode_OP1 CARG5, INS // Caveat: CARG5 overlaps INS.
- | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
- | // Returns NULL (finished) or TValue * (metamethod).
- | cmplwi CRET1, 0
- | beq ->cont_nop // NULL: finished, continue with the next instruction.
- |
- | // Call metamethod for binary op.
- |->vmeta_binop:
- | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
- | sub TMP1, CRET1, BASE // TMP1 = new base - old base.
- | stw PC, -16(CRET1) // [cont|PC]
- | mr TMP2, BASE // TMP2 = old base (input of vm_call_dispatch).
- | addi PC, TMP1, FRAME_CONT // PC = frame delta + FRAME_CONT.
- | mr BASE, CRET1
- | li NARGS8:RC, 16 // 2 args for func(o1, o2).
- | b ->vm_call_dispatch
- |
- |->vmeta_len: // Length fallback: operand is the RD slot.
-#if LJ_52
- | mr SAVE0, CARG1 // Preserve CARG1 across the call for the retry below.
-#endif
- | add CARG2, BASE, RD // o = BASE + RD.
- | stw BASE, L->base
- | mr CARG1, L
- | stw PC, SAVE_PC
- | bl extern lj_meta_len // (lua_State *L, TValue *o)
- | // Returns NULL (retry) or TValue * (metamethod base).
-#if LJ_52
- | cmplwi CRET1, 0
- | bne ->vmeta_binop // Binop call for compatibility.
- | mr CARG1, SAVE0 // NULL: restore CARG1 and retry in BC_LEN_Z.
- | b ->BC_LEN_Z
-#else
- | b ->vmeta_binop // Binop call for compatibility.
-#endif
- |
- |//-- Call metamethod ----------------------------------------------------
- |
- |->vmeta_call: // Resolve and call __call metamethod.
- | // TMP2 = old base, BASE = new base, RC = nargs*8
- | mr CARG1, L
- | stw TMP2, L->base // This is the callers base!
- | subi CARG2, BASE, 8 // func = BASE - 8.
- | stw PC, SAVE_PC
- | add CARG3, BASE, RC // top = BASE + nargs*8.
- | mr SAVE0, NARGS8:RC // Keep nargs*8 across the call.
- | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
- | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | addi NARGS8:RC, SAVE0, 8 // Got one more argument now.
- | ins_call
- |
- |->vmeta_callt: // Resolve __call for BC_CALLT.
- | // BASE = old base, RA = new base, RC = nargs*8
- | mr CARG1, L
- | stw BASE, L->base
- | subi CARG2, RA, 8 // func = RA - 8.
- | stw PC, SAVE_PC
- | add CARG3, RA, RC // top = RA + nargs*8.
- | mr SAVE0, NARGS8:RC // Keep nargs*8 across the call.
- | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
- | lwz TMP1, FRAME_PC(BASE) // TMP1 = frame PC of the old base, loaded before entering BC_CALLT_Z.
- | addi NARGS8:RC, SAVE0, 8 // Got one more argument now.
- | lwz LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here.
- | b ->BC_CALLT_Z
- |
- |//-- Argument coercion for 'for' statement ------------------------------
- |
- |->vmeta_for: // Call lj_meta_for on the slots at RA, then re-dispatch the loop instruction.
- | mr CARG1, L
- | stw BASE, L->base
- | mr CARG2, RA // base = RA.
- | stw PC, SAVE_PC
- | mr SAVE0, INS // Keep the instruction word across the call.
- | bl extern lj_meta_for // (lua_State *L, TValue *base)
- |.if JIT
- | decode_OP1 TMP0, SAVE0 // TMP0 = opcode, compared against BC_JFORI below.
- |.endif
- | decode_RA8 RA, SAVE0 // Re-decode the RA and RD operands from the saved instruction.
- |.if JIT
- | cmpwi TMP0, BC_JFORI
- |.endif
- | decode_RD8 RD, SAVE0
- |.if JIT
- | beq =>BC_JFORI // Opcode is BC_JFORI.
- |.endif
- | b =>BC_FORI
- |
- |//-----------------------------------------------------------------------
- |//-- Fast functions -----------------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |.macro .ffunc, name // Emit only the global label ff_<name>.
- |->ff_ .. name:
- |.endmacro
- |
- |.macro .ffunc_1, name // Label plus check for at least 1 argument; CARG1 = arg 1.
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 8
- | evldd CARG1, 0(BASE)
- | blt ->fff_fallback // Fewer than 8 bytes of arguments.
- |.endmacro
- |
- |.macro .ffunc_2, name // Label plus check for at least 2 arguments; CARG1/CARG2 = args 1/2.
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 16
- | evldd CARG1, 0(BASE)
- | evldd CARG2, 8(BASE)
- | blt ->fff_fallback // Fewer than 16 bytes of arguments.
- |.endmacro
- |
- |.macro .ffunc_n, name // As .ffunc_1, and CARG1 must pass checknum.
- | .ffunc_1 name
- | checknum CARG1
- | checkfail ->fff_fallback
- |.endmacro
- |
- |.macro .ffunc_nn, name // As .ffunc_2, and both arguments must pass checknum.
- | .ffunc_2 name
- | evmergehi TMP0, CARG1, CARG2 // TMP0 = high words of CARG1 and CARG2.
- | checknum TMP0
- | checkanyfail ->fff_fallback
- |.endmacro
- |
- |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
- |.macro ffgccheck
- | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
- | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
- | cmplw TMP0, TMP1
- | bgel ->fff_gcstep // Call fff_gcstep when gc.total >= gc.threshold (unsigned).
- |.endmacro
- |
- |//-- Base library: checks -----------------------------------------------
- |
- |.ffunc assert
- | cmplwi NARGS8:RC, 8
- | evldd TMP0, 0(BASE) // TMP0 = first argument.
- | blt ->fff_fallback // No arguments.
- | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE.
- | la RA, -8(BASE) // Results start at BASE-8.
- | evcmpltu cr1, TMP0, TMP1
- | lwz PC, FRAME_PC(BASE)
- | bge cr1, ->fff_fallback // First argument fails the check: use the fallback.
- | evstdd TMP0, 0(RA) // First result = first argument.
- | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
- | beq ->fff_res // Done if exactly 1 argument.
- | li TMP1, 8 // TMP1 = byte offset of the next argument to copy.
- | subi RC, RC, 8 // RC = byte offset of the last argument.
- |1:
- | cmplw TMP1, RC
- | evlddx TMP0, BASE, TMP1 // Copy the argument at BASE+TMP1 to RA+TMP1.
- | evstddx TMP0, RA, TMP1
- | addi TMP1, TMP1, 8
- | bne <1 // Loop until the last argument has been copied.
- | b ->fff_res
- |
- |.ffunc type
- | cmplwi NARGS8:RC, 8
- | lwz CARG1, 0(BASE) // CARG1 = first word of the argument.
- | blt ->fff_fallback // No arguments.
- | li TMP2, ~LJ_TNUMX
- | cmplw CARG1, TISNUM
- | not TMP1, CARG1 // TMP1 = ~(first word).
- | isellt TMP1, TMP2, TMP1 // Use ~LJ_TNUMX when the word is below TISNUM (unsigned).
- | slwi TMP1, TMP1, 3 // Scale the index by 8 (one upvalue slot).
- | la TMP2, CFUNC:RB->upvalue
- | evlddx STR:CRET1, TMP2, TMP1 // Result = upvalue[index] of this function.
- | b ->fff_restv
- |
- |//-- Base library: getters and setters ---------------------------------
- |
- |.ffunc_1 getmetatable
- | checktab CARG1
- | evmergehi TMP1, CARG1, CARG1 // TMP1 low word = high word of the argument.
- | checkfail >6 // Not a table: handle other types at 6.
- |1: // Field metatable must be at same offset for GCtab and GCudata!
- | lwz TAB:RB, TAB:CARG1->metatable
- |2:
- | evmr CRET1, TISNIL // Default result: nil.
- | cmplwi TAB:RB, 0
- | lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
- | beq ->fff_restv // No metatable: return nil.
- | lwz TMP0, TAB:RB->hmask
- | evmergelo CRET1, TISTAB, TAB:RB // Use metatable as default result.
- | lwz TMP1, STR:RC->hash
- | lwz NODE:TMP2, TAB:RB->node
- | evmergelo STR:RC, TISSTR, STR:RC // Tagged string key used for the node comparison.
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
- | slwi TMP0, TMP1, 5
- | slwi TMP1, TMP1, 3
- | sub TMP1, TMP0, TMP1
- | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
- |3: // Rearranged logic, because we expect _not_ to find the key.
- | evldd TMP0, NODE:TMP2->key
- | evldd TMP1, NODE:TMP2->val
- | evcmpeq TMP0, STR:RC
- | lwz NODE:TMP2, NODE:TMP2->next // Advance to the next node in the chain.
- | checkallok >5 // Key matches: inspect its value at 5.
- | cmplwi NODE:TMP2, 0
- | beq ->fff_restv // Not found, keep default result.
- | b <3
- |5:
- | checknil TMP1
- | checkok ->fff_restv // Ditto for nil value.
- | evmr CRET1, TMP1 // Return value of mt.__metatable.
- | b ->fff_restv
- |
- |6:
- | cmpwi TMP1, LJ_TUDATA
- | not TMP1, TMP1 // TMP1 = ~(high word of the argument).
- | beq <1 // Userdata: load the metatable field like a table.
- | checknum CARG1
- | slwi TMP1, TMP1, 2 // Scale the index by 4 (one GC reference).
- | li TMP2, 4*~LJ_TNUMX
- | isellt TMP1, TMP2, TMP1 // Use the LJ_TNUMX index when cr0.lt is set by checknum.
- | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH)
- | lwzx TAB:RB, TMP2, TMP1 // Load from the gcroot[GCROOT_BASEMT...] array.
- | b <2
- |
- |.ffunc_2 setmetatable
- | // Fast path: no mt for table yet and not clearing the mt.
- | evmergehi TMP0, TAB:CARG1, TAB:CARG2 // TMP0 = high words of both arguments.
- | checktab TMP0
- | checkanyfail ->fff_fallback // Both arguments must pass the table check.
- | lwz TAB:TMP1, TAB:CARG1->metatable
- | cmplwi TAB:TMP1, 0
- | lbz TMP3, TAB:CARG1->marked
- | bne ->fff_fallback // Table already has a metatable.
- | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
- | stw TAB:CARG2, TAB:CARG1->metatable
- | beq ->fff_restv // Not black: no barrier needed.
- | barrierback TAB:CARG1, TMP3, TMP0
- | b ->fff_restv
- |
- |.ffunc rawget
- | cmplwi NARGS8:RC, 16
- | evldd CARG2, 0(BASE) // CARG2 = first argument.
- | blt ->fff_fallback // Fewer than 2 arguments.
- | checktab CARG2
- | la CARG3, 8(BASE) // key = address of the second argument.
- | checkfail ->fff_fallback // First argument is not a table.
- | mr CARG1, L
- | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
- | // Returns cTValue *.
- | evldd CRET1, 0(CRET1) // Load the value through the returned pointer.
- | b ->fff_restv
- |
- |//-- Base library: conversions ------------------------------------------
- |
- |.ffunc tonumber
- | // Only handles the number case inline (without a base argument).
- | cmplwi NARGS8:RC, 8
- | evldd CARG1, 0(BASE) // CARG1 = first argument.
- | bne ->fff_fallback // Exactly one argument.
- | checknum CARG1
- | checkok ->fff_restv // Number: return the argument unchanged.
- | b ->fff_fallback
- |
- |.ffunc_1 tostring
- | // Only handles the string or number case inline.
- | checkstr CARG1
- | // A __tostring method in the string base metatable is ignored.
- | checkok ->fff_restv // String key?
- | // Handle numbers inline, unless a number base metatable is present.
- | lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
- | checknum CARG1
- | cmplwi cr1, TMP0, 0 // cr1.eq: no number base metatable.
- | stw BASE, L->base // Add frame since C call can throw.
- | crand 4*cr0+eq, 4*cr0+lt, 4*cr1+eq // cr0.eq = cr0.lt (from checknum) AND cr1.eq.
- | stw PC, SAVE_PC // Redundant (but a defined value).
- | bne ->fff_fallback
- | ffgccheck
- | mr CARG1, L
- | mr CARG2, BASE // np = address of the first argument.
- | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np)
- | // Returns GCstr *.
- | evmergelo STR:CRET1, TISSTR, STR:CRET1 // Tag the returned pointer as a string value.
- | b ->fff_restv
- |
- |//-- Base library: iterators -------------------------------------------
- |
- |.ffunc next
- | cmplwi NARGS8:RC, 8
- | evldd CARG2, 0(BASE) // CARG2 = first argument.
- | blt ->fff_fallback // No arguments.
- | evstddx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil.
- | checktab TAB:CARG2
- | lwz PC, FRAME_PC(BASE)
- | checkfail ->fff_fallback // First argument is not a table.
- | stw BASE, L->base // Add frame since C call can throw.
- | mr CARG1, L
- | stw BASE, L->top // Dummy frame length is ok.
- | la CARG3, 8(BASE) // key = address of the second argument.
- | stw PC, SAVE_PC
- | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
- | // Returns 0 at end of traversal.
- | cmplwi CRET1, 0
- | evmr CRET1, TISNIL
- | beq ->fff_restv // End of traversal: return nil.
- | evldd TMP0, 8(BASE) // Copy key and value to results.
- | la RA, -8(BASE)
- | evldd TMP1, 16(BASE)
- | evstdd TMP0, 0(RA)
- | li RD, (2+1)*8 // 2 results.
- | evstdd TMP1, 8(RA)
- | b ->fff_res
- |
- |.ffunc_1 pairs
- | checktab TAB:CARG1
- | lwz PC, FRAME_PC(BASE)
- | checkfail ->fff_fallback // Argument is not a table.
-#if LJ_52
- | lwz TAB:TMP2, TAB:CARG1->metatable
- | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
- | cmplwi TAB:TMP2, 0
- | la RA, -8(BASE)
- | bne ->fff_fallback // Table has a metatable: use the fallback.
-#else
- | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
- | la RA, -8(BASE)
-#endif
- | evstdd TISNIL, 8(BASE) // Third result: nil.
- | li RD, (3+1)*8 // 3 results.
- | evstdd CFUNC:TMP0, 0(RA) // First result: upvalue[0] of this function.
- | b ->fff_res
- |
- |.ffunc_2 ipairs_aux
- | checktab TAB:CARG1
- | lwz PC, FRAME_PC(BASE)
- | checkfail ->fff_fallback // First argument is not a table.
- | checknum CARG2
- | lus TMP3, 0x3ff0
- | checkfail ->fff_fallback // Second argument is not a number.
- | efdctsi TMP2, CARG2 // TMP2 = index converted to a signed integer.
- | lwz TMP0, TAB:CARG1->asize
- | evmergelo TMP3, TMP3, ZERO // Pair the TMP3 word with the low word of ZERO.
- | lwz TMP1, TAB:CARG1->array
- | efdadd CARG2, CARG2, TMP3 // Add the double in TMP3 to the index.
- | addi TMP2, TMP2, 1 // Integer index + 1.
- | la RA, -8(BASE)
- | cmplw TMP0, TMP2
- | slwi TMP3, TMP2, 3 // Byte offset of the array slot.
- | evstdd CARG2, 0(RA) // First result: the updated index.
- | ble >2 // Not in array part?
- | evlddx TMP1, TMP1, TMP3 // Load array[index].
- |1:
- | checknil TMP1
- | li RD, (0+1)*8
- | checkok ->fff_res // End of iteration, return 0 results.
- | li RD, (2+1)*8 // 2 results: index and value.
- | evstdd TMP1, 8(RA)
- | b ->fff_res
- |2: // Check for empty hash part first. Otherwise call C function.
- | lwz TMP0, TAB:CARG1->hmask
- | cmplwi TMP0, 0
- | li RD, (0+1)*8
- | beq ->fff_res // hmask == 0: return 0 results.
- | mr CARG2, TMP2 // key = integer index.
- | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
- | // Returns cTValue * or NULL.
- | cmplwi CRET1, 0
- | li RD, (0+1)*8
- | beq ->fff_res // NULL: return 0 results.
- | evldd TMP1, 0(CRET1) // Load the value and rejoin the nil check at 1.
- | b <1
- |
- |.ffunc_1 ipairs
- | checktab TAB:CARG1
- | lwz PC, FRAME_PC(BASE)
- | checkfail ->fff_fallback // Argument is not a table.
-#if LJ_52
- | lwz TAB:TMP2, TAB:CARG1->metatable
- | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
- | cmplwi TAB:TMP2, 0
- | la RA, -8(BASE)
- | bne ->fff_fallback // Table has a metatable: use the fallback.
-#else
- | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
- | la RA, -8(BASE)
-#endif
- | evsplati TMP1, 0 // TMP1 = all-zero 64 bit value.
- | li RD, (3+1)*8 // 3 results.
- | evstdd TMP1, 8(BASE) // Third result: the zero value.
- | evstdd CFUNC:TMP0, 0(RA) // First result: upvalue[0] of this function.
- | b ->fff_res
- |
- |//-- Base library: catch errors ----------------------------------------
- |
- |.ffunc pcall
- | cmplwi NARGS8:RC, 8
- | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
- | blt ->fff_fallback // No arguments.
- | mr TMP2, BASE // TMP2 = old base (input of vm_call_dispatch).
- | la BASE, 8(BASE) // New base is one slot above the old base.
- | // Remember active hook before pcall.
- | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 // TMP3 = hookmask bit HOOK_ACTIVE_SHIFT, as 0 or 1.
- | subi NARGS8:RC, NARGS8:RC, 8 // One argument fewer for the called function.
- | addi PC, TMP3, 8+FRAME_PCALL // PC = frame delta (8) + FRAME_PCALL + hook bit.
- | b ->vm_call_dispatch
- |
- |.ffunc_2 xpcall
- | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
- | mr TMP2, BASE // TMP2 = old base (input of vm_call_dispatch).
- | checkfunc CARG2 // Traceback must be a function.
- | checkfail ->fff_fallback
- | la BASE, 16(BASE) // New base is two slots above the old base.
- | // Remember active hook before pcall.
- | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 // TMP3 = hookmask bit HOOK_ACTIVE_SHIFT, as 0 or 1.
- | evstdd CARG2, 0(TMP2) // Swap function and traceback.
- | subi NARGS8:RC, NARGS8:RC, 16 // Two arguments fewer for the called function.
- | evstdd CARG1, 8(TMP2)
- | addi PC, TMP3, 16+FRAME_PCALL // PC = frame delta (16) + FRAME_PCALL + hook bit.
- | b ->vm_call_dispatch
- |
- |//-- Coroutine library --------------------------------------------------
- |
- |.macro coroutine_resume_wrap, resume // resume=1 emits ff_coroutine_resume, resume=0 emits ff_coroutine_wrap_aux.
- |.if resume
- |.ffunc_1 coroutine_resume
- | evmergehi TMP0, L:CARG1, L:CARG1 // TMP0 low word = high word of the argument.
- |.else
- |.ffunc coroutine_wrap_aux
- | lwz L:CARG1, CFUNC:RB->upvalue[0].gcr // Coroutine comes from upvalue[0] of this function.
- |.endif
- |.if resume
- | cmpwi TMP0, LJ_TTHREAD
- | bne ->fff_fallback // Argument is not a thread.
- |.endif
- | lbz TMP0, L:CARG1->status
- | lwz TMP1, L:CARG1->cframe
- | lwz CARG2, L:CARG1->top
- | cmplwi cr0, TMP0, LUA_YIELD
- | lwz TMP2, L:CARG1->base
- | cmplwi cr1, TMP1, 0
- | lwz TMP0, L:CARG1->maxstack
- | cmplw cr7, CARG2, TMP2
- | lwz PC, FRAME_PC(BASE)
- | crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq // st>LUA_YIELD || cframe!=0
- | add TMP2, CARG2, NARGS8:RC // TMP2 = co->top + nargs*8.
- | crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq // base==top && st!=LUA_YIELD
- | cmplw cr1, TMP2, TMP0 // Compare the new top against co->maxstack.
- | cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt
- | stw PC, SAVE_PC
- | cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt // cond1 || cond2 || stackov
- | stw BASE, L->base
- | blt cr6, ->fff_fallback // Any of the three conditions holds: use the fallback.
- |1:
- |.if resume
- | addi BASE, BASE, 8 // Keep resumed thread in stack for GC.
- | subi NARGS8:RC, NARGS8:RC, 8
- | subi TMP2, TMP2, 8
- |.endif
- | stw TMP2, L:CARG1->top // co->top = old top + size of the moved args.
- | li TMP1, 0
- | stw BASE, L->top
- |2: // Move args to coroutine.
- | cmpw TMP1, NARGS8:RC
- | evlddx TMP0, BASE, TMP1
- | beq >3 // All arguments copied.
- | evstddx TMP0, CARG2, TMP1
- | addi TMP1, TMP1, 8
- | b <2
- |3:
- | li CARG3, 0
- | mr L:SAVE0, L:CARG1 // Keep the coroutine in SAVE0 across the call.
- | li CARG4, 0
- | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0)
- | // Returns thread status.
- |4:
- | lwz TMP2, L:SAVE0->base
- | cmplwi CRET1, LUA_YIELD
- | lwz TMP3, L:SAVE0->top
- | li_vmstate INTERP
- | lwz BASE, L->base
- | st_vmstate
- | bgt >8 // Status > LUA_YIELD: error path at 8.
- | sub RD, TMP3, TMP2 // RD = co->top - co->base (result bytes).
- | lwz TMP0, L->maxstack
- | cmplwi RD, 0
- | add TMP1, BASE, RD // TMP1 = BASE + result bytes.
- | beq >6 // No results?
- | cmplw TMP1, TMP0
- | li TMP1, 0
- | bgt >9 // Need to grow stack?
- |
- | subi TMP3, RD, 8 // TMP3 = byte offset of the last result.
- | stw TMP2, L:SAVE0->top // Clear coroutine stack.
- |5: // Move results from coroutine.
- | cmplw TMP1, TMP3
- | evlddx TMP0, TMP2, TMP1
- | evstddx TMP0, BASE, TMP1
- | addi TMP1, TMP1, 8
- | bne <5 // Loop until the last result has been copied.
- |6:
- | andi. TMP0, PC, FRAME_TYPE // Test the frame type bits of PC (sets cr0 for the beq at 7).
- |.if resume
- | li TMP1, LJ_TTRUE
- | la RA, -8(BASE)
- | stw TMP1, -8(BASE) // Prepend true to results.
- | addi RD, RD, 16
- |.else
- | mr RA, BASE
- | addi RD, RD, 8
- |.endif
- |7:
- | stw PC, SAVE_PC
- | mr MULTRES, RD
- | beq ->BC_RET_Z // No frame type bits set.
- | b ->vm_return
- |
- |8: // Coroutine returned with error (at co->top-1).
- |.if resume
- | andi. TMP0, PC, FRAME_TYPE // Sets cr0 for the beq at 7.
- | la TMP3, -8(TMP3)
- | li TMP1, LJ_TFALSE
- | evldd TMP0, 0(TMP3) // TMP0 = value at co->top-1.
- | stw TMP3, L:SAVE0->top // Remove error from coroutine stack.
- | li RD, (2+1)*8
- | stw TMP1, -8(BASE) // Prepend false to results.
- | la RA, -8(BASE)
- | evstdd TMP0, 0(BASE) // Copy error message.
- | b <7
- |.else
- | mr CARG1, L
- | mr CARG2, L:SAVE0
- | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
- |.endif
- |
- |9: // Handle stack expansion on return from yield.
- | mr CARG1, L
- | srwi CARG2, RD, 3 // n = number of result slots.
- | bl extern lj_state_growstack // (lua_State *L, int n)
- | li CRET1, 0 // Status 0, then redo the result handling at 4.
- | b <4
- |.endmacro
- |
- | coroutine_resume_wrap 1 // coroutine.resume
- | coroutine_resume_wrap 0 // coroutine.wrap
- |
- |.ffunc coroutine_yield
- | lwz TMP0, L->cframe
- | add TMP1, BASE, NARGS8:RC // TMP1 = BASE + nargs*8, stored as L->top below.
- | stw BASE, L->base
- | andi. TMP0, TMP0, CFRAME_RESUME // Test the CFRAME_RESUME bit of L->cframe.
- | stw TMP1, L->top
- | li CRET1, LUA_YIELD
- | beq ->fff_fallback // Bit not set: use the fallback.
- | stw ZERO, L->cframe // Store ZERO into L->cframe.
- | stb CRET1, L->status // L->status = LUA_YIELD.
- | b ->vm_leave_unw
- |
- |//-- Math library -------------------------------------------------------
- |
- |.ffunc_n math_abs
- | efdabs CRET1, CARG1 // CRET1 = |CARG1| (double).
- | // Fallthrough.
- |
- |->fff_restv:
- | // CRET1 = TValue result.
- | lwz PC, FRAME_PC(BASE)
- | la RA, -8(BASE) // Results start at BASE-8.
- | evstdd CRET1, 0(RA)
- |->fff_res1:
- | // RA = results, PC = return.
- | li RD, (1+1)*8 // 1 result.
- |->fff_res:
- | // RA = results, RD = (nresults+1)*8, PC = return.
- | andi. TMP0, PC, FRAME_TYPE // Test the frame type bits of PC.
- | mr MULTRES, RD
- | bne ->vm_return // Frame type bits set: return via vm_return.
- | lwz INS, -4(PC) // Load the instruction word preceding PC.
- | decode_RB8 RB, INS
- |5:
- | cmplw RB, RD // More results expected?
- | decode_RA8 TMP0, INS
- | bgt >6
- | ins_next1
- | // Adjust BASE. KBASE is assumed to be set for the calling frame.
- | sub BASE, RA, TMP0
- | ins_next2
- |
- |6: // Fill up results with nil.
- | subi TMP1, RD, 8 // TMP1 = offset of the first missing result.
- | addi RD, RD, 8
- | evstddx TISNIL, RA, TMP1
- | b <5
- |
- |.macro math_extern, func // ff_math_<func>: call the external 1-argument function func.
- | .ffunc math_ .. func
- | cmplwi NARGS8:RC, 8
- | evldd CARG2, 0(BASE) // CARG2 = first argument (64 bit).
- | blt ->fff_fallback // No arguments.
- | checknum CARG2
- | evmergehi CARG1, CARG2, CARG2 // CARG1 = high word; the pair CARG1/CARG2 holds the double.
- | checkfail ->fff_fallback
- | bl extern func@plt
- | evmergelo CRET1, CRET1, CRET2 // Recombine the CRET1/CRET2 pair into one 64 bit value.
- | b ->fff_restv
- |.endmacro
- |
- |.macro math_extern2, func // ff_math_<func>: call the external 2-argument function func.
- | .ffunc math_ .. func
- | cmplwi NARGS8:RC, 16
- | evldd CARG2, 0(BASE) // CARG2 = first argument (64 bit).
- | evldd CARG4, 8(BASE) // CARG4 = second argument (64 bit).
- | blt ->fff_fallback // Fewer than 2 arguments.
- | evmergehi CARG1, CARG4, CARG2 // CARG1 = high words of arg 2 and arg 1.
- | checknum CARG1
- | evmergehi CARG3, CARG4, CARG4 // CARG3 = high word of arg 2; pair CARG3/CARG4.
- | checkanyfail ->fff_fallback
- | bl extern func@plt
- | evmergelo CRET1, CRET1, CRET2 // Recombine the CRET1/CRET2 pair into one 64 bit value.
- | b ->fff_restv
- |.endmacro
- |
- |.macro math_round, func // ff_math_<func>: call the internal helper vm_<func>_hilo.
- | .ffunc math_ .. func
- | cmplwi NARGS8:RC, 8
- | evldd CARG2, 0(BASE) // CARG2 = first argument (64 bit).
- | blt ->fff_fallback // No arguments.
- | checknum CARG2
- | evmergehi CARG1, CARG2, CARG2 // CARG1 = high word; the pair CARG1/CARG2 holds the double.
- | checkfail ->fff_fallback
- | lwz PC, FRAME_PC(BASE)
- | bl ->vm_..func.._hilo;
- | la RA, -8(BASE)
- | evstdd CRET2, 0(RA) // Store CRET2 as the single result.
- | b ->fff_res1
- |.endmacro
- |
- | math_round floor // Emits ff_math_floor.
- | math_round ceil // Emits ff_math_ceil.
- |
- | math_extern sqrt // Emits ff_math_sqrt.
- |
- |.ffunc math_log
- | cmplwi NARGS8:RC, 8
- | evldd CARG2, 0(BASE) // CARG2 = first argument (64 bit).
- | bne ->fff_fallback // Need exactly 1 argument.
- | checknum CARG2
- | evmergehi CARG1, CARG2, CARG2 // CARG1 = high word; the pair CARG1/CARG2 holds the double.
- | checkfail ->fff_fallback
- | bl extern log@plt
- | evmergelo CRET1, CRET1, CRET2 // Recombine the CRET1/CRET2 pair into one 64 bit value.
- | b ->fff_restv
- |
- | math_extern log10 // Each line emits ff_math_<name> calling the external <name>.
- | math_extern exp
- | math_extern sin
- | math_extern cos
- | math_extern tan
- | math_extern asin
- | math_extern acos
- | math_extern atan
- | math_extern sinh
- | math_extern cosh
- | math_extern tanh
- | math_extern2 pow // Two-argument variants.
- | math_extern2 atan2
- | math_extern2 fmod
- |
- |.ffunc math_ldexp
- | cmplwi NARGS8:RC, 16
- | evldd CARG2, 0(BASE) // CARG2 = first argument (64 bit).
- | evldd CARG4, 8(BASE) // CARG4 = second argument (64 bit).
- | blt ->fff_fallback // Fewer than 2 arguments.
- | evmergehi CARG1, CARG4, CARG2 // CARG1 = high words of arg 2 and arg 1.
- | checknum CARG1
- | checkanyfail ->fff_fallback
- | efdctsi CARG3, CARG4 // CARG3 = second argument converted to a signed integer.
- | bl extern ldexp@plt
- | evmergelo CRET1, CRET1, CRET2 // Recombine the CRET1/CRET2 pair into one 64 bit value.
- | b ->fff_restv
- |
- |.ffunc math_frexp
- | cmplwi NARGS8:RC, 8
- | evldd CARG2, 0(BASE) // CARG2 = first argument (64 bit).
- | blt ->fff_fallback // No arguments.
- | checknum CARG2
- | evmergehi CARG1, CARG2, CARG2 // CARG1 = high word; the pair CARG1/CARG2 holds the double.
- | checkfail ->fff_fallback
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) // Output pointer argument = &tmptv.
- | lwz PC, FRAME_PC(BASE)
- | bl extern frexp@plt
- | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) // TMP1 = integer written to tmptv by the call.
- | evmergelo CRET1, CRET1, CRET2 // Recombine the CRET1/CRET2 pair into one 64 bit value.
- | efdcfsi CRET2, TMP1 // Convert that integer to a double.
- | la RA, -8(BASE)
- | evstdd CRET1, 0(RA) // First result: returned double.
- | li RD, (2+1)*8 // 2 results.
- | evstdd CRET2, 8(RA) // Second result: converted integer.
- | b ->fff_res
- |
- |.ffunc math_modf
- | cmplwi NARGS8:RC, 8
- | evldd CARG2, 0(BASE) // CARG2 = first argument (64 bit).
- | blt ->fff_fallback // No arguments.
- | checknum CARG2
- | evmergehi CARG1, CARG2, CARG2 // CARG1 = high word; the pair CARG1/CARG2 holds the double.
- | checkfail ->fff_fallback
- | la CARG3, -8(BASE) // Output pointer argument = first result slot (BASE-8).
- | lwz PC, FRAME_PC(BASE)
- | bl extern modf@plt
- | evmergelo CRET1, CRET1, CRET2 // Recombine the CRET1/CRET2 pair into one 64 bit value.
- | la RA, -8(BASE)
- | evstdd CRET1, 0(BASE) // Second result: returned double.
- | li RD, (2+1)*8 // 2 results.
- | b ->fff_res
- |
- |.macro math_minmax, name, cmpop // ff_<name>: fold all arguments with the comparison cmpop.
- | .ffunc_1 name
- | checknum CARG1
- | li TMP1, 8 // TMP1 = byte offset of the next argument.
- | checkfail ->fff_fallback
- |1:
- | evlddx CARG2, BASE, TMP1 // CARG2 = next argument.
- | cmplw cr1, TMP1, NARGS8:RC
- | checknum CARG2
- | bge cr1, ->fff_restv // Ok, since CRET1 = CARG1.
- | checkfail ->fff_fallback
- | cmpop CARG2, CARG1
- | addi TMP1, TMP1, 8
- | crmove 4*cr0+lt, 4*cr0+gt // Copy cr0.gt into cr0.lt for the evsel below.
- | evsel CARG1, CARG2, CARG1 // Select CARG2 or keep CARG1 per the condition bits.
- | b <1
- |.endmacro
- |
- | math_minmax math_min, efdtstlt // Emits ff_math_min.
- | math_minmax math_max, efdtstgt // Emits ff_math_max.
- |
- |//-- String library -----------------------------------------------------
- |
- |.ffunc_1 string_len
- | checkstr STR:CARG1
- | checkfail ->fff_fallback // Argument is not a string.
- | lwz TMP0, STR:CARG1->len
- | efdcfsi CRET1, TMP0 // Result = str->len converted to a double.
- | b ->fff_restv
- |
- |.ffunc string_byte // Only handle the 1-arg case here.
- | cmplwi NARGS8:RC, 8
- | evldd STR:CARG1, 0(BASE)
- | bne ->fff_fallback // Need exactly 1 argument.
- | checkstr STR:CARG1
- | la RA, -8(BASE)
- | checkfail ->fff_fallback // Argument is not a string.
- | lwz TMP0, STR:CARG1->len
- | li RD, (0+1)*8 // 0 results, selected when len == 0.
- | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
- | li TMP2, (1+1)*8 // 1 result, selected otherwise.
- | cmplwi TMP0, 0
- | lwz PC, FRAME_PC(BASE)
- | efdcfsi CRET1, TMP1 // Convert the byte to a double.
- | iseleq RD, RD, TMP2 // Keep RD if len == 0, else use TMP2.
- | evstdd CRET1, 0(RA)
- | b ->fff_res
- |
- |.ffunc string_char // Only handle the 1-arg case here.
- | ffgccheck
- | cmplwi NARGS8:RC, 8
- | evldd CARG1, 0(BASE)
- | bne ->fff_fallback // Exactly 1 argument.
- | checknum CARG1
- | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) // str = &tmptv, used as a 1-byte buffer.
- | checkfail ->fff_fallback
- | efdctsiz TMP0, CARG1 // TMP0 = argument converted to a signed integer.
- | li CARG3, 1 // l = 1.
- | cmplwi TMP0, 255
- | stb TMP0, 0(CARG2) // Store the character byte into the buffer.
- | bgt ->fff_fallback // Value above 255 (unsigned compare): use the fallback.
- |->fff_newstr:
- | mr CARG1, L
- | stw BASE, L->base
- | stw PC, SAVE_PC
- | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
- | // Returns GCstr *.
- | lwz BASE, L->base // Reload BASE from L after the call.
- | evmergelo STR:CRET1, TISSTR, STR:CRET1 // Tag the returned pointer as a string value.
- | b ->fff_restv
- |
- |.ffunc string_sub
- | ffgccheck
- | cmplwi NARGS8:RC, 16
- | evldd CARG3, 16(BASE) // CARG3 = third argument slot (only used when present).
- | evldd STR:CARG1, 0(BASE)
- | blt ->fff_fallback // Fewer than 2 arguments.
- | evldd CARG2, 8(BASE)
- | li TMP2, -1 // Default end = -1.
- | beq >1 // Exactly 2 arguments: keep the default end.
- | checknum CARG3
- | checkfail ->fff_fallback
- | efdctsiz TMP2, CARG3 // TMP2 = end converted to a signed integer.
- |1:
- | checknum CARG2
- | checkfail ->fff_fallback
- | checkstr STR:CARG1
- | efdctsiz TMP1, CARG2 // TMP1 = start converted to a signed integer.
- | checkfail ->fff_fallback
- | lwz TMP0, STR:CARG1->len
- | cmplw TMP0, TMP2 // len < end? (unsigned compare)
- | add TMP3, TMP2, TMP0 // TMP3 = end + len, used at 5.
- | blt >5
- |2:
- | cmpwi TMP1, 0 // start <= 0?
- | add TMP3, TMP1, TMP0 // TMP3 = start + len, used at 7.
- | ble >7
- |3:
- | sub. CARG3, TMP2, TMP1 // CARG3 = end - start (sets cr0).
- | addi CARG2, STR:CARG1, #STR-1
- | addi CARG3, CARG3, 1 // l = end - start + 1.
- | add CARG2, CARG2, TMP1 // str = string object + #STR - 1 + start.
- | isellt CARG3, r0, CARG3 // Use zero for l when end - start is negative.
- | b ->fff_newstr
- |
- |5: // Negative end or overflow.
- | cmpw TMP0, TMP2
- | addi TMP3, TMP3, 1
- | iselgt TMP2, TMP3, TMP0 // end = end > len ? len : end+len+1
- | b <2
- |
- |7: // Negative start or underflow.
- | cmpwi cr1, TMP3, 0
- | iseleq TMP1, r0, TMP3 // start == 0: use 0, else start+len.
- | isel TMP1, r0, TMP1, 4*cr1+lt // start+len < 0: use 0.
- | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
- | b <3
- |
- |.ffunc string_rep // Only handle the 1-char case inline.
- | ffgccheck
- | cmplwi NARGS8:RC, 16
- | evldd CARG1, 0(BASE)
- | evldd CARG2, 8(BASE)
- | bne ->fff_fallback // Exactly 2 arguments.
- | checknum CARG2
- | checkfail ->fff_fallback
- | checkstr STR:CARG1
- | efdctsiz CARG3, CARG2
- | checkfail ->fff_fallback
- | lwz TMP0, STR:CARG1->len
- | cmpwi CARG3, 0
- | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
- | ble >2 // Count <= 0? (or non-int)
- | cmplwi TMP0, 1
- | subi TMP2, CARG3, 1
- | blt >2 // Zero length string?
- | cmplw cr1, TMP1, CARG3
- | bne ->fff_fallback // Fallback for > 1-char strings.
- | lbz TMP0, STR:CARG1[1]
- | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
- | blt cr1, ->fff_fallback
- |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
- | cmplwi TMP2, 0
- | stbx TMP0, CARG2, TMP2
- | subi TMP2, TMP2, 1
- | bne <1
- | b ->fff_newstr
- |2: // Return empty string.
- | la STR:CRET1, DISPATCH_GL(strempty)(DISPATCH)
- | evmergelo CRET1, TISSTR, STR:CRET1
- | b ->fff_restv
- |
- |.ffunc string_reverse // Reverse copy into the temp buffer, then create the string.
- | ffgccheck
- | cmplwi NARGS8:RC, 8
- | evldd CARG1, 0(BASE) // Load the first argument slot.
- | blt ->fff_fallback // Fewer than 1 argument (RC = nargs*8).
- | checkstr STR:CARG1
- | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) // TMP1 = tmpbuf.sz
- | checkfail ->fff_fallback
- | lwz CARG3, STR:CARG1->len // CARG3 = string length (also the length passed to fff_newstr).
- | la CARG1, #STR(STR:CARG1) // CARG1 = source bytes: string object address + sizeof(STR type).
- | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) // CARG2 = tmpbuf.buf (destination).
- | li TMP2, 0 // Source index, counts up from 0.
- | cmplw TMP1, CARG3
- | subi TMP3, CARG3, 1 // Destination index, counts down from len-1.
- | blt ->fff_fallback // tmpbuf.sz < len: fallback.
- |1: // Reverse string copy.
- | cmpwi TMP3, 0 // Signed compare: destination index < 0?
- | lbzx TMP1, CARG1, TMP2 // Load the source byte (before the exit branch is taken).
- | blt ->fff_newstr // Done: make a string from CARG2/CARG3.
- | stbx TMP1, CARG2, TMP3
- | subi TMP3, TMP3, 1
- | addi TMP2, TMP2, 1
- | b <1
- |
- |.macro ffstring_case, name, lo // Toggle bit 0x20 of every byte in [lo, lo+26); result built in the temp buffer.
- | .ffunc name
- | ffgccheck
- | cmplwi NARGS8:RC, 8
- | evldd CARG1, 0(BASE) // Load the first argument slot.
- | blt ->fff_fallback // Fewer than 1 argument (RC = nargs*8).
- | checkstr STR:CARG1
- | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) // TMP1 = tmpbuf.sz
- | checkfail ->fff_fallback
- | lwz CARG3, STR:CARG1->len // CARG3 = string length (also the length passed to fff_newstr).
- | la CARG1, #STR(STR:CARG1) // CARG1 = source bytes: string object address + sizeof(STR type).
- | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) // CARG2 = tmpbuf.buf (destination).
- | cmplw TMP1, CARG3
- | li TMP2, 0 // i = 0
- | blt ->fff_fallback // tmpbuf.sz < len: fallback.
- |1: // ASCII case conversion.
- | cmplw TMP2, CARG3 // i >= len?
- | lbzx TMP1, CARG1, TMP2 // c = src[i] (loaded before the exit branch is taken).
- | bge ->fff_newstr // Done: make a string from CARG2/CARG3.
- | subi TMP0, TMP1, lo
- | xori TMP3, TMP1, 0x20 // Candidate: c with bit 0x20 toggled.
- | cmplwi TMP0, 26 // Unsigned compare: c - lo < 26?
- | isellt TMP1, TMP3, TMP1 // c = in range ? c^0x20 : c
- | stbx TMP1, CARG2, TMP2 // dst[i] = c
- | addi TMP2, TMP2, 1 // i++
- | b <1
- |.endmacro
- |
- |ffstring_case string_lower, 65
- |ffstring_case string_upper, 97
- |
- |//-- Table library ------------------------------------------------------
- |
- |.ffunc_1 table_getn
- | checktab CARG1
- | checkfail ->fff_fallback
- | bl extern lj_tab_len // (GCtab *t)
- | // Returns uint32_t (but less than 2^31).
- | efdcfsi CRET1, CRET1
- | b ->fff_restv
- |
- |//-- Bit library --------------------------------------------------------
- |
- |.macro .ffunc_bit, name
- | .ffunc_n bit_..name
- | efdadd CARG1, CARG1, TOBIT
- |.endmacro
- |
- |.ffunc_bit tobit
- |->fff_resbit:
- | efdcfsi CRET1, CARG1
- | b ->fff_restv
- |
- |.macro .ffunc_bit_op, name, ins // Fold all arguments into CARG1 with the integer op 'ins'.
- | .ffunc_bit name // Expands to .ffunc_n bit_<name>, then adds TOBIT to CARG1.
- | li TMP1, 8 // Byte offset of the second argument slot.
- |1:
- | evlddx CARG2, BASE, TMP1 // Load the slot at BASE+TMP1 (before the bounds branch below).
- | cmplw cr1, TMP1, NARGS8:RC // offset >= nargs*8?
- | checknum CARG2
- | bge cr1, ->fff_resbit // No more arguments: return CARG1 converted to a number.
- | checkfail ->fff_fallback
- | efdadd CARG2, CARG2, TOBIT // Same TOBIT add as applied to CARG1.
- | ins CARG1, CARG1, CARG2 // Accumulate into CARG1.
- | addi TMP1, TMP1, 8 // Next argument slot.
- | b <1
- |.endmacro
- |
- |.ffunc_bit_op band, and
- |.ffunc_bit_op bor, or
- |.ffunc_bit_op bxor, xor
- |
- |.ffunc_bit bswap // Byte-reverse the 32 bit value in CARG1.
- | rotlwi TMP0, CARG1, 8 // Bytes ABCD -> BCDA.
- | rlwimi TMP0, CARG1, 24, 0, 7 // Insert bits 0-7 of rotl(x, 24) = DABC: -> DCDA.
- | rlwimi TMP0, CARG1, 24, 16, 23 // Insert bits 16-23 of DABC: -> DCBA.
- | efdcfsi CRET1, TMP0 // Convert the signed 32 bit result to a double.
- | b ->fff_restv
- |
- |.ffunc_bit bnot
- | not TMP0, CARG1
- | efdcfsi CRET1, TMP0
- | b ->fff_restv
- |
- |.macro .ffunc_bit_sh, name, ins, shmod // shmod: 1 = mask count to 5 bits, 2 = negate count, other = count unchanged.
- | .ffunc_nn bit_..name
- | efdadd CARG2, CARG2, TOBIT // Count operand.
- | efdadd CARG1, CARG1, TOBIT // Value operand.
- |.if shmod == 1
- | rlwinm CARG2, CARG2, 0, 27, 31 // Keep only the low 5 bits of the count.
- |.elif shmod == 2
- | neg CARG2, CARG2 // Negated count: lets a rotate-left 'ins' implement rotate-right.
- |.endif
- | ins TMP0, CARG1, CARG2
- | efdcfsi CRET1, TMP0 // Convert the signed 32 bit result to a double.
- | b ->fff_restv
- |.endmacro
- |
- |.ffunc_bit_sh lshift, slw, 1
- |.ffunc_bit_sh rshift, srw, 1
- |.ffunc_bit_sh arshift, sraw, 1
- |.ffunc_bit_sh rol, rotlw, 0
- |.ffunc_bit_sh ror, rotlw, 2
- |
- |//-----------------------------------------------------------------------
- |
- |->fff_fallback: // Call fast function fallback handler.
- | // BASE = new base, RB = CFUNC, RC = nargs*8
- | lwz TMP3, CFUNC:RB->f
- | add TMP1, BASE, NARGS8:RC
- | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC.
- | addi TMP0, TMP1, 8*LUA_MINSTACK
- | lwz TMP2, L->maxstack
- | stw PC, SAVE_PC // Redundant (but a defined value).
- | cmplw TMP0, TMP2
- | stw BASE, L->base
- | stw TMP1, L->top
- | mr CARG1, L
- | bgt >5 // Need to grow stack.
- | mtctr TMP3
- | bctrl // (lua_State *L)
- | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
- | lwz BASE, L->base
- | cmpwi CRET1, 0
- | slwi RD, CRET1, 3
- | la RA, -8(BASE)
- | bgt ->fff_res // Returned nresults+1?
- |1: // Returned 0 or -1: retry fast path.
- | lwz TMP0, L->top
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | sub NARGS8:RC, TMP0, BASE
- | bne ->vm_call_tail // Returned -1?
- | ins_callt // Returned 0: retry fast path.
- |
- |// Reconstruct previous base for vmeta_call during tailcall.
- |->vm_call_tail:
- | andi. TMP0, PC, FRAME_TYPE
- | rlwinm TMP1, PC, 0, 0, 28
- | bne >3
- | lwz INS, -4(PC)
- | decode_RA8 TMP1, INS
- | addi TMP1, TMP1, 8
- |3:
- | sub TMP2, BASE, TMP1
- | b ->vm_call_dispatch // Resolve again for tailcall.
- |
- |5: // Grow stack for fallback handler.
- | li CARG2, LUA_MINSTACK
- | bl extern lj_state_growstack // (lua_State *L, int n)
- | lwz BASE, L->base
- | cmpw TMP0, TMP0 // Set 4*cr0+eq to force retry.
- | b <1
- |
- |->fff_gcstep: // Call GC step function.
- | // BASE = new base, RC = nargs*8
- | mflr SAVE0 // Keep the return address in SAVE0 across the call below.
- | stw BASE, L->base
- | add TMP0, BASE, NARGS8:RC // TMP0 = BASE + nargs*8
- | stw PC, SAVE_PC // Redundant (but a defined value).
- | stw TMP0, L->top
- | mr CARG1, L
- | bl extern lj_gc_step // (lua_State *L)
- | lwz BASE, L->base // Re-read BASE from L after the call.
- | mtlr SAVE0 // Restore the return address.
- | lwz TMP0, L->top
- | sub NARGS8:RC, TMP0, BASE // Recompute nargs*8 = L->top - BASE.
- | lwz CFUNC:RB, FRAME_FUNC(BASE) // Reload the function object into RB.
- | blr
- |
- |//-----------------------------------------------------------------------
- |//-- Special dispatch targets -------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_record: // Dispatch target for recording phase.
- |.if JIT
- | NYI
- |.endif
- |
- |->vm_rethook: // Dispatch target for return hooks.
- | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
- | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active?
- | beq >1
- |5: // Re-dispatch to static ins.
- | addi TMP1, TMP1, GG_DISP2STATIC // Assumes decode_OP4 TMP1, INS.
- | lwzx TMP0, DISPATCH, TMP1
- | mtctr TMP0
- | bctr
- |
- |->vm_inshook: // Dispatch target for instr/line hooks.
- | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
- | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH)
- | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active?
- | rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0
- | bne <5
- |
- | cmpwi cr1, TMP0, 0
- | addic. TMP2, TMP2, -1
- | beq cr1, <5
- | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
- | beq >1
- | bge cr1, <5
- |1:
- | mr CARG1, L
- | stw MULTRES, SAVE_MULTRES
- | mr CARG2, PC
- | stw BASE, L->base
- | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
- | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
- |3:
- | lwz BASE, L->base
- |4: // Re-dispatch to static ins.
- | lwz INS, -4(PC)
- | decode_OP4 TMP1, INS
- | decode_RB8 RB, INS
- | addi TMP1, TMP1, GG_DISP2STATIC
- | decode_RD8 RD, INS
- | lwzx TMP0, DISPATCH, TMP1
- | decode_RA8 RA, INS
- | decode_RC8 RC, INS
- | mtctr TMP0
- | bctr
- |
- |->cont_hook: // Continue from hook yield.
- | addi PC, PC, 4
- | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins.
- | b <4
- |
- |->vm_hotloop: // Hot loop counter underflow.
- |.if JIT
- | NYI
- |.endif
- |
- |->vm_callhook: // Dispatch target for call hooks.
- | mr CARG2, PC
- |.if JIT
- | b >1
- |.endif
- |
- |->vm_hotcall: // Hot call counter underflow.
- |.if JIT
- | ori CARG2, PC, 1
- |1:
- |.endif
- | add TMP0, BASE, RC
- | stw PC, SAVE_PC
- | mr CARG1, L
- | stw BASE, L->base
- | sub RA, RA, BASE
- | stw TMP0, L->top
- | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
- | // Returns ASMFunction.
- | lwz BASE, L->base
- | lwz TMP0, L->top
- | stw ZERO, SAVE_PC // Invalidate for subsequent line hook.
- | sub NARGS8:RC, TMP0, BASE
- | add RA, BASE, RA
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | mtctr CRET1
- | bctr
- |
- |//-----------------------------------------------------------------------
- |//-- Trace exit handler -------------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_exit_handler:
- |.if JIT
- | NYI
- |.endif
- |->vm_exit_interp:
- |.if JIT
- | NYI
- |.endif
- |
- |//-----------------------------------------------------------------------
- |//-- Math helper functions ----------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |// FP value rounding. Called by math.floor/math.ceil fast functions
- |// and from JIT code.
- |//
- |// This can be inlined if the CPU has the frin/friz/frip/frim instructions.
- |// The alternative hard-float approaches have a deep dependency chain.
- |// The resulting latency is at least 3x-7x the double-precision FP latency
- |// (e500v2: 6cy, e600: 5cy, Cell: 10cy) or around 20-70 cycles.
- |//
- |// The soft-float approach is tedious, but much faster (e500v2: ~11cy/~6cy).
- |// However it relies on a fast way to transfer the FP value to GPRs
- |// (e500v2: 0cy for lo-word, 1cy for hi-word).
- |//
- |.macro vm_round, name, mode
- | // Used temporaries: TMP0, TMP1, TMP2, TMP3.
- |->name.._efd: // Input: CARG2, output: CRET2
- | evmergehi CARG1, CARG2, CARG2
- |->name.._hilo:
- | // Input: CARG1 (hi), CARG2 (hi, lo), output: CRET2
- | rlwinm TMP2, CARG1, 12, 21, 31
- | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023
- | li TMP1, -1
- | cmplwi cr1, TMP2, 51 // 0 <= exp <= 51?
- | subfic TMP0, TMP2, 52
- | bgt cr1, >1
- | lus TMP3, 0xfff0
- | slw TMP0, TMP1, TMP0 // lomask = -1 << (52-exp)
- | sraw TMP1, TMP3, TMP2 // himask = (int32_t)0xfff00000 >> exp
- |.if mode == 2 // trunc(x):
- | evmergelo TMP0, TMP1, TMP0
- | evand CRET2, CARG2, TMP0 // hi &= himask, lo &= lomask
- |.else
- | andc TMP2, CARG2, TMP0
- | andc TMP3, CARG1, TMP1
- | or TMP2, TMP2, TMP3 // ztest = (hi&~himask) | (lo&~lomask)
- | srawi TMP3, CARG1, 31 // signmask = (int32_t)hi >> 31
- |.if mode == 0 // floor(x):
- | and. TMP2, TMP2, TMP3 // iszero = ((ztest & signmask) == 0)
- |.else // ceil(x):
- | andc. TMP2, TMP2, TMP3 // iszero = ((ztest & ~signmask) == 0)
- |.endif
- | and CARG2, CARG2, TMP0 // lo &= lomask
- | and CARG1, CARG1, TMP1 // hi &= himask
- | subc TMP0, CARG2, TMP0
- | iseleq TMP0, CARG2, TMP0 // lo = iszero ? lo : lo-lomask
- | sube TMP1, CARG1, TMP1
- | iseleq TMP1, CARG1, TMP1 // hi = iszero ? hi : hi-himask+carry
- | evmergelo CRET2, TMP1, TMP0
- |.endif
- | blr
- |1:
- | bgtlr // Already done if >=2^52, +-inf or nan.
- |.if mode == 2 // trunc(x):
- | rlwinm TMP1, CARG1, 0, 0, 0 // hi = sign(x)
- | li TMP0, 0
- | evmergelo CRET2, TMP1, TMP0
- |.else
- | rlwinm TMP2, CARG1, 0, 1, 31
- | srawi TMP0, CARG1, 31 // signmask = (int32_t)hi >> 31
- | or TMP2, TMP2, CARG2 // ztest = abs(hi) | lo
- | lus TMP1, 0x3ff0
- |.if mode == 0 // floor(x):
- | and. TMP2, TMP2, TMP0 // iszero = ((ztest & signmask) == 0)
- |.else // ceil(x):
- | andc. TMP2, TMP2, TMP0 // iszero = ((ztest & ~signmask) == 0)
- |.endif
- | li TMP0, 0
- | iseleq TMP1, r0, TMP1
- | rlwimi CARG1, TMP1, 0, 1, 31 // hi = sign(x) | (iszero ? 0.0 : 1.0)
- | evmergelo CRET2, CARG1, TMP0
- |.endif
- | blr
- |.endmacro
- |
- |->vm_floor: // Wrapper: takes hi word in CARG1 and lo word in CARG2, calls vm_floor_hilo.
- | mflr CARG3 // Save the return address in CARG3 across the nested call.
- | evmergelo CARG2, CARG1, CARG2 // CARG2 = (low word of CARG1 : low word of CARG2), i.e. (hi, lo).
- | bl ->vm_floor_hilo // Input: CARG1 (hi), CARG2 (hi, lo), output: CRET2
- | mtlr CARG3 // Restore the return address.
- | evmergehi CRET1, CRET2, CRET2 // Low word of CRET1 = upper word of CRET2 (the hi part).
- | blr
- |
- | vm_round vm_floor, 0
- | vm_round vm_ceil, 1
- |.if JIT
- | vm_round vm_trunc, 2
- |.else
- |->vm_trunc_efd:
- |->vm_trunc_hilo:
- |.endif
- |
- |//-----------------------------------------------------------------------
- |//-- Miscellaneous functions --------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |//-----------------------------------------------------------------------
- |//-- FFI helper functions -----------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_ffi_call:
- |.if FFI
- | NYI
- |.endif
- |
- |//-----------------------------------------------------------------------
-}
-
-/* Generate the code for a single instruction. */
-static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-{
- int vk = 0;
- |=>defop:
-
- switch (op) {
-
- /* -- Comparison ops ---------------------------------------------------- */
-
- /* Remember: all ops branch for a true comparison, fall through otherwise. */
-
- case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
- | // RA = src1*8, RD = src2*8, JMP with RD = target
- | evlddx TMP0, BASE, RA
- | addi PC, PC, 4
- | evlddx TMP1, BASE, RD
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | lwz TMP2, -4(PC)
- | evmergehi RB, TMP0, TMP1
- | decode_RD4 TMP2, TMP2
- | checknum RB
- | add TMP2, TMP2, TMP3
- | checkanyfail ->vmeta_comp
- | efdcmplt TMP0, TMP1
- if (op == BC_ISLE || op == BC_ISGT) {
- | efdcmpeq cr1, TMP0, TMP1
- | cror 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
- }
- if (op == BC_ISLT || op == BC_ISLE) {
- | iselgt PC, TMP2, PC
- } else {
- | iselgt PC, PC, TMP2
- }
- | ins_next
- break;
-
- case BC_ISEQV: case BC_ISNEV:
- vk = op == BC_ISEQV;
- | // RA = src1*8, RD = src2*8, JMP with RD = target
- | evlddx CARG2, BASE, RA
- | addi PC, PC, 4
- | evlddx CARG3, BASE, RD
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | lwz TMP2, -4(PC)
- | evmergehi RB, CARG2, CARG3
- | decode_RD4 TMP2, TMP2
- | checknum RB
- | add TMP2, TMP2, TMP3
- | checkanyfail >5
- | efdcmpeq CARG2, CARG3
- if (vk) {
- | iselgt PC, TMP2, PC
- } else {
- | iselgt PC, PC, TMP2
- }
- |1:
- | ins_next
- |
- |5: // Either or both types are not numbers.
- | evcmpeq CARG2, CARG3
- | not TMP3, RB
- | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
- | crorc 4*cr7+lt, 4*cr0+so, 4*cr0+lt // 1: Same tv or different type.
- | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata?
- | crandc 4*cr7+gt, 4*cr0+lt, 4*cr1+gt // 2: Same type and primitive.
- | mr SAVE0, PC
- if (vk) {
- | isel PC, TMP2, PC, 4*cr7+gt
- } else {
- | isel TMP2, PC, TMP2, 4*cr7+gt
- }
- | cror 4*cr7+lt, 4*cr7+lt, 4*cr7+gt // 1 or 2.
- if (vk) {
- | isel PC, TMP2, PC, 4*cr0+so
- } else {
- | isel PC, PC, TMP2, 4*cr0+so
- }
- | blt cr7, <1 // Done if 1 or 2.
- | blt cr6, <1 // Done if not tab/ud.
- |
- | // Different tables or userdatas. Need to check __eq metamethod.
- | // Field metatable must be at same offset for GCtab and GCudata!
- | lwz TAB:TMP2, TAB:CARG2->metatable
- | li CARG4, 1-vk // ne = 0 or 1.
- | cmplwi TAB:TMP2, 0
- | beq <1 // No metatable?
- | lbz TMP2, TAB:TMP2->nomm
- | andi. TMP2, TMP2, 1<<MM_eq
- | bne <1 // Or 'no __eq' flag set?
- | mr PC, SAVE0 // Restore old PC.
- | b ->vmeta_equal // Handle __eq metamethod.
- break;
-
- case BC_ISEQS: case BC_ISNES:
- vk = op == BC_ISEQS;
- | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
- | evlddx TMP0, BASE, RA
- | srwi RD, RD, 1
- | lwz INS, 0(PC)
- | subfic RD, RD, -4
- | addi PC, PC, 4
- | lwzx STR:TMP1, KBASE, RD // KBASE-4-str_const*4
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | decode_RD4 TMP2, INS
- | evmergelo STR:TMP1, TISSTR, STR:TMP1
- | add TMP2, TMP2, TMP3
- | evcmpeq TMP0, STR:TMP1
- if (vk) {
- | isel PC, TMP2, PC, 4*cr0+so
- } else {
- | isel PC, PC, TMP2, 4*cr0+so
- }
- | ins_next
- break;
-
- case BC_ISEQN: case BC_ISNEN:
- vk = op == BC_ISEQN;
- | // RA = src*8, RD = num_const*8, JMP with RD = target
- | evlddx TMP0, BASE, RA
- | addi PC, PC, 4
- | evlddx TMP1, KBASE, RD
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | lwz INS, -4(PC)
- | checknum TMP0
- | checkfail >5
- | efdcmpeq TMP0, TMP1
- |1:
- | decode_RD4 TMP2, INS
- | add TMP2, TMP2, TMP3
- if (vk) {
- | iselgt PC, TMP2, PC
- |5:
- } else {
- | iselgt PC, PC, TMP2
- }
- |3:
- | ins_next
- if (!vk) {
- |5:
- | decode_RD4 TMP2, INS
- | add PC, TMP2, TMP3
- | b <3
- }
- break;
-
- case BC_ISEQP: case BC_ISNEP:
- vk = op == BC_ISEQP;
- | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
- | lwzx TMP0, BASE, RA
- | srwi TMP1, RD, 3
- | lwz INS, 0(PC)
- | addi PC, PC, 4
- | not TMP1, TMP1
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | cmplw TMP0, TMP1
- | decode_RD4 TMP2, INS
- | add TMP2, TMP2, TMP3
- if (vk) {
- | iseleq PC, TMP2, PC
- } else {
- | iseleq PC, PC, TMP2
- }
- | ins_next
- break;
-
- /* -- Unary test and copy ops ------------------------------------------- */
-
- case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
- | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
- | evlddx TMP0, BASE, RD
- | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE.
- | lwz INS, 0(PC)
- | evcmpltu TMP0, TMP1
- | addi PC, PC, 4
- if (op == BC_IST || op == BC_ISF) {
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | decode_RD4 TMP2, INS
- | add TMP2, TMP2, TMP3
- if (op == BC_IST) {
- | isellt PC, TMP2, PC
- } else {
- | isellt PC, PC, TMP2
- }
- } else {
- if (op == BC_ISTC) {
- | checkfail >1
- } else {
- | checkok >1
- }
- | addis PC, PC, -(BCBIAS_J*4 >> 16)
- | decode_RD4 TMP2, INS
- | evstddx TMP0, BASE, RA
- | add PC, PC, TMP2
- |1:
- }
- | ins_next
- break;
-
- /* -- Unary ops --------------------------------------------------------- */
-
- case BC_MOV:
- | // RA = dst*8, RD = src*8
- | ins_next1
- | evlddx TMP0, BASE, RD
- | evstddx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_NOT:
- | // RA = dst*8, RD = src*8
- | ins_next1
- | lwzx TMP0, BASE, RD // Load only the 32 bit word at offset 0 of the source slot.
- | subfic TMP1, TMP0, LJ_TTRUE // TMP1 = LJ_TTRUE - TMP0; carry = (TMP0 <= LJ_TTRUE, unsigned).
- | adde TMP0, TMP0, TMP1 // TMP0 = LJ_TTRUE + carry (presumably LJ_TTRUE+1 is LJ_TFALSE -- confirm).
- | stwx TMP0, BASE, RA // Store only the 32 bit word at offset 0 of the destination slot.
- | ins_next2
- break;
- case BC_UNM:
- | // RA = dst*8, RD = src*8
- | evlddx TMP0, BASE, RD
- | checknum TMP0
- | checkfail ->vmeta_unm
- | efdneg TMP0, TMP0
- | ins_next1
- | evstddx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_LEN:
- | // RA = dst*8, RD = src*8
- | evlddx CARG1, BASE, RD
- | checkstr CARG1
- | checkfail >2
- | lwz CRET1, STR:CARG1->len
- |1:
- | ins_next1
- | efdcfsi TMP0, CRET1
- | evstddx TMP0, BASE, RA
- | ins_next2
- |2:
- | checktab CARG1
- | checkfail ->vmeta_len
-#if LJ_52
- | lwz TAB:TMP2, TAB:CARG1->metatable
- | cmplwi TAB:TMP2, 0
- | bne >9
- |3:
-#endif
- |->BC_LEN_Z:
- | bl extern lj_tab_len // (GCtab *t)
- | // Returns uint32_t (but less than 2^31).
- | b <1
-#if LJ_52
- |9:
- | lbz TMP0, TAB:TMP2->nomm
- | andi. TMP0, TMP0, 1<<MM_len
- | bne <3 // 'no __len' flag set: done.
- | b ->vmeta_len
-#endif
- break;
-
- /* -- Binary ops -------------------------------------------------------- */
-
- |.macro ins_arithpre, t0, t1
- | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
- ||case 0:
- | evlddx t0, BASE, RB
- | checknum t0
- | evlddx t1, KBASE, RC
- | checkfail ->vmeta_arith_vn
- || break;
- ||case 1:
- | evlddx t1, BASE, RB
- | checknum t1
- | evlddx t0, KBASE, RC
- | checkfail ->vmeta_arith_nv
- || break;
- ||default:
- | evlddx t0, BASE, RB
- | evlddx t1, BASE, RC
- | evmergehi TMP2, t0, t1
- | checknum TMP2
- | checkanyfail ->vmeta_arith_vv
- || break;
- ||}
- |.endmacro
- |
- |.macro ins_arith, ins
- | ins_arithpre TMP0, TMP1
- | ins_next1
- | ins TMP0, TMP0, TMP1
- | evstddx TMP0, BASE, RA
- | ins_next2
- |.endmacro
-
- case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
- | ins_arith efdadd
- break;
- case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
- | ins_arith efdsub
- break;
- case BC_MULVN: case BC_MULNV: case BC_MULVV:
- | ins_arith efdmul
- break;
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
- | ins_arith efddiv
- break;
- case BC_MODVN:
- | ins_arithpre RD, SAVE0
- |->BC_MODVN_Z:
- | efddiv CARG2, RD, SAVE0
- | bl ->vm_floor_efd // floor(b/c)
- | efdmul TMP0, CRET2, SAVE0
- | ins_next1
- | efdsub TMP0, RD, TMP0 // b - floor(b/c)*c
- | evstddx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_MODNV: case BC_MODVV:
- | ins_arithpre RD, SAVE0
- | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
- break;
- case BC_POW:
- | evlddx CARG2, BASE, RB
- | evlddx CARG4, BASE, RC
- | evmergehi CARG1, CARG4, CARG2
- | checknum CARG1
- | evmergehi CARG3, CARG4, CARG4
- | checkanyfail ->vmeta_arith_vv
- | bl extern pow@plt
- | evmergelo CRET2, CRET1, CRET2
- | evstddx CRET2, BASE, RA
- | ins_next
- break;
-
- case BC_CAT:
- | // RA = dst*8, RB = src_start*8, RC = src_end*8
- | sub CARG3, RC, RB
- | stw BASE, L->base
- | add CARG2, BASE, RC
- | mr SAVE0, RB
- |->BC_CAT_Z:
- | stw PC, SAVE_PC
- | mr CARG1, L
- | srwi CARG3, CARG3, 3
- | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
- | // Returns NULL (finished) or TValue * (metamethod).
- | cmplwi CRET1, 0
- | lwz BASE, L->base
- | bne ->vmeta_binop
- | evlddx TMP0, BASE, SAVE0 // Copy result from RB to RA.
- | evstddx TMP0, BASE, RA
- | ins_next
- break;
-
- /* -- Constant ops ------------------------------------------------------ */
-
- case BC_KSTR:
- | // RA = dst*8, RD = str_const*8 (~)
- | ins_next1
- | srwi TMP1, RD, 1
- | subfic TMP1, TMP1, -4
- | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4
- | evmergelo TMP0, TISSTR, TMP0
- | evstddx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_KCDATA:
- |.if FFI
- | // RA = dst*8, RD = cdata_const*8 (~)
- | ins_next1
- | srwi TMP1, RD, 1
- | subfic TMP1, TMP1, -4
- | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4
- | li TMP2, LJ_TCDATA
- | evmergelo TMP0, TMP2, TMP0
- | evstddx TMP0, BASE, RA
- | ins_next2
- |.endif
- break;
- case BC_KSHORT:
- | // RA = dst*8, RD = int16_literal*8
- | srwi TMP1, RD, 3
- | extsh TMP1, TMP1
- | ins_next1
- | efdcfsi TMP0, TMP1
- | evstddx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_KNUM:
- | // RA = dst*8, RD = num_const*8
- | evlddx TMP0, KBASE, RD
- | ins_next1
- | evstddx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_KPRI:
- | // RA = dst*8, RD = primitive_type*8 (~)
- | srwi TMP1, RD, 3
- | not TMP0, TMP1
- | ins_next1
- | stwx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_KNIL:
- | // RA = base*8, RD = end*8
- | evstddx TISNIL, BASE, RA // Store TISNIL to the first slot unconditionally.
- | addi RA, RA, 8
- |1:
- | evstddx TISNIL, BASE, RA // Store TISNIL to the current slot.
- | cmpw RA, RD // Compare the slot just written against end (before the increment).
- | addi RA, RA, 8
- | blt <1 // Loop while the slot just written is below end.
- | ins_next_
- break;
-
- /* -- Upvalue and function ops ------------------------------------------ */
-
- case BC_UGET:
- | // RA = dst*8, RD = uvnum*8
- | ins_next1
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi RD, RD, 1
- | addi RD, RD, offsetof(GCfuncL, uvptr)
- | lwzx UPVAL:RB, LFUNC:RB, RD
- | lwz TMP1, UPVAL:RB->v
- | evldd TMP0, 0(TMP1)
- | evstddx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_USETV:
- | // RA = uvnum*8, RD = src*8
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi RA, RA, 1
- | addi RA, RA, offsetof(GCfuncL, uvptr)
- | evlddx TMP1, BASE, RD
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | lbz TMP3, UPVAL:RB->marked
- | lwz CARG2, UPVAL:RB->v
- | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
- | lbz TMP0, UPVAL:RB->closed
- | evmergehi TMP2, TMP1, TMP1
- | evstdd TMP1, 0(CARG2)
- | cmplwi cr1, TMP0, 0
- | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
- | subi TMP2, TMP2, (LJ_TISNUM+1)
- | bne >2 // Upvalue is closed and black?
- |1:
- | ins_next
- |
- |2: // Check if new value is collectable.
- | cmplwi TMP2, LJ_TISGCV - (LJ_TISNUM+1)
- | bge <1 // tvisgcv(v)
- | lbz TMP3, GCOBJ:TMP1->gch.marked
- | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
- | la CARG1, GG_DISP2G(DISPATCH)
- | // Crossed a write barrier. Move the barrier forward.
- | beq <1
- | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
- | b <1
- break;
- case BC_USETS:
- | // RA = uvnum*8, RD = str_const*8 (~)
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi TMP1, RD, 1
- | srwi RA, RA, 1
- | subfic TMP1, TMP1, -4
- | addi RA, RA, offsetof(GCfuncL, uvptr)
- | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | evmergelo STR:TMP1, TISSTR, STR:TMP1
- | lbz TMP3, UPVAL:RB->marked
- | lwz CARG2, UPVAL:RB->v
- | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
- | lbz TMP3, STR:TMP1->marked
- | lbz TMP2, UPVAL:RB->closed
- | evstdd STR:TMP1, 0(CARG2)
- | bne >2
- |1:
- | ins_next
- |
- |2: // Check if string is white and ensure upvalue is closed.
- | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(str)
- | cmplwi cr1, TMP2, 0
- | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
- | la CARG1, GG_DISP2G(DISPATCH)
- | // Crossed a write barrier. Move the barrier forward.
- | beq <1
- | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
- | b <1
- break;
- case BC_USETN:
- | // RA = uvnum*8, RD = num_const*8
- | ins_next1
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi RA, RA, 1
- | addi RA, RA, offsetof(GCfuncL, uvptr)
- | evlddx TMP0, KBASE, RD
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | lwz TMP1, UPVAL:RB->v
- | evstdd TMP0, 0(TMP1)
- | ins_next2
- break;
- case BC_USETP:
- | // RA = uvnum*8, RD = primitive_type*8 (~)
- | ins_next1
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi RA, RA, 1
- | addi RA, RA, offsetof(GCfuncL, uvptr)
- | srwi TMP0, RD, 3
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | not TMP0, TMP0
- | lwz TMP1, UPVAL:RB->v
- | stw TMP0, 0(TMP1)
- | ins_next2
- break;
-
- case BC_UCLO:
- | // RA = level*8, RD = target
- | lwz TMP1, L->openupval
- | branch_RD // Do this first since RD is not saved.
- | stw BASE, L->base
- | cmplwi TMP1, 0
- | mr CARG1, L
- | beq >1
- | add CARG2, BASE, RA
- | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
- | lwz BASE, L->base
- |1:
- | ins_next
- break;
-
- case BC_FNEW:
- | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
- | srwi TMP1, RD, 1 // proto_const*4
- | stw BASE, L->base
- | subfic TMP1, TMP1, -4 // -4-proto_const*4
- | stw PC, SAVE_PC
- | lwzx CARG2, KBASE, TMP1 // KBASE-4-proto_const*4
- | mr CARG1, L
- | lwz CARG3, FRAME_FUNC(BASE) // Parent = function object loaded from FRAME_FUNC(BASE).
- | // (lua_State *L, GCproto *pt, GCfuncL *parent)
- | bl extern lj_func_newL_gc
- | // Returns GCfuncL *.
- | lwz BASE, L->base // Re-read BASE from L after the call.
- | evmergelo LFUNC:CRET1, TISFUNC, LFUNC:CRET1 // Upper word = low word of TISFUNC, lower word = returned pointer.
- | evstddx LFUNC:CRET1, BASE, RA // Store the 64 bit value to the destination slot.
- | ins_next
- break;
-
- /* -- Table ops --------------------------------------------------------- */
-
- case BC_TNEW:
- case BC_TDUP:
- | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
- | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
- | mr CARG1, L
- | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
- | stw BASE, L->base
- | cmplw TMP0, TMP1
- | stw PC, SAVE_PC
- | bge >5
- |1:
- if (op == BC_TNEW) {
- | rlwinm CARG2, RD, 29, 21, 31
- | rlwinm CARG3, RD, 18, 27, 31
- | cmpwi CARG2, 0x7ff
- | li TMP1, 0x801
- | iseleq CARG2, TMP1, CARG2
- | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
- | // Returns Table *.
- } else {
- | srwi TMP1, RD, 1
- | subfic TMP1, TMP1, -4
- | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4
- | bl extern lj_tab_dup // (lua_State *L, Table *kt)
- | // Returns Table *.
- }
- | lwz BASE, L->base
- | evmergelo TAB:CRET1, TISTAB, TAB:CRET1
- | evstddx TAB:CRET1, BASE, RA
- | ins_next
- |5:
- | mr SAVE0, RD
- | bl extern lj_gc_step_fixtop // (lua_State *L)
- | mr RD, SAVE0
- | mr CARG1, L
- | b <1
- break;
-
- case BC_GGET:
- | // RA = dst*8, RD = str_const*8 (~)
- case BC_GSET:
- | // RA = src*8, RD = str_const*8 (~)
- | lwz LFUNC:TMP2, FRAME_FUNC(BASE) // Function object loaded from FRAME_FUNC(BASE).
- | srwi TMP1, RD, 1 // str_const*4
- | lwz TAB:RB, LFUNC:TMP2->env // TAB:RB = env field of that function.
- | subfic TMP1, TMP1, -4 // -4-str_const*4
- | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
- if (op == BC_GGET) {
- | b ->BC_TGETS_Z // Continue in TGETS with TAB:RB, STR:RC, RA = dst*8.
- } else {
- | b ->BC_TSETS_Z // Continue in TSETS with TAB:RB, STR:RC, RA = src*8.
- }
- break;
-
- case BC_TGETV:
- | // RA = dst*8, RB = table*8, RC = key*8
- | evlddx TAB:RB, BASE, RB
- | evlddx RC, BASE, RC
- | checktab TAB:RB
- | checkfail ->vmeta_tgetv
- | checknum RC
- | checkfail >5
- | // Convert number key to integer
- | efdctsi TMP2, RC
- | lwz TMP0, TAB:RB->asize
- | efdcfsi TMP1, TMP2
- | cmplw cr0, TMP0, TMP2
- | efdcmpeq cr1, RC, TMP1
- | lwz TMP1, TAB:RB->array
- | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
- | slwi TMP2, TMP2, 3
- | ble ->vmeta_tgetv // Integer key and in array part?
- | evlddx TMP1, TMP1, TMP2
- | checknil TMP1
- | checkok >2
- |1:
- | evstddx TMP1, BASE, RA
- | ins_next
- |
- |2: // Check for __index if table value is nil.
- | lwz TAB:TMP2, TAB:RB->metatable
- | cmplwi TAB:TMP2, 0
- | beq <1 // No metatable: done.
- | lbz TMP0, TAB:TMP2->nomm
- | andi. TMP0, TMP0, 1<<MM_index
- | bne <1 // 'no __index' flag set: done.
- | b ->vmeta_tgetv
- |
- |5:
- | checkstr STR:RC // String key?
- | checkok ->BC_TGETS_Z
- | b ->vmeta_tgetv
- break;
- case BC_TGETS:
- | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
- | evlddx TAB:RB, BASE, RB
- | srwi TMP1, RC, 1
- | checktab TAB:RB
- | subfic TMP1, TMP1, -4
- | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
- | checkfail ->vmeta_tgets1
- |->BC_TGETS_Z:
- | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
- | lwz TMP0, TAB:RB->hmask
- | lwz TMP1, STR:RC->hash
- | lwz NODE:TMP2, TAB:RB->node
- | evmergelo STR:RC, TISSTR, STR:RC
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
- | slwi TMP0, TMP1, 5
- | slwi TMP1, TMP1, 3
- | sub TMP1, TMP0, TMP1
- | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
- |1:
- | evldd TMP0, NODE:TMP2->key
- | evldd TMP1, NODE:TMP2->val
- | evcmpeq TMP0, STR:RC
- | checkanyfail >4
- | checknil TMP1
- | checkok >5 // Key found, but nil value?
- |3:
- | evstddx TMP1, BASE, RA
- | ins_next
- |
- |4: // Follow hash chain.
- | lwz NODE:TMP2, NODE:TMP2->next
- | cmplwi NODE:TMP2, 0
- | bne <1
- | // End of hash chain: key not found, nil result.
- | evmr TMP1, TISNIL
- |
- |5: // Check for __index if table value is nil.
- | lwz TAB:TMP2, TAB:RB->metatable
- | cmplwi TAB:TMP2, 0
- | beq <3 // No metatable: done.
- | lbz TMP0, TAB:TMP2->nomm
- | andi. TMP0, TMP0, 1<<MM_index
- | bne <3 // 'no __index' flag set: done.
- | b ->vmeta_tgets
- break;
- case BC_TGETB:
- | // RA = dst*8, RB = table*8, RC = index*8
- | evlddx TAB:RB, BASE, RB
- | srwi TMP0, RC, 3 // TMP0 = index
- | checktab TAB:RB
- | checkfail ->vmeta_tgetb
- | lwz TMP1, TAB:RB->asize
- | lwz TMP2, TAB:RB->array
- | cmplw TMP0, TMP1
- | bge ->vmeta_tgetb // index >= asize (unsigned): not handled inline.
- | evlddx TMP1, TMP2, RC // Load the slot at array + index*8.
- | checknil TMP1
- | checkok >5
- |1:
- | ins_next1
- | evstddx TMP1, BASE, RA // Store the loaded value to the destination slot.
- | ins_next2
- |
- |5: // Check for __index if table value is nil.
- | lwz TAB:TMP2, TAB:RB->metatable
- | cmplwi TAB:TMP2, 0
- | beq <1 // No metatable: done.
- | lbz TMP2, TAB:TMP2->nomm
- | andi. TMP2, TMP2, 1<<MM_index
- | bne <1 // 'no __index' flag set: done.
- | b ->vmeta_tgetb // Caveat: preserve TMP0!
- break;
-
- case BC_TSETV:
- | // RA = src*8, RB = table*8, RC = key*8
- | evlddx TAB:RB, BASE, RB
- | evlddx RC, BASE, RC
- | checktab TAB:RB
- | checkfail ->vmeta_tsetv
- | checknum RC
- | checkfail >5
- | // Convert number key to integer
- | efdctsi TMP2, RC
- | evlddx SAVE0, BASE, RA
- | lwz TMP0, TAB:RB->asize
- | efdcfsi TMP1, TMP2
- | cmplw cr0, TMP0, TMP2
- | efdcmpeq cr1, RC, TMP1
- | lwz TMP1, TAB:RB->array
- | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
- | slwi TMP0, TMP2, 3
- | ble ->vmeta_tsetv // Integer key and in array part?
- | lbz TMP3, TAB:RB->marked
- | evlddx TMP2, TMP1, TMP0
- | checknil TMP2
- | checkok >3
- |1:
- | andi. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
- | evstddx SAVE0, TMP1, TMP0
- | bne >7
- |2:
- | ins_next
- |
- |3: // Check for __newindex if previous value is nil.
- | lwz TAB:TMP2, TAB:RB->metatable
- | cmplwi TAB:TMP2, 0
- | beq <1 // No metatable: done.
- | lbz TMP2, TAB:TMP2->nomm
- | andi. TMP2, TMP2, 1<<MM_newindex
- | bne <1 // 'no __newindex' flag set: done.
- | b ->vmeta_tsetv
- |
- |5:
- | checkstr STR:RC // String key?
- | checkok ->BC_TSETS_Z
- | b ->vmeta_tsetv
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
- | barrierback TAB:RB, TMP3, TMP0
- | b <2
- break;
- case BC_TSETS:
- | // RA = src*8, RB = table*8, RC = str_const*8 (~)
- | evlddx TAB:RB, BASE, RB
- | srwi TMP1, RC, 1
- | checktab TAB:RB
- | subfic TMP1, TMP1, -4
- | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
- | checkfail ->vmeta_tsets1
- |->BC_TSETS_Z:
- | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8
- | lwz TMP0, TAB:RB->hmask
- | lwz TMP1, STR:RC->hash
- | lwz NODE:TMP2, TAB:RB->node
- | evmergelo STR:RC, TISSTR, STR:RC
- | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
- | evlddx SAVE0, BASE, RA
- | slwi TMP0, TMP1, 5
- | slwi TMP1, TMP1, 3
- | sub TMP1, TMP0, TMP1
- | lbz TMP3, TAB:RB->marked
- | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
- |1:
- | evldd TMP0, NODE:TMP2->key
- | evldd TMP1, NODE:TMP2->val
- | evcmpeq TMP0, STR:RC
- | checkanyfail >5
- | checknil TMP1
- | checkok >4 // Key found, but nil value?
- |2:
- | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
- | evstdd SAVE0, NODE:TMP2->val
- | bne >7
- |3:
- | ins_next
- |
- |4: // Check for __newindex if previous value is nil.
- | lwz TAB:TMP1, TAB:RB->metatable
- | cmplwi TAB:TMP1, 0
- | beq <2 // No metatable: done.
- | lbz TMP0, TAB:TMP1->nomm
- | andi. TMP0, TMP0, 1<<MM_newindex
- | bne <2 // 'no __newindex' flag set: done.
- | b ->vmeta_tsets
- |
- |5: // Follow hash chain.
- | lwz NODE:TMP2, NODE:TMP2->next
- | cmplwi NODE:TMP2, 0
- | bne <1
- | // End of hash chain: key not found, add a new one.
- |
- | // But check for __newindex first.
- | lwz TAB:TMP1, TAB:RB->metatable
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | stw PC, SAVE_PC
- | mr CARG1, L
- | cmplwi TAB:TMP1, 0
- | stw BASE, L->base
- | beq >6 // No metatable: continue.
- | lbz TMP0, TAB:TMP1->nomm
- | andi. TMP0, TMP0, 1<<MM_newindex
- | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check.
- |6:
- | mr CARG2, TAB:RB
- | evstdd STR:RC, 0(CARG3)
- | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
- | // Returns TValue *.
- | lwz BASE, L->base
- | evstdd SAVE0, 0(CRET1)
- | b <3 // No 2nd write barrier needed.
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
- | barrierback TAB:RB, TMP3, TMP0
- | b <3
- break;
- case BC_TSETB:
- | // RA = src*8, RB = table*8, RC = index*8
- | evlddx TAB:RB, BASE, RB
- | srwi TMP0, RC, 3
- | checktab TAB:RB
- | checkfail ->vmeta_tsetb
- | lwz TMP1, TAB:RB->asize
- | lwz TMP2, TAB:RB->array
- | lbz TMP3, TAB:RB->marked
- | cmplw TMP0, TMP1
- | evlddx SAVE0, BASE, RA
- | bge ->vmeta_tsetb
- | evlddx TMP1, TMP2, RC
- | checknil TMP1
- | checkok >5
- |1:
- | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
- | evstddx SAVE0, TMP2, RC
- | bne >7
- |2:
- | ins_next
- |
- |5: // Check for __newindex if previous value is nil.
- | lwz TAB:TMP1, TAB:RB->metatable
- | cmplwi TAB:TMP1, 0
- | beq <1 // No metatable: done.
- | lbz TMP1, TAB:TMP1->nomm
- | andi. TMP1, TMP1, 1<<MM_newindex
- | bne <1 // 'no __newindex' flag set: done.
- | b ->vmeta_tsetb // Caveat: preserve TMP0!
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
- | barrierback TAB:RB, TMP3, TMP0
- | b <2
- break;
-
- case BC_TSETM:
- | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
- | add RA, BASE, RA
- |1:
- | add TMP3, KBASE, RD
- | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table.
- | addic. TMP0, MULTRES, -8
- | lwz TMP3, 4(TMP3) // Integer constant is in lo-word.
- | srwi CARG3, TMP0, 3
- | beq >4 // Nothing to copy?
- | add CARG3, CARG3, TMP3
- | lwz TMP2, TAB:CARG2->asize
- | slwi TMP1, TMP3, 3
- | lbz TMP3, TAB:CARG2->marked
- | cmplw CARG3, TMP2
- | add TMP2, RA, TMP0
- | lwz TMP0, TAB:CARG2->array
- | bgt >5
- | add TMP1, TMP1, TMP0
- | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
- |3: // Copy result slots to table.
- | evldd TMP0, 0(RA)
- | addi RA, RA, 8
- | cmpw cr1, RA, TMP2
- | evstdd TMP0, 0(TMP1)
- | addi TMP1, TMP1, 8
- | blt cr1, <3
- | bne >7
- |4:
- | ins_next
- |
- |5: // Need to resize array part.
- | stw BASE, L->base
- | mr CARG1, L
- | stw PC, SAVE_PC
- | mr SAVE0, RD
- | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
- | // Must not reallocate the stack.
- | mr RD, SAVE0
- | b <1
- |
- |7: // Possible table write barrier for any value. Skip valiswhite check.
- | barrierback TAB:CARG2, TMP3, TMP0
- | b <4
- break;
-
- /* -- Calls and vararg handling ----------------------------------------- */
-
- case BC_CALLM:
- | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
- | add NARGS8:RC, NARGS8:RC, MULTRES
- | // Fall through. Assumes BC_CALL follows.
- break;
- case BC_CALL:
- | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
- | evlddx LFUNC:RB, BASE, RA
- | mr TMP2, BASE
- | add BASE, BASE, RA
- | subi NARGS8:RC, NARGS8:RC, 8
- | checkfunc LFUNC:RB
- | addi BASE, BASE, 8
- | checkfail ->vmeta_call
- | ins_call
- break;
-
- case BC_CALLMT:
- | // RA = base*8, (RB = 0,) RC = extra_nargs*8
- | add NARGS8:RC, NARGS8:RC, MULTRES
- | // Fall through. Assumes BC_CALLT follows.
- break;
- case BC_CALLT:
- | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
- | evlddx LFUNC:RB, BASE, RA
- | add RA, BASE, RA
- | lwz TMP1, FRAME_PC(BASE)
- | subi NARGS8:RC, NARGS8:RC, 8
- | checkfunc LFUNC:RB
- | addi RA, RA, 8
- | checkfail ->vmeta_callt
- |->BC_CALLT_Z:
- | andi. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand.
- | lbz TMP3, LFUNC:RB->ffid
- | xori TMP2, TMP1, FRAME_VARG
- | cmplwi cr1, NARGS8:RC, 0
- | bne >7
- |1:
- | stw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC.
- | li TMP2, 0
- | cmplwi cr7, TMP3, 1 // (> FF_C) Calling a fast function?
- | beq cr1, >3
- |2:
- | addi TMP3, TMP2, 8
- | evlddx TMP0, RA, TMP2
- | cmplw cr1, TMP3, NARGS8:RC
- | evstddx TMP0, BASE, TMP2
- | mr TMP2, TMP3
- | bne cr1, <2
- |3:
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt
- | beq >5
- |4:
- | ins_callt
- |
- |5: // Tailcall to a fast function with a Lua frame below.
- | lwz INS, -4(TMP1)
- | decode_RA8 RA, INS
- | sub TMP1, BASE, RA
- | lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1)
- | lwz TMP1, LFUNC:TMP1->pc
- | lwz KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE.
- | b <4
- |
- |7: // Tailcall from a vararg function.
- | andi. TMP0, TMP2, FRAME_TYPEP
- | bne <1 // Vararg frame below?
- | sub BASE, BASE, TMP2 // Relocate BASE down.
- | lwz TMP1, FRAME_PC(BASE)
- | andi. TMP0, TMP1, FRAME_TYPE
- | b <1
- break;
-
- case BC_ITERC:
- | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
- | subi RA, RA, 24 // evldd doesn't support neg. offsets.
- | mr TMP2, BASE
- | evlddx LFUNC:RB, BASE, RA
- | add BASE, BASE, RA
- | evldd TMP0, 8(BASE)
- | evldd TMP1, 16(BASE)
- | evstdd LFUNC:RB, 24(BASE) // Copy callable.
- | checkfunc LFUNC:RB
- | evstdd TMP0, 32(BASE) // Copy state.
- | li NARGS8:RC, 16 // Iterators get 2 arguments.
- | evstdd TMP1, 40(BASE) // Copy control var.
- | addi BASE, BASE, 32
- | checkfail ->vmeta_call
- | ins_call
- break;
-
- case BC_ITERN:
- | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
- |.if JIT
- | // NYI: add hotloop, record BC_ITERN.
- |.endif
- | add RA, BASE, RA
- | lwz TAB:RB, -12(RA)
- | lwz RC, -4(RA) // Get index from control var.
- | lwz TMP0, TAB:RB->asize
- | lwz TMP1, TAB:RB->array
- | addi PC, PC, 4
- |1: // Traverse array part.
- | cmplw RC, TMP0
- | slwi TMP3, RC, 3
- | bge >5 // Index points after array part?
- | evlddx TMP2, TMP1, TMP3
- | checknil TMP2
- | lwz INS, -4(PC)
- | checkok >4
- | efdcfsi TMP0, RC
- | addi RC, RC, 1
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | evstdd TMP2, 8(RA)
- | decode_RD4 TMP1, INS
- | stw RC, -4(RA) // Update control var.
- | add PC, TMP1, TMP3
- | evstdd TMP0, 0(RA)
- |3:
- | ins_next
- |
- |4: // Skip holes in array part.
- | addi RC, RC, 1
- | b <1
- |
- |5: // Traverse hash part.
- | lwz TMP1, TAB:RB->hmask
- | sub RC, RC, TMP0
- | lwz TMP2, TAB:RB->node
- |6:
- | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1.
- | slwi TMP3, RC, 5
- | bgt <3
- | slwi RB, RC, 3
- | sub TMP3, TMP3, RB
- | evlddx RB, TMP2, TMP3
- | add NODE:TMP3, TMP2, TMP3
- | checknil RB
- | lwz INS, -4(PC)
- | checkok >7
- | evldd TMP3, NODE:TMP3->key
- | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
- | evstdd RB, 8(RA)
- | add RC, RC, TMP0
- | decode_RD4 TMP1, INS
- | evstdd TMP3, 0(RA)
- | addi RC, RC, 1
- | add PC, TMP1, TMP2
- | stw RC, -4(RA) // Update control var.
- | b <3
- |
- |7: // Skip holes in hash part.
- | addi RC, RC, 1
- | b <6
- break;
-
- case BC_ISNEXT:
- | // RA = base*8, RD = target (points to ITERN)
- | add RA, BASE, RA
- | li TMP2, -24
- | evlddx CFUNC:TMP1, RA, TMP2
- | lwz TMP2, -16(RA)
- | lwz TMP3, -8(RA)
- | evmergehi TMP0, CFUNC:TMP1, CFUNC:TMP1
- | cmpwi cr0, TMP2, LJ_TTAB
- | cmpwi cr1, TMP0, LJ_TFUNC
- | cmpwi cr6, TMP3, LJ_TNIL
- | bne cr1, >5
- | lbz TMP1, CFUNC:TMP1->ffid
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq
- | cmpwi cr7, TMP1, FF_next_N
- | srwi TMP0, RD, 1
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
- | add TMP3, PC, TMP0
- | bne cr0, >5
- | lus TMP1, 0xfffe
- | ori TMP1, TMP1, 0x7fff
- | stw ZERO, -4(RA) // Initialize control var.
- | stw TMP1, -8(RA)
- | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
- |1:
- | ins_next
- |5: // Despecialize bytecode if any of the checks fail.
- | li TMP0, BC_JMP
- | li TMP1, BC_ITERC
- | stb TMP0, -1(PC)
- | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
- | stb TMP1, 3(PC)
- | b <1
- break;
-
- case BC_VARG:
- | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
- | lwz TMP0, FRAME_PC(BASE)
- | add RC, BASE, RC
- | add RA, BASE, RA
- | addi RC, RC, FRAME_VARG
- | add TMP2, RA, RB
- | subi TMP3, BASE, 8 // TMP3 = vtop
- | sub RC, RC, TMP0 // RC = vbase
- | // Note: RC may now be even _above_ BASE if nargs was < numparams.
- | cmplwi cr1, RB, 0
- | sub. TMP1, TMP3, RC
- | beq cr1, >5 // Copy all varargs?
- | subi TMP2, TMP2, 16
- | ble >2 // No vararg slots?
- |1: // Copy vararg slots to destination slots.
- | evldd TMP0, 0(RC)
- | addi RC, RC, 8
- | evstdd TMP0, 0(RA)
- | cmplw RA, TMP2
- | cmplw cr1, RC, TMP3
- | bge >3 // All destination slots filled?
- | addi RA, RA, 8
- | blt cr1, <1 // More vararg slots?
- |2: // Fill up remainder with nil.
- | evstdd TISNIL, 0(RA)
- | cmplw RA, TMP2
- | addi RA, RA, 8
- | blt <2
- |3:
- | ins_next
- |
- |5: // Copy all varargs.
- | lwz TMP0, L->maxstack
- | li MULTRES, 8 // MULTRES = (0+1)*8
- | ble <3 // No vararg slots?
- | add TMP2, RA, TMP1
- | cmplw TMP2, TMP0
- | addi MULTRES, TMP1, 8
- | bgt >7
- |6:
- | evldd TMP0, 0(RC)
- | addi RC, RC, 8
- | evstdd TMP0, 0(RA)
- | cmplw RC, TMP3
- | addi RA, RA, 8
- | blt <6 // More vararg slots?
- | b <3
- |
- |7: // Grow stack for varargs.
- | mr CARG1, L
- | stw RA, L->top
- | sub SAVE0, RC, BASE // Need delta, because BASE may change.
- | stw BASE, L->base
- | sub RA, RA, BASE
- | stw PC, SAVE_PC
- | srwi CARG2, TMP1, 3
- | bl extern lj_state_growstack // (lua_State *L, int n)
- | lwz BASE, L->base
- | add RA, BASE, RA
- | add RC, BASE, SAVE0
- | subi TMP3, BASE, 8
- | b <6
- break;
-
- /* -- Returns ----------------------------------------------------------- */
-
- case BC_RETM:
- | // RA = results*8, RD = extra_nresults*8
- | add RD, RD, MULTRES // MULTRES >= 8, so RD >= 8.
- | // Fall through. Assumes BC_RET follows.
- break;
-
- case BC_RET:
- | // RA = results*8, RD = (nresults+1)*8
- | lwz PC, FRAME_PC(BASE)
- | add RA, BASE, RA
- | mr MULTRES, RD
- |1:
- | andi. TMP0, PC, FRAME_TYPE
- | xori TMP1, PC, FRAME_VARG
- | bne ->BC_RETV_Z
- |
- |->BC_RET_Z:
- | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
- | lwz INS, -4(PC)
- | cmpwi RD, 8
- | subi TMP2, BASE, 8
- | subi RC, RD, 8
- | decode_RB8 RB, INS
- | beq >3
- | li TMP1, 0
- |2:
- | addi TMP3, TMP1, 8
- | evlddx TMP0, RA, TMP1
- | cmpw TMP3, RC
- | evstddx TMP0, TMP2, TMP1
- | beq >3
- | addi TMP1, TMP3, 8
- | evlddx TMP0, RA, TMP3
- | cmpw TMP1, RC
- | evstddx TMP0, TMP2, TMP3
- | bne <2
- |3:
- |5:
- | cmplw RB, RD
- | decode_RA8 RA, INS
- | bgt >6
- | sub BASE, TMP2, RA
- | lwz LFUNC:TMP1, FRAME_FUNC(BASE)
- | ins_next1
- | lwz TMP1, LFUNC:TMP1->pc
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | ins_next2
- |
- |6: // Fill up results with nil.
- | subi TMP1, RD, 8
- | addi RD, RD, 8
- | evstddx TISNIL, TMP2, TMP1
- | b <5
- |
- |->BC_RETV_Z: // Non-standard return case.
- | andi. TMP2, TMP1, FRAME_TYPEP
- | bne ->vm_return
- | // Return from vararg function: relocate BASE down.
- | sub BASE, BASE, TMP1
- | lwz PC, FRAME_PC(BASE)
- | b <1
- break;
-
- case BC_RET0: case BC_RET1:
- | // RA = results*8, RD = (nresults+1)*8
- | lwz PC, FRAME_PC(BASE)
- | add RA, BASE, RA
- | mr MULTRES, RD
- | andi. TMP0, PC, FRAME_TYPE
- | xori TMP1, PC, FRAME_VARG
- | bne ->BC_RETV_Z
- |
- | lwz INS, -4(PC)
- | subi TMP2, BASE, 8
- | decode_RB8 RB, INS
- if (op == BC_RET1) {
- | evldd TMP0, 0(RA)
- | evstdd TMP0, 0(TMP2)
- }
- |5:
- | cmplw RB, RD
- | decode_RA8 RA, INS
- | bgt >6
- | sub BASE, TMP2, RA
- | lwz LFUNC:TMP1, FRAME_FUNC(BASE)
- | ins_next1
- | lwz TMP1, LFUNC:TMP1->pc
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | ins_next2
- |
- |6: // Fill up results with nil.
- | subi TMP1, RD, 8
- | addi RD, RD, 8
- | evstddx TISNIL, TMP2, TMP1
- | b <5
- break;
-
- /* -- Loops and branches ------------------------------------------------ */
-
- case BC_FORL:
- |.if JIT
- | hotloop
- |.endif
- | // Fall through. Assumes BC_IFORL follows.
- break;
-
- case BC_JFORI:
- case BC_JFORL:
-#if !LJ_HASJIT
- break;
-#endif
- case BC_FORI:
- case BC_IFORL:
- | // RA = base*8, RD = target (after end of loop or start of loop)
- vk = (op == BC_IFORL || op == BC_JFORL);
- | add RA, BASE, RA
- | evldd TMP1, FORL_IDX*8(RA)
- | evldd TMP3, FORL_STEP*8(RA)
- | evldd TMP2, FORL_STOP*8(RA)
- if (!vk) {
- | evcmpgtu cr0, TMP1, TISNUM
- | evcmpgtu cr7, TMP3, TISNUM
- | evcmpgtu cr1, TMP2, TISNUM
- | cror 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
- | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | blt ->vmeta_for
- }
- if (vk) {
- | efdadd TMP1, TMP1, TMP3
- | evstdd TMP1, FORL_IDX*8(RA)
- }
- | evcmpgts TMP3, TISNIL
- | evstdd TMP1, FORL_EXT*8(RA)
- | bge >2
- | efdcmpgt TMP1, TMP2
- |1:
- if (op != BC_JFORL) {
- | srwi RD, RD, 1
- | add RD, PC, RD
- if (op == BC_JFORI) {
- | addis PC, RD, -(BCBIAS_J*4 >> 16)
- } else {
- | addis RD, RD, -(BCBIAS_J*4 >> 16)
- }
- }
- if (op == BC_FORI) {
- | iselgt PC, RD, PC
- } else if (op == BC_IFORL) {
- | iselgt PC, PC, RD
- } else {
- | ble =>BC_JLOOP
- }
- | ins_next
- |2:
- | efdcmpgt TMP2, TMP1
- | b <1
- break;
-
- case BC_ITERL:
- |.if JIT
- | hotloop
- |.endif
- | // Fall through. Assumes BC_IITERL follows.
- break;
-
- case BC_JITERL:
-#if !LJ_HASJIT
- break;
-#endif
- case BC_IITERL:
- | // RA = base*8, RD = target
- | evlddx TMP1, BASE, RA
- | subi RA, RA, 8
- | checknil TMP1
- | checkok >1 // Stop if iterator returned nil.
- if (op == BC_JITERL) {
- | NYI
- } else {
- | branch_RD // Otherwise save control var + branch.
- | evstddx TMP1, BASE, RA
- }
- |1:
- | ins_next
- break;
-
- case BC_LOOP:
- | // RA = base*8, RD = target (loop extent)
- | // Note: RA/RD is only used by trace recorder to determine scope/extent
- | // This opcode does NOT jump, its only purpose is to detect a hot loop.
- |.if JIT
- | hotloop
- |.endif
- | // Fall through. Assumes BC_ILOOP follows.
- break;
-
- case BC_ILOOP:
- | // RA = base*8, RD = target (loop extent)
- | ins_next
- break;
-
- case BC_JLOOP:
- |.if JIT
- | NYI
- |.endif
- break;
-
- case BC_JMP:
- | // RA = base*8 (only used by trace recorder), RD = target
- | branch_RD
- | ins_next
- break;
-
- /* -- Function headers -------------------------------------------------- */
-
- case BC_FUNCF:
- |.if JIT
- | hotcall
- |.endif
- case BC_FUNCV: /* NYI: compiled vararg functions. */
- | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
- break;
-
- case BC_JFUNCF:
-#if !LJ_HASJIT
- break;
-#endif
- case BC_IFUNCF:
- | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
- | lwz TMP2, L->maxstack
- | lbz TMP1, -4+PC2PROTO(numparams)(PC)
- | lwz KBASE, -4+PC2PROTO(k)(PC)
- | cmplw RA, TMP2
- | slwi TMP1, TMP1, 3
- | bgt ->vm_growstack_l
- | ins_next1
- |2:
- | cmplw NARGS8:RC, TMP1 // Check for missing parameters.
- | ble >3
- if (op == BC_JFUNCF) {
- | NYI
- } else {
- | ins_next2
- }
- |
- |3: // Clear missing parameters.
- | evstddx TISNIL, BASE, NARGS8:RC
- | addi NARGS8:RC, NARGS8:RC, 8
- | b <2
- break;
-
- case BC_JFUNCV:
-#if !LJ_HASJIT
- break;
-#endif
- | NYI // NYI: compiled vararg functions
- break; /* NYI: compiled vararg functions. */
-
- case BC_IFUNCV:
- | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
- | lwz TMP2, L->maxstack
- | add TMP1, BASE, RC
- | add TMP0, RA, RC
- | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC.
- | addi TMP3, RC, 8+FRAME_VARG
- | lwz KBASE, -4+PC2PROTO(k)(PC)
- | cmplw TMP0, TMP2
- | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG.
- | bge ->vm_growstack_l
- | lbz TMP2, -4+PC2PROTO(numparams)(PC)
- | mr RA, BASE
- | mr RC, TMP1
- | ins_next1
- | cmpwi TMP2, 0
- | addi BASE, TMP1, 8
- | beq >3
- |1:
- | cmplw RA, RC // Less args than parameters?
- | evldd TMP0, 0(RA)
- | bge >4
- | evstdd TISNIL, 0(RA) // Clear old fixarg slot (help the GC).
- | addi RA, RA, 8
- |2:
- | addic. TMP2, TMP2, -1
- | evstdd TMP0, 8(TMP1)
- | addi TMP1, TMP1, 8
- | bne <1
- |3:
- | ins_next2
- |
- |4: // Clear missing parameters.
- | evmr TMP0, TISNIL
- | b <2
- break;
-
- case BC_FUNCC:
- case BC_FUNCCW:
- | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
- if (op == BC_FUNCC) {
- | lwz TMP3, CFUNC:RB->f
- } else {
- | lwz TMP3, DISPATCH_GL(wrapf)(DISPATCH)
- }
- | add TMP1, RA, NARGS8:RC
- | lwz TMP2, L->maxstack
- | add RC, BASE, NARGS8:RC
- | stw BASE, L->base
- | cmplw TMP1, TMP2
- | stw RC, L->top
- | li_vmstate C
- | mtctr TMP3
- if (op == BC_FUNCCW) {
- | lwz CARG2, CFUNC:RB->f
- }
- | mr CARG1, L
- | bgt ->vm_growstack_c // Need to grow stack.
- | st_vmstate
- | bctrl // (lua_State *L [, lua_CFunction f])
- | // Returns nresults.
- | lwz TMP1, L->top
- | slwi RD, CRET1, 3
- | lwz BASE, L->base
- | li_vmstate INTERP
- | lwz PC, FRAME_PC(BASE) // Fetch PC of caller.
- | sub RA, TMP1, RD // RA = L->top - nresults*8
- | st_vmstate
- | b ->vm_returnc
- break;
-
- /* ---------------------------------------------------------------------- */
-
- default:
- fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
- exit(2);
- break;
- }
-}
-
-static int build_backend(BuildCtx *ctx)
-{ /* Emit the helper subroutines, then one handler per bytecode op in opcode order. */
- int op; /* Opcode currently being emitted. */
-
- dasm_growpc(Dst, BC__MAX); /* Make room for BC__MAX pc labels (cf. the =>BC_JLOOP branch target). */
-
- build_subroutines(ctx);
-
- |.code_op // Presumably switches output to the section holding the opcode handlers -- confirm.
- for (op = 0; op < BC__MAX; op++)
- build_ins(ctx, (BCOp)op, op); /* Same value passed as BCOp and as plain int. */
-
- return BC__MAX;
-}
-
-/* Emit pseudo frame-info (.debug_frame and .eh_frame) for all assembler functions. Only BUILD_elfasm mode writes anything. */
-static void emit_asm_debug(BuildCtx *ctx)
-{
- int i; /* Register number in the save-rule loops below. */
- switch (ctx->mode) {
- case BUILD_elfasm:
- fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
- fprintf(ctx->fp, /* CIE for .debug_frame. */
- ".Lframe0:\n"
- "\t.long .LECIE0-.LSCIE0\n" /* CIE length */
- ".LSCIE0:\n"
- "\t.long 0xffffffff\n" /* CIE id for .debug_frame */
- "\t.byte 0x1\n" /* version */
- "\t.string \"\"\n" /* empty augmentation string */
- "\t.uleb128 0x1\n" /* code alignment factor */
- "\t.sleb128 -4\n" /* data alignment factor */
- "\t.byte 65\n" /* return address register (LR in the usual PPC DWARF numbering -- confirm) */
- "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" /* DW_CFA_def_cfa: reg 1, offset 0 */
- "\t.align 2\n"
- ".LECIE0:\n\n");
- fprintf(ctx->fp, /* FDE covering the generated code, starting at .Lbegin. */
- ".LSFDE0:\n"
- "\t.long .LEFDE0-.LASFDE0\n" /* FDE length */
- ".LASFDE0:\n"
- "\t.long .Lframe0\n" /* CIE pointer */
- "\t.long .Lbegin\n" /* initial location */
- "\t.long %d\n" /* address range: ctx->codesz */
- "\t.byte 0xe\n\t.uleb128 %d\n" /* DW_CFA_def_cfa_offset: CFRAME_SIZE */
- "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" /* DW_CFA_offset_extended_sf: reg 65, factored offset -1 */
- "\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n", /* DW_CFA_offset_extended: reg 70, factored offset 37 */
- (int)ctx->codesz, CFRAME_SIZE);
- for (i = 14; i <= 31; i++) /* Two save rules per register number 14..31. */
- fprintf(ctx->fp,
- "\t.byte %d\n\t.uleb128 %d\n" /* DW_CFA_offset (0x80|reg) for reg i */
- "\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n", /* DW_CFA_offset_extended for reg 1200+i (presumably the SPE upper half -- confirm) */
- 0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i));
- fprintf(ctx->fp,
- "\t.align 2\n"
- ".LEFDE0:\n\n");
- fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
- fprintf(ctx->fp, /* CIE for .eh_frame; names lj_err_unwind_dwarf as personality routine. */
- ".Lframe1:\n"
- "\t.long .LECIE1-.LSCIE1\n" /* CIE length */
- ".LSCIE1:\n"
- "\t.long 0\n" /* CIE id for .eh_frame */
- "\t.byte 0x1\n" /* version */
- "\t.string \"zPR\"\n" /* augmentation: z = data follows, P = personality, R = FDE pointer encoding */
- "\t.uleb128 0x1\n" /* code alignment factor */
- "\t.sleb128 -4\n" /* data alignment factor */
- "\t.byte 65\n" /* return address register */
- "\t.uleb128 6\n" /* augmentation length */
- "\t.byte 0x1b\n" /* pcrel|sdata4 (personality pointer encoding) */
- "\t.long lj_err_unwind_dwarf-.\n" /* pc-relative personality routine */
- "\t.byte 0x1b\n" /* pcrel|sdata4 (FDE pointer encoding) */
- "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" /* DW_CFA_def_cfa: reg 1, offset 0 */
- "\t.align 2\n"
- ".LECIE1:\n\n");
- fprintf(ctx->fp, /* FDE for .eh_frame; same rules as the .debug_frame FDE above. */
- ".LSFDE1:\n"
- "\t.long .LEFDE1-.LASFDE1\n" /* FDE length */
- ".LASFDE1:\n"
- "\t.long .LASFDE1-.Lframe1\n" /* CIE pointer: offset back to .Lframe1 */
- "\t.long .Lbegin-.\n" /* pc-relative initial location */
- "\t.long %d\n" /* address range: ctx->codesz */
- "\t.uleb128 0\n" /* augmentation length */
- "\t.byte 0xe\n\t.uleb128 %d\n" /* DW_CFA_def_cfa_offset: CFRAME_SIZE */
- "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" /* DW_CFA_offset_extended_sf: reg 65, factored offset -1 */
- "\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n", /* DW_CFA_offset_extended: reg 70, factored offset 37 */
- (int)ctx->codesz, CFRAME_SIZE);
- for (i = 14; i <= 31; i++) /* Same per-register save rules as in .debug_frame. */
- fprintf(ctx->fp,
- "\t.byte %d\n\t.uleb128 %d\n" /* DW_CFA_offset (0x80|reg) for reg i */
- "\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n", /* DW_CFA_offset_extended for reg 1200+i */
- 0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i));
- fprintf(ctx->fp,
- "\t.align 2\n"
- ".LEFDE1:\n\n");
- break;
- default: /* Other build modes get no frame info. */
- break;
- }
-}
-
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index a0c7cc6..ea0415e 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -1,6 +1,6 @@
|// Low-level VM code for x86 CPUs.
|// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
+|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
|
|.if P64
|.arch x64
@@ -373,7 +373,6 @@
| fpop
|.endmacro
|
-|.macro fdup; fld st0; .endmacro
|.macro fpop1; fstp st1; .endmacro
|
|// Synthesize SSE FP constants.
@@ -1329,19 +1328,6 @@ static void build_subroutines(BuildCtx *ctx)
| cmp NARGS:RD, 2+1; jb ->fff_fallback
|.endmacro
|
- |.macro .ffunc_n, name
- | .ffunc_1 name
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
- | fld qword [BASE]
- |.endmacro
- |
- |.macro .ffunc_n, name, op
- | .ffunc_1 name
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
- | op
- | fld qword [BASE]
- |.endmacro
- |
|.macro .ffunc_nsse, name, op
| .ffunc_1 name
| cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1352,14 +1338,6 @@ static void build_subroutines(BuildCtx *ctx)
| .ffunc_nsse name, movsd
|.endmacro
|
- |.macro .ffunc_nn, name
- | .ffunc_2 name
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
- | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
- | fld qword [BASE]
- | fld qword [BASE+8]
- |.endmacro
- |
|.macro .ffunc_nnsse, name
| .ffunc_2 name
| cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1669,7 +1647,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov RD, 1+3
| jmp ->fff_res
|
- |.ffunc_1 ipairs_aux
+ |.ffunc_2 ipairs_aux
| cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
| cmp dword [BASE+12], LJ_TISNUM
|.if DUALNUM
@@ -2029,6 +2007,12 @@ static void build_subroutines(BuildCtx *ctx)
| mov RAa, -8 // Results start at BASE+RA = BASE-8.
| jmp ->vm_return
|
+ |.if X64
+ |.define fff_resfp, fff_resxmm0
+ |.else
+ |.define fff_resfp, fff_resn
+ |.endif
+ |
|.macro math_round, func
| .ffunc math_ .. func
|.if DUALNUM
@@ -2061,22 +2045,14 @@ static void build_subroutines(BuildCtx *ctx)
|.ffunc math_log
| cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
| cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
- | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn
- |
- |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn
- |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn
- |
- |.ffunc_n math_sin; fsin; jmp ->fff_resn
- |.ffunc_n math_cos; fcos; jmp ->fff_resn
- |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn
- |
- |.ffunc_n math_asin
- | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
- | jmp ->fff_resn
- |.ffunc_n math_acos
- | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
- | jmp ->fff_resn
- |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
+ | movsd xmm0, qword [BASE]
+ |.if not X64
+ | movsd FPARG1, xmm0
+ |.endif
+ | mov RB, BASE
+ | call extern log
+ | mov BASE, RB
+ | jmp ->fff_resfp
|
|.macro math_extern, func
| .ffunc_nsse math_ .. func
@@ -2084,20 +2060,38 @@ static void build_subroutines(BuildCtx *ctx)
| movsd FPARG1, xmm0
|.endif
| mov RB, BASE
- | call extern lj_vm_ .. func
+ | call extern func
| mov BASE, RB
- |.if X64
- | jmp ->fff_resxmm0
- |.else
- | jmp ->fff_resn
+ | jmp ->fff_resfp
+ |.endmacro
+ |
+ |.macro math_extern2, func
+ | .ffunc_nnsse math_ .. func
+ |.if not X64
+ | movsd FPARG1, xmm0
+ | movsd FPARG3, xmm1
|.endif
+ | mov RB, BASE
+ | call extern func
+ | mov BASE, RB
+ | jmp ->fff_resfp
|.endmacro
|
+ | math_extern log10
+ | math_extern exp
+ | math_extern sin
+ | math_extern cos
+ | math_extern tan
+ | math_extern asin
+ | math_extern acos
+ | math_extern atan
| math_extern sinh
| math_extern cosh
| math_extern tanh
+ | math_extern2 pow
+ | math_extern2 atan2
+ | math_extern2 fmod
|
- |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
|.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
|
|.ffunc_1 math_frexp
@@ -2151,13 +2145,6 @@ static void build_subroutines(BuildCtx *ctx)
|4:
| xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
|
- |.ffunc_nnr math_fmod
- |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1
- | fpop1
- | jmp ->fff_resn
- |
- |.ffunc_nnsse math_pow; call ->vm_pow_sse; jmp ->fff_resxmm0
- |
|.macro math_minmax, name, cmovop, sseop
| .ffunc name
| mov RA, 2
@@ -2899,7 +2886,16 @@ static void build_subroutines(BuildCtx *ctx)
|
|// FP value rounding. Called by math.floor/math.ceil fast functions
|// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
- |.macro vm_round, name, mode
+ |.macro vm_round, name, mode, cond
+ |->name:
+ |.if not X64 and cond
+ | movsd xmm0, qword [esp+4]
+ | call ->name .. _sse
+ | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
+ | fld qword [esp+4]
+ | ret
+ |.endif
+ |
|->name .. _sse:
| sseconst_abs xmm2, RDa
| sseconst_2p52 xmm3, RDa
@@ -2936,18 +2932,9 @@ static void build_subroutines(BuildCtx *ctx)
| ret
|.endmacro
|
- |->vm_floor:
- |.if not X64
- | movsd xmm0, qword [esp+4]
- | call ->vm_floor_sse
- | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
- | fld qword [esp+4]
- | ret
- |.endif
- |
- | vm_round vm_floor, 0
- | vm_round vm_ceil, 1
- | vm_round vm_trunc, 2
+ | vm_round vm_floor, 0, 1
+ | vm_round vm_ceil, 1, JIT
+ | vm_round vm_trunc, 2, JIT
|
|// FP modulo x%y. Called by BC_MOD* and vm_arith.
|->vm_mod:
@@ -2979,65 +2966,6 @@ static void build_subroutines(BuildCtx *ctx)
| subsd xmm0, xmm1
| ret
|
- |// FP log2(x). Called by math.log(x, base).
- |->vm_log2:
- |.if X64WIN
- | movsd qword [rsp+8], xmm0 // Use scratch area.
- | fld1
- | fld qword [rsp+8]
- | fyl2x
- | fstp qword [rsp+8]
- | movsd xmm0, qword [rsp+8]
- |.elif X64
- | movsd qword [rsp-8], xmm0 // Use red zone.
- | fld1
- | fld qword [rsp-8]
- | fyl2x
- | fstp qword [rsp-8]
- | movsd xmm0, qword [rsp-8]
- |.else
- | fld1
- | fld qword [esp+4]
- | fyl2x
- |.endif
- | ret
- |
- |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
- |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
- |// Caveat: needs 3 slots on x87 stack!
- |->vm_exp_x87:
- | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
- |->vm_exp2_x87:
- | .if X64WIN
- | .define expscratch, dword [rsp+8] // Use scratch area.
- | .elif X64
- | .define expscratch, dword [rsp-8] // Use red zone.
- | .else
- | .define expscratch, dword [esp+4] // Needs 4 byte scratch area.
- | .endif
- | fst expscratch // Caveat: overwrites ARG1.
- | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf
- | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0
- |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
- | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
- | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
- |1:
- | ret
- |2:
- | fpop; fldz; ret
- |
- |// Generic power function x^y. Called by BC_POW, math.pow fast function,
- |// and vm_arith.
- |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
- |// Needs 16 byte scratch area for x86. Also called from JIT code.
- |->vm_pow_sse:
- | cvttsd2si eax, xmm1
- | cvtsi2sd xmm2, eax
- | ucomisd xmm1, xmm2
- | jnz >8 // Branch for FP exponents.
- | jp >9 // Branch for NaN exponent.
- | // Fallthrough.
- |
|// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
|->vm_powi_sse:
| cmp eax, 1; jle >6 // i<=1?
@@ -3073,246 +3001,6 @@ static void build_subroutines(BuildCtx *ctx)
| sseconst_1 xmm0, RDa
| ret
|
- |8: // FP/FP power function x^y.
- |.if X64
- | movd rax, xmm1; shl rax, 1
- | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf?
- | movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
- | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y?
- | .if X64WIN
- | movsd qword [rsp+16], xmm1 // Use scratch area.
- | movsd qword [rsp+8], xmm0
- | fld qword [rsp+16]
- | fld qword [rsp+8]
- | .else
- | movsd qword [rsp-16], xmm1 // Use red zone.
- | movsd qword [rsp-8], xmm0
- | fld qword [rsp-16]
- | fld qword [rsp-8]
- | .endif
- |.else
- | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
- | movsd qword [esp+4], xmm0
- | cmp dword [esp+12], 0; jne >1
- | mov eax, [esp+16]; shl eax, 1
- | cmp eax, 0xffe00000; je >2 // x^+-Inf?
- |1:
- | cmp dword [esp+4], 0; jne >1
- | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
- | cmp eax, 0xffe00000; je >5 // +-Inf^y?
- |1:
- | fld qword [esp+12]
- | fld qword [esp+4]
- |.endif
- | fyl2x // y*log2(x)
- | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
- | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
- |.if X64WIN
- | fstp qword [rsp+8] // Use scratch area.
- | movsd xmm0, qword [rsp+8]
- |.elif X64
- | fstp qword [rsp-8] // Use red zone.
- | movsd xmm0, qword [rsp-8]
- |.else
- | fstp qword [esp+4] // Needs 8 byte scratch area.
- | movsd xmm0, qword [esp+4]
- |.endif
- | ret
- |
- |9: // Handle x^NaN.
- | sseconst_1 xmm2, RDa
- | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
- | movaps xmm0, xmm1 // x^NaN ==> NaN
- |1:
- | ret
- |
- |2: // Handle x^+-Inf.
- | sseconst_abs xmm2, RDa
- | andpd xmm0, xmm2 // |x|
- | sseconst_1 xmm2, RDa
- | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
- | movmskpd eax, xmm1
- | xorps xmm0, xmm0
- | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
- |3:
- | sseconst_hi xmm0, RDa, 7ff00000 // +Inf
- | ret
- |
- |4: // Handle +-0^y.
- | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
- | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
- | ret
- |
- |5: // Handle +-Inf^y.
- | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
- | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
- | ret
- |
- |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
- |// Computes fpm(x) for extended math functions. ORDER FPM.
- |->vm_foldfpm:
- |.if JIT
- |.if X64
- | .if X64WIN
- | .define fpmop, CARG2d
- | .else
- | .define fpmop, CARG1d
- | .endif
- | cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse
- | cmp fpmop, 3; jb ->vm_trunc_sse; ja >2
- | sqrtsd xmm0, xmm0; ret
- |2:
- | .if X64WIN
- | movsd qword [rsp+8], xmm0 // Use scratch area.
- | fld qword [rsp+8]
- | .else
- | movsd qword [rsp-8], xmm0 // Use red zone.
- | fld qword [rsp-8]
- | .endif
- | cmp fpmop, 5; ja >2
- | .if X64WIN; pop rax; .endif
- | je >1
- | call ->vm_exp_x87
- | .if X64WIN; push rax; .endif
- | jmp >7
- |1:
- | call ->vm_exp2_x87
- | .if X64WIN; push rax; .endif
- | jmp >7
- |2: ; cmp fpmop, 7; je >1; ja >2
- | fldln2; fxch; fyl2x; jmp >7
- |1: ; fld1; fxch; fyl2x; jmp >7
- |2: ; cmp fpmop, 9; je >1; ja >2
- | fldlg2; fxch; fyl2x; jmp >7
- |1: ; fsin; jmp >7
- |2: ; cmp fpmop, 11; je >1; ja >9
- | fcos; jmp >7
- |1: ; fptan; fpop
- |7:
- | .if X64WIN
- | fstp qword [rsp+8] // Use scratch area.
- | movsd xmm0, qword [rsp+8]
- | .else
- | fstp qword [rsp-8] // Use red zone.
- | movsd xmm0, qword [rsp-8]
- | .endif
- | ret
- |.else // x86 calling convention.
- | .define fpmop, eax
- | mov fpmop, [esp+12]
- | movsd xmm0, qword [esp+4]
- | cmp fpmop, 1; je >1; ja >2
- | call ->vm_floor_sse; jmp >7
- |1: ; call ->vm_ceil_sse; jmp >7
- |2: ; cmp fpmop, 3; je >1; ja >2
- | call ->vm_trunc_sse; jmp >7
- |1:
- | sqrtsd xmm0, xmm0
- |7:
- | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
- | fld qword [esp+4]
- | ret
- |2: ; fld qword [esp+4]
- | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
- |2: ; cmp fpmop, 7; je >1; ja >2
- | fldln2; fxch; fyl2x; ret
- |1: ; fld1; fxch; fyl2x; ret
- |2: ; cmp fpmop, 9; je >1; ja >2
- | fldlg2; fxch; fyl2x; ret
- |1: ; fsin; ret
- |2: ; cmp fpmop, 11; je >1; ja >9
- | fcos; ret
- |1: ; fptan; fpop; ret
- |.endif
- |9: ; int3 // Bad fpm.
- |.endif
- |
- |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
- |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
- |// and basic math functions. ORDER ARITH
- |->vm_foldarith:
- |.if X64
- |
- | .if X64WIN
- | .define foldop, CARG3d
- | .else
- | .define foldop, CARG1d
- | .endif
- | cmp foldop, 1; je >1; ja >2
- | addsd xmm0, xmm1; ret
- |1: ; subsd xmm0, xmm1; ret
- |2: ; cmp foldop, 3; je >1; ja >2
- | mulsd xmm0, xmm1; ret
- |1: ; divsd xmm0, xmm1; ret
- |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse
- | cmp foldop, 7; je >1; ja >2
- | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
- |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
- |2: ; cmp foldop, 9; ja >2
- |.if X64WIN
- | movsd qword [rsp+8], xmm0 // Use scratch area.
- | movsd qword [rsp+16], xmm1
- | fld qword [rsp+8]
- | fld qword [rsp+16]
- |.else
- | movsd qword [rsp-8], xmm0 // Use red zone.
- | movsd qword [rsp-16], xmm1
- | fld qword [rsp-8]
- | fld qword [rsp-16]
- |.endif
- | je >1
- | fpatan
- |7:
- |.if X64WIN
- | fstp qword [rsp+8] // Use scratch area.
- | movsd xmm0, qword [rsp+8]
- |.else
- | fstp qword [rsp-8] // Use red zone.
- | movsd xmm0, qword [rsp-8]
- |.endif
- | ret
- |1: ; fxch; fscale; fpop1; jmp <7
- |2: ; cmp foldop, 11; je >1; ja >9
- | minsd xmm0, xmm1; ret
- |1: ; maxsd xmm0, xmm1; ret
- |9: ; int3 // Bad op.
- |
- |.else // x86 calling convention.
- |
- | .define foldop, eax
- | mov foldop, [esp+20]
- | movsd xmm0, qword [esp+4]
- | movsd xmm1, qword [esp+12]
- | cmp foldop, 1; je >1; ja >2
- | addsd xmm0, xmm1
- |7:
- | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
- | fld qword [esp+4]
- | ret
- |1: ; subsd xmm0, xmm1; jmp <7
- |2: ; cmp foldop, 3; je >1; ja >2
- | mulsd xmm0, xmm1; jmp <7
- |1: ; divsd xmm0, xmm1; jmp <7
- |2: ; cmp foldop, 5
- | je >1; ja >2
- | call ->vm_mod; jmp <7
- |1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7 // Writes to scratch area.
- |2: ; cmp foldop, 7; je >1; ja >2
- | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
- |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
- |2: ; cmp foldop, 9; ja >2
- | fld qword [esp+4] // Reload from stack
- | fld qword [esp+12]
- | je >1
- | fpatan; ret
- |1: ; fxch; fscale; fpop1; ret
- |2: ; cmp foldop, 11; je >1; ja >9
- | minsd xmm0, xmm1; jmp <7
- |1: ; maxsd xmm0, xmm1; jmp <7
- |9: ; int3 // Bad op.
- |
- |.endif
- |
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
@@ -4107,8 +3795,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_POW:
| ins_arithpre movsd, xmm1
- | call ->vm_pow_sse
+ | mov RB, BASE
+ |.if not X64
+ | movsd FPARG1, xmm0
+ | movsd FPARG3, xmm1
+ |.endif
+ | call extern pow
+ | movzx RA, PC_RA
+ | mov BASE, RB
+ |.if X64
| ins_arithpost
+ |.else
+ | fstp qword [BASE+RA*8]
+ |.endif
| ins_next
break;
@@ -4950,8 +4649,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| jnz <4
| movzx RA, PC_RA
| not RAa
- | lea RA, [BASE+RA*8]
- | mov LFUNC:KBASE, [RA-8] // Need to prepare KBASE.
+ | mov LFUNC:KBASE, [BASE+RA*8-8] // Need to prepare KBASE.
| mov KBASE, LFUNC:KBASE->pc
| mov KBASE, [KBASE+PC2PROTO(k)]
| jmp <4
@@ -5962,15 +5660,21 @@ static void emit_asm_debug(BuildCtx *ctx)
"LEFDEY:\n\n", fcsize);
}
#endif
-#if LJ_64
- fprintf(ctx->fp, "\t.subsections_via_symbols\n");
-#else
+#if !LJ_64
fprintf(ctx->fp,
"\t.non_lazy_symbol_pointer\n"
"L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
".indirect_symbol _lj_err_unwind_dwarf\n"
- ".long 0\n");
+ ".long 0\n\n");
+ fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
+ {
+ const char *const *xn;
+ for (xn = ctx->extnames; *xn; xn++)
+ if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
+ fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
+ }
#endif
+ fprintf(ctx->fp, ".subsections_via_symbols\n");
}
break;
default: /* Difficult for other modes. */