diff options
author | Ronan Collobert <locronan@fb.com> | 2017-04-18 23:51:34 +0300 |
---|---|---|
committer | Ronan Collobert <locronan@fb.com> | 2017-04-18 23:51:34 +0300 |
commit | af425c043c9f804bf9e08fc9298d146d21fee00f (patch) | |
tree | ae26a35f03ab2b7bce01f4e24c8abb4986e9204c | |
parent | c63c58b971923136ffff830af2bd5728673b5206 (diff) | |
parent | abd5cbe0a68af28f4194b82c2b038c87b4ff81cd (diff) |
Merge commit 'abd5cbe0a68af28f4194b82c2b038c87b4ff81cd'
209 files changed, 16332 insertions, 2619 deletions
diff --git a/luajit-2.1/COPYRIGHT b/luajit-2.1/COPYRIGHT index 1ef7df6..6ed4002 100644 --- a/luajit-2.1/COPYRIGHT +++ b/luajit-2.1/COPYRIGHT @@ -1,7 +1,7 @@ =============================================================================== LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/ -Copyright (C) 2005-2015 Mike Pall. All rights reserved. +Copyright (C) 2005-2017 Mike Pall. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/luajit-2.1/Makefile b/luajit-2.1/Makefile index 8ce773e..e6472e0 100644 --- a/luajit-2.1/Makefile +++ b/luajit-2.1/Makefile @@ -10,13 +10,13 @@ # For MSVC, please follow the instructions given in src/msvcbuild.bat. # For MinGW and Cygwin, cd to src and run make with the Makefile there. # -# Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +# Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ############################################################################## MAJVER= 2 MINVER= 1 RELVER= 0 -PREREL= -beta1 +PREREL= -beta2 VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL) ABIVER= 5.1 @@ -47,17 +47,18 @@ INSTALL_PKGCONFIG= $(INSTALL_LIB)/pkgconfig INSTALL_TNAME= luajit-$(VERSION) INSTALL_TSYMNAME= luajit INSTALL_ANAME= libluajit-$(ABIVER).a -INSTALL_SONAME= libluajit-$(ABIVER).so.$(MAJVER).$(MINVER).$(RELVER) -INSTALL_SOSHORT= libluajit-$(ABIVER).so -INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).$(MINVER).$(RELVER).dylib +INSTALL_SOSHORT1= libluajit-$(ABIVER).so +INSTALL_SOSHORT2= libluajit-$(ABIVER).so.$(MAJVER) +INSTALL_SONAME= $(INSTALL_SOSHORT2).$(MINVER).$(RELVER) INSTALL_DYLIBSHORT1= libluajit-$(ABIVER).dylib INSTALL_DYLIBSHORT2= libluajit-$(ABIVER).$(MAJVER).dylib +INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).$(MINVER).$(RELVER).dylib INSTALL_PCNAME= luajit.pc INSTALL_STATIC= $(INSTALL_LIB)/$(INSTALL_ANAME) INSTALL_DYN= $(INSTALL_LIB)/$(INSTALL_SONAME) -INSTALL_SHORT1= $(INSTALL_LIB)/$(INSTALL_SOSHORT) -INSTALL_SHORT2= $(INSTALL_LIB)/$(INSTALL_SOSHORT) +INSTALL_SHORT1= $(INSTALL_LIB)/$(INSTALL_SOSHORT1) +INSTALL_SHORT2= $(INSTALL_LIB)/$(INSTALL_SOSHORT2) INSTALL_T= $(INSTALL_BIN)/$(INSTALL_TNAME) INSTALL_TSYM= $(INSTALL_BIN)/$(INSTALL_TSYMNAME) INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME) @@ -85,16 +86,22 @@ FILE_MAN= luajit.1 FILE_PC= luajit.pc FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ - dis_x86.lua dis_x64.lua dis_arm.lua dis_ppc.lua \ - dis_mips.lua dis_mipsel.lua vmdef.lua + dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ + dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ + dis_mips64.lua dis_mips64el.lua vmdef.lua ifeq (,$(findstring Windows,$(OS))) - ifeq (Darwin,$(shell uname -s)) - INSTALL_SONAME= $(INSTALL_DYLIBNAME) - INSTALL_SHORT1= $(INSTALL_LIB)/$(INSTALL_DYLIBSHORT1) - INSTALL_SHORT2= $(INSTALL_LIB)/$(INSTALL_DYLIBSHORT2) - LDCONFIG= : - endif + HOST_SYS:= $(shell uname -s) +else + HOST_SYS= Windows +endif +TARGET_SYS?= $(HOST_SYS) + +ifeq (Darwin,$(TARGET_SYS)) + INSTALL_SONAME= $(INSTALL_DYLIBNAME) + INSTALL_SOSHORT1= $(INSTALL_DYLIBSHORT1) + INSTALL_SOSHORT2= $(INSTALL_DYLIBSHORT2) + LDCONFIG= : endif ############################################################################## diff --git a/luajit-2.1/README b/luajit-2.1/README index ca70dd8..719e611 100644 --- a/luajit-2.1/README +++ b/luajit-2.1/README @@ -1,11 +1,11 @@ -README for LuaJIT 2.1.0-beta1 +README for LuaJIT 2.1.0-beta2 ----------------------------- LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. Project Homepage: http://luajit.org/ -LuaJIT is Copyright (C) 2005-2015 Mike Pall. +LuaJIT is Copyright (C) 2005-2017 Mike Pall. LuaJIT is free software, released under the MIT license. See full Copyright Notice in the COPYRIGHT file or in luajit.h. diff --git a/luajit-2.1/doc/bluequad-print.css b/luajit-2.1/doc/bluequad-print.css index 07f5c84..62e1c16 100644 --- a/luajit-2.1/doc/bluequad-print.css +++ b/luajit-2.1/doc/bluequad-print.css @@ -1,4 +1,4 @@ -/* Copyright (C) 2004-2015 Mike Pall. +/* Copyright (C) 2004-2017 Mike Pall. * * You are welcome to use the general ideas of this design for your own sites. * But please do not steal the stylesheet, the layout or the color scheme. diff --git a/luajit-2.1/doc/bluequad.css b/luajit-2.1/doc/bluequad.css index ae53143..be2c4bf 100644 --- a/luajit-2.1/doc/bluequad.css +++ b/luajit-2.1/doc/bluequad.css @@ -1,4 +1,4 @@ -/* Copyright (C) 2004-2015 Mike Pall. +/* Copyright (C) 2004-2017 Mike Pall. * * You are welcome to use the general ideas of this design for your own sites. * But please do not steal the stylesheet, the layout or the color scheme. diff --git a/luajit-2.1/doc/changes.html b/luajit-2.1/doc/changes.html index 826cd24..426b18f 100644 --- a/luajit-2.1/doc/changes.html +++ b/luajit-2.1/doc/changes.html @@ -4,7 +4,7 @@ <title>LuaJIT Change History</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> <meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall"> +<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -74,6 +74,19 @@ to see whether newer versions are available. </p> <div class="major" style="background: #d0d0ff;"> +<h2 id="LuaJIT-2.1.0-beta2">LuaJIT 2.1.0-beta2 — 2016-03-03</h2> +<ul> +<li>Enable trace stitching.</li> +<li>Use internal implementation for converting FP numbers to strings.</li> +<li>Parse Unicode escape <tt>'\u{XX...}'</tt> in string literals.</li> +<li>Add MIPS soft-float support.</li> +<li>Switch MIPS port to dual-number mode.</li> +<li>x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.</li> +<li>FFI: Add <tt>ssize_t</tt> declaration.</li> +<li>FFI: Parse <tt>#line NN</tt> and <tt>#NN</tt>.</li> +<li>Various minor fixes.</li> +</ul> + <h2 id="LuaJIT-2.1.0-beta1">LuaJIT 2.1.0-beta1 — 2015-08-25</h2> <p> This is a brief summary of the major changes in LuaJIT 2.1 compared to 2.0. @@ -86,12 +99,11 @@ Please take a look at the commit history for more details. <li>Add <tt>LJ_GC64</tt> mode: 64 bit GC object references (really: 47 bit). Interpreter-only for now.</li> <li>Add <tt>LJ_FR2</tt> mode: Two-slot frame info. Required by <tt>LJ_GC64</tt> mode.</li> <li>Add <tt>table.new()</tt> and <tt>table.clear()</tt>.</li> -<li>Parse Unicode escape <tt>'\u{XX...}'</tt> in string literals.</li> <li>Parse binary number literals (<tt>0bxxx</tt>).</li> </ul></li> <li>Improvements to the JIT compiler: <ul> -<li>Add trace stitching.</li> +<li>Add trace stitching (disabled for now).</li> <li>Compile various builtins: <tt>string.char()</tt>, <tt>string.reverse()</tt>, <tt>string.lower()</tt>, <tt>string.upper()</tt>, <tt>string.rep()</tt>, <tt>string.format()</tt>, <tt>table.concat()</tt>, <tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>, <tt>debug.getmetatable()</tt>.</li> <li>Compile <tt>string.find()</tt> for fixed string searches (no patterns).</li> <li>Compile <tt>BC_TSETM</tt>, e.g. <tt>{1,2,3,f()}</tt>.</li> @@ -113,7 +125,6 @@ Please take a look at the commit history for more details. <li>x64: Add separate port of the interpreter to <tt>LJ_GC64</tt> mode.</li> <li>x86/x64: Drop internal x87 math functions. Use libm functions.</li> <li>x86: Remove x87 support from interpreter. SSE2 is mandatory now.</li> -<li>x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.</li> <li>PPC/e500: Drop support for this architecture.</li> </ul></li> <li>FFI library: @@ -124,7 +135,6 @@ Please take a look at the commit history for more details. <li>FFI: Compile lightuserdata to <tt>void *</tt> conversion.</li> <li>FFI: Compile <tt>ffi.gc(cdata, nil)</tt>, too.</li> <li>FFI: Add <tt>ffi.typeinfo()</tt>.</li> -<li>FFI: Add <tt>ssize_t</tt> declaration.</li> </ul></li> </ul> </div> @@ -797,7 +807,7 @@ no point in listing differences over earlier versions.</li> </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2015 Mike Pall +Copyright © 2005-2017 Mike Pall <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/luajit-2.1/doc/contact.html b/luajit-2.1/doc/contact.html index d92c3e3..fe4751c 100644 --- a/luajit-2.1/doc/contact.html +++ b/luajit-2.1/doc/contact.html @@ -4,7 +4,7 @@ <title>Contact</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> <meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall"> +<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -61,8 +61,15 @@ </div> <div id="main"> <p> +If you want to report bugs, propose fixes or suggest enhancements, +please use the +<a href="https://github.com/LuaJIT/LuaJIT/issues">GitHub issue tracker</a>. +</p> +<p> Please send general questions to the <a href="http://luajit.org/list.html"><span class="ext">»</span> LuaJIT mailing list</a>. +</p> +<p> You can also send any questions you have directly to me: </p> @@ -86,7 +93,7 @@ xD("fyZKB8xv\"FJytmz8.KAB0u52D") <h2>Copyright</h2> <p> All documentation is -Copyright © 2005-2015 Mike Pall. +Copyright © 2005-2017 Mike Pall. </p> @@ -94,7 +101,7 @@ Copyright © 2005-2015 Mike Pall. </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2015 Mike Pall +Copyright © 2005-2017 Mike Pall <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/luajit-2.1/doc/ext_c_api.html b/luajit-2.1/doc/ext_c_api.html index 91dd9ef..ad462c6 100644 --- a/luajit-2.1/doc/ext_c_api.html +++ b/luajit-2.1/doc/ext_c_api.html @@ -4,7 +4,7 @@ <title>Lua/C API Extensions</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> <meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall"> +<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -179,7 +179,7 @@ Also note that this mechanism is not without overhead. </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2015 Mike Pall +Copyright © 2005-2017 Mike Pall <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/luajit-2.1/doc/ext_ffi.html b/luajit-2.1/doc/ext_ffi.html index 1ff2236..5e1daaf 100644 --- a/luajit-2.1/doc/ext_ffi.html +++ b/luajit-2.1/doc/ext_ffi.html @@ -4,7 +4,7 @@ <title>FFI Library</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> <meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall"> +<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -322,7 +322,7 @@ without undue conversion penalties. </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2015 Mike Pall +Copyright © 2005-2017 Mike Pall <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/luajit-2.1/doc/ext_ffi_api.html b/luajit-2.1/doc/ext_ffi_api.html index b095c05..91af2e1 100644 --- a/luajit-2.1/doc/ext_ffi_api.html +++ b/luajit-2.1/doc/ext_ffi_api.html @@ -4,7 +4,7 @@ <title>ffi.* API Functions</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> <meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall"> +<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -560,7 +560,7 @@ named <tt>i</tt>. </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2015 Mike Pall +Copyright © 2005-2017 Mike Pall <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/luajit-2.1/doc/ext_ffi_semantics.html b/luajit-2.1/doc/ext_ffi_semantics.html index f65fe8f..bba03b7 100644 --- a/luajit-2.1/doc/ext_ffi_semantics.html +++ b/luajit-2.1/doc/ext_ffi_semantics.html @@ -4,7 +4,7 @@ <title>FFI Semantics</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> <meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall"> +<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -1253,7 +1253,7 @@ compiled.</li> </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2015 Mike Pall +Copyright © 2005-2017 Mike Pall <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/luajit-2.1/doc/ext_ffi_tutorial.html b/luajit-2.1/doc/ext_ffi_tutorial.html index e3f0146..3650066 100644 --- a/luajit-2.1/doc/ext_ffi_tutorial.html +++ b/luajit-2.1/doc/ext_ffi_tutorial.html @@ -4,7 +4,7 @@ <title>FFI Tutorial</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> <meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall"> +<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -593,7 +593,7 @@ it to a local variable in the function scope is unnecessary. </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2015 Mike Pall +Copyright © 2005-2017 Mike Pall <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/luajit-2.1/doc/ext_jit.html b/luajit-2.1/doc/ext_jit.html index a569dd5..e4088bc 100644 --- a/luajit-2.1/doc/ext_jit.html +++ b/luajit-2.1/doc/ext_jit.html @@ -4,7 +4,7 @@ <title>jit.* Library</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> <meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall"> +<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -153,7 +153,7 @@ Contains the target OS name: <h3 id="jit_arch"><tt>jit.arch</tt></h3> <p> Contains the target architecture name: -"x86", "x64", "arm", "ppc", or "mips". +"x86", "x64", "arm", "arm64", "ppc", "mips" or "mips64". </p> <h2 id="jit_opt"><tt>jit.opt.*</tt> — JIT compiler optimization control</h2> @@ -191,7 +191,7 @@ if you want to know more. </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2015 Mike Pall +Copyright © 2005-2017 Mike Pall <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/luajit-2.1/doc/ext_profiler.html b/luajit-2.1/doc/ext_profiler.html index be63662..71b8c03 100644 --- a/luajit-2.1/doc/ext_profiler.html +++ b/luajit-2.1/doc/ext_profiler.html @@ -4,7 +4,7 @@ <title>Profiler</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> <meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall"> +<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -355,7 +355,7 @@ use. </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2015 Mike Pall +Copyright © 2005-2017 Mike Pall <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/luajit-2.1/doc/extensions.html b/luajit-2.1/doc/extensions.html index e034e1d..d7cc969 100644 --- a/luajit-2.1/doc/extensions.html +++ b/luajit-2.1/doc/extensions.html @@ -4,7 +4,7 @@ <title>Extensions</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> <meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall"> +<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -210,7 +210,8 @@ bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded. <p> Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies a different, incompatible bytecode format for ports that use this mode (e.g. -ARM64). This may be rectified in the future. +ARM64 or MIPS64) or when explicitly enabled for x64. This may be rectified +in the future. </p> <h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3> @@ -291,8 +292,8 @@ enabled: <li><tt>load(string|reader [, chunkname [,mode [,env]]])</tt>.</li> <li><tt>loadstring()</tt> is an alias for <tt>load()</tt>.</li> <li><tt>loadfile(filename [,mode [,env]])</tt>.</li> -<li><tt>math.log(x [,base])</tt>. -<li><tt>string.rep(s, n [,sep])</tt>. +<li><tt>math.log(x [,base])</tt>.</li> +<li><tt>string.rep(s, n [,sep])</tt>.</li> <li><tt>string.format()</tt>: <tt>%q</tt> reversible. <tt>%s</tt> checks <tt>__tostring</tt>. <tt>%a</tt> and <tt>"%A</tt> added.</li> @@ -311,6 +312,26 @@ indexes for varargs.</li> <li><tt>debug.getupvalue()</tt> and <tt>debug.setupvalue()</tt> handle C functions.</li> <li><tt>debug.upvalueid()</tt> and <tt>debug.upvaluejoin()</tt>.</li> +<li>Lua/C API extensions: +<tt>lua_version()</tt> +<tt>lua_upvalueid()</tt> +<tt>lua_upvaluejoin()</tt> +<tt>lua_loadx()</tt> +<tt>lua_copy()</tt> +<tt>lua_tonumberx()</tt> +<tt>lua_tointegerx()</tt> +<tt>luaL_fileresult()</tt> +<tt>luaL_execresult()</tt> +<tt>luaL_loadfilex()</tt> +<tt>luaL_loadbufferx()</tt> +<tt>luaL_traceback()</tt> +<tt>luaL_setfuncs()</tt> +<tt>luaL_pushmodule()</tt> +<tt>luaL_newlibtable()</tt> +<tt>luaL_newlib()</tt> +<tt>luaL_testudata()</tt> +<tt>luaL_setmetatable()</tt> +</li> <li>Command line option <tt>-E</tt>.</li> <li>Command line checks <tt>__tostring</tt> for errors.</li> </ul> @@ -335,7 +356,9 @@ instead of <tt>true</tt>.</li> exit status.</li> <li><tt>debug.setmetatable()</tt> returns object.</li> <li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li> -<li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>. +<li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.</li> +<li><tt>package.searchers</tt>.</li> +<li><tt>module()</tt> returns the module table.</li> </ul> <p> Note: this provides only partial compatibility with Lua 5.2 at the @@ -349,6 +372,13 @@ break the Lua/C API and ABI (e.g. <tt>_ENV</tt>). LuaJIT supports some extensions from Lua 5.3: <ul> <li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li> +<li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li> +<li><tt>io.read()</tt> and <tt>file:read()</tt> accept formats with or without a leading <tt>*</tt>.</li> +<li><tt>table.move(a1, f, e, t [,a2])</tt>.</li> +<li><tt>coroutine.isyieldable()</tt>.</li> +<li>Lua/C API extensions: +<tt>lua_isyieldable()</tt> +</li> </ul> <h2 id="exceptions">C++ Exception Interoperability</h2> @@ -365,25 +395,30 @@ the toolchain used to compile LuaJIT: </tr> <tr class="odd separate"> <td class="excplatform">POSIX/x64, DWARF2 unwinding</td> -<td class="exccompiler">GCC 4.3+</td> +<td class="exccompiler">GCC 4.3+, Clang</td> <td class="excinterop"><b style="color: #00a000;">Full</b></td> </tr> <tr class="even"> +<td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td> +<td class="exccompiler">GCC, Clang</td> +<td class="excinterop"><b style="color: #00a000;">Full</b></td> +</tr> +<tr class="odd"> <td class="excplatform">Other platforms, DWARF2 unwinding</td> -<td class="exccompiler">GCC</td> +<td class="exccompiler">GCC, Clang</td> <td class="excinterop"><b style="color: #c06000;">Limited</b></td> </tr> -<tr class="odd"> +<tr class="even"> <td class="excplatform">Windows/x64</td> <td class="exccompiler">MSVC or WinSDK</td> <td class="excinterop"><b style="color: #00a000;">Full</b></td> </tr> -<tr class="even"> +<tr class="odd"> <td class="excplatform">Windows/x86</td> <td class="exccompiler">Any</td> -<td class="excinterop"><b style="color: #a00000;">No</b></td> +<td class="excinterop"><b style="color: #00a000;">Full</b></td> </tr> -<tr class="odd"> +<tr class="even"> <td class="excplatform">Other platforms</td> <td class="exccompiler">Other compilers</td> <td class="excinterop"><b style="color: #a00000;">No</b></td> @@ -432,20 +467,12 @@ C++ destructors.</li> <li>Lua errors <b>cannot</b> be caught on the C++ side.</li> <li>Throwing Lua errors across C++ frames will <b>not</b> call C++ destructors.</li> -<li>Additionally, on Windows/x86 with SEH-based C++ exceptions: -it's <b>not</b> safe to throw a Lua error across any frames containing -a C++ function with any try/catch construct or using variables with -(implicit) destructors. This also applies to any functions which may be -inlined in such a function. It doesn't matter whether <tt>lua_error()</tt> -is called inside or outside of a try/catch or whether any object actually -needs to be destroyed: the SEH chain is corrupted and this will eventually -lead to the termination of the process.</li> </ul> <br class="flush"> </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2015 Mike Pall +Copyright © 2005-2017 Mike Pall <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/luajit-2.1/doc/faq.html b/luajit-2.1/doc/faq.html index aebaef5..2c93074 100644 --- a/luajit-2.1/doc/faq.html +++ b/luajit-2.1/doc/faq.html @@ -4,7 +4,7 @@ <title>Frequently Asked Questions (FAQ)</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> <meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall"> +<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -176,7 +176,7 @@ the development of certain features, if they are important to you. </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2015 Mike Pall +Copyright © 2005-2017 Mike Pall <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/luajit-2.1/doc/install.html b/luajit-2.1/doc/install.html index a4cc721..851f910 100644 --- a/luajit-2.1/doc/install.html +++ b/luajit-2.1/doc/install.html @@ -4,7 +4,7 @@ <title>Installation</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> <meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall"> +<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -122,7 +122,7 @@ operating systems, CPUs and compilers: <tr class="even"> <td class="compatcpu">x64 (64 bit)</td> <td class="compatos">GCC 4.2+</td> -<td class="compatos">ORBIS (<a href="#ps4">PS4</a>)</td> +<td class="compatos">GCC 4.2+<br>ORBIS (<a href="#ps4">PS4</a>)</td> <td class="compatos">XCode 5.0+<br>Clang</td> <td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0<br>Durango (<a href="#xboxone">Xbox One</a>)</td> </tr> @@ -148,7 +148,7 @@ operating systems, CPUs and compilers: <td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td> </tr> <tr class="even"> -<td class="compatcpu"><a href="#cross2">MIPS</a></td> +<td class="compatcpu"><a href="#cross2">MIPS32<br>MIPS64</a></td> <td class="compatos">GCC 4.3+</td> <td class="compatos">GCC 4.3+</td> <td class="compatos compatno"> </td> @@ -175,6 +175,14 @@ MSVC or WinSDK.</li> Please read the instructions given in these files, before changing any settings. </p> +<p> +LuaJIT on x64 currently uses 32 bit GC objects by default. +<tt>LJ_GC64</tt> mode may be explicitly enabled: +add <tt>XCFLAGS=-DLUAJIT_ENABLE_GC64</tt> to the make command or run +<tt>msvcbuild gc64</tt> for MSVC/WinSDK. Please check the note +about the <a href="extensions.html#string_dump">bytecode format</a> +differences, too. +</p> <h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2> <h3>Prerequisites</h3> @@ -202,7 +210,7 @@ which is probably the default on your system, anyway. Simply run: make </pre> <p> -This always builds a native x86, x64 or PPC binary, depending on the host OS +This always builds a native binary, depending on the host OS you're running this command on. Check the section on <a href="#cross">cross-compilation</a> for more options. </p> @@ -333,22 +341,36 @@ directory where <tt>luajit.exe</tt> is installed <h2 id="cross">Cross-compiling LuaJIT</h2> <p> +First, let's clear up some terminology: +</p> +<ul> +<li>Host: This is your development system, usually based on a x64 or x86 CPU.</li> +<li>Target: This is the target system you want LuaJIT to run on, e.g. Android/ARM.</li> +<li>Toolchain: This comprises a C compiler, linker, assembler and a matching C library.</li> +<li>Host (or system) toolchain: This is the toolchain used to build native binaries for your host system.</li> +<li>Cross-compile toolchain: This is the toolchain used to build binaries for the target system. They can only be run on the target system.</li> +</ul> +<p> The GNU Makefile-based build system allows cross-compiling on any host -for any supported target, as long as both architectures have the same -pointer size. If you want to cross-compile to any 32 bit target on an -x64 OS, you need to install the multilib development package (e.g. -<tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part -(<tt>HOST_CC="gcc -m32"</tt>). +for any supported target: </p> +<ul> +<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li> +<li>Both host and target architectures must have the same pointer size.</li> +<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li> +<li>64 bit targets always require compilation on a 64 bit host.</li> +</ul> <p> You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the -target OS differ, or you'll get assembler or linker errors. E.g. if -you're compiling on a Windows or OSX host for embedded Linux or Android, -you need to add <tt>TARGET_SYS=Linux</tt> to the examples below. For a -minimal target OS, you may need to disable the built-in allocator in -<tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>. The examples -below only show some popular targets — please check the comments -in <tt>src/Makefile</tt> for more details. +target OS differ, or you'll get assembler or linker errors: +</p> +<ul> +<li>E.g. if you're compiling on a Windows or OSX host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li> +<li>For a minimal target OS, you may need to disable the built-in allocator in <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.</li> +<li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the install step, too.</li> +</ul> +<p> +Here are some examples where host and target have the same CPU: </p> <pre class="code"> # Cross-compile to a 32 bit binary on a multilib x64 OS @@ -366,38 +388,47 @@ use the canonical toolchain triplets for Linux. </p> <p> Since there's often no easy way to detect CPU features at runtime, it's -important to compile with the proper CPU or architecture settings. You -can specify these when building the toolchain yourself. Or add -<tt>-mcpu=...</tt> or <tt>-march=...</tt> to <tt>TARGET_CFLAGS</tt>. For -ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, -too. Otherwise LuaJIT may not run at the full performance of your target -CPU. +important to compile with the proper CPU or architecture settings: +</o> +<ul> +<li>The best way to get consistent results is to specify the correct settings when building the toolchain yourself.</li> +<li>For a pre-built, generic toolchain add <tt>-mcpu=...</tt> or <tt>-march=...</tt> and other necessary flags to <tt>TARGET_CFLAGS</tt>.</li> +<li>For ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, too. Otherwise LuaJIT may not run at the full performance of your target CPU.</li> +<li>For MIPS it's important to select a supported ABI (o32 on MIPS32, n64 on MIPS64) and consistently compile your project either with hard-float or soft-float compiler settings.</li> +</ul> +<p> +Here are some examples for targets with a different CPU than the host: </p> <pre class="code"> # ARM soft-float make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ TARGET_CFLAGS="-mfloat-abi=soft" -# ARM soft-float ABI with VFP (example for Cortex-A8) +# ARM soft-float ABI with VFP (example for Cortex-A9) make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ - TARGET_CFLAGS="-mcpu=cortex-a8 -mfloat-abi=softfp" + TARGET_CFLAGS="-mcpu=cortex-a9 -mfloat-abi=softfp" -# ARM hard-float ABI with VFP (armhf, requires recent toolchain) +# ARM hard-float ABI with VFP (armhf, most modern toolchains) make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf- -# ARM64 (requires x64 host) +# ARM64 make CROSS=aarch64-linux- # PPC make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- -# MIPS big-endian +# MIPS32 big-endian make HOST_CC="gcc -m32" CROSS=mips-linux- -# MIPS little-endian +# MIPS32 little-endian make HOST_CC="gcc -m32" CROSS=mipsel-linux- + +# MIPS64 big-endian +make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" +# MIPS64 little-endian +make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" </pre> <p> -You can cross-compile for <b id="android">Android</b> using the <a href="http://developer.android.com/sdk/ndk/index.html"><span class="ext">»</span> Android NDK</a>. +You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/index.html">Android NDK</a>. The environment variables need to match the install locations and the desired target platform. E.g. Android 4.0 corresponds to ABI level 14. For details check the folder <tt>docs</tt> in the NDK directory. @@ -411,7 +442,7 @@ to build/deploy or which lowest common denominator you want to pick: # Android/ARM, armeabi (ARMv5TE soft-float), Android 2.2+ (Froyo) NDK=/opt/android/ndk NDKABI=8 -NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6 +NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9 NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi- NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm" make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" @@ -419,16 +450,16 @@ make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" # Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.0+ (ICS) NDK=/opt/android/ndk NDKABI=14 -NDKVER=$NDK/toolchains/arm-linux-androideabi-4.6 +NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9 NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi- NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm" NDKARCH="-march=armv7-a -mfloat-abi=softfp -Wl,--fix-cortex-a8" make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF $NDKARCH" -# Android/MIPS, mips (MIPS32R1 hard-float), Android 4.0+ (ICS) +# Android/MIPS, mipsel (MIPS32R1 hard-float), Android 4.0+ (ICS) NDK=/opt/android/ndk NDKABI=14 -NDKVER=$NDK/toolchains/mipsel-linux-android-4.6 +NDKVER=$NDK/toolchains/mipsel-linux-android-4.9 NDKP=$NDKVER/prebuilt/linux-x86/bin/mipsel-linux-android- NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-mips" make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" @@ -436,7 +467,7 @@ make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" # Android/x86, x86 (i686 SSE3), Android 4.0+ (ICS) NDK=/opt/android/ndk NDKABI=14 -NDKVER=$NDK/toolchains/x86-4.6 +NDKVER=$NDK/toolchains/x86-4.9 NDKP=$NDKVER/prebuilt/linux-x86/bin/i686-linux-android- NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86" make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" @@ -456,14 +487,15 @@ Or use Android. :-p ISDKP=$(xcrun --sdk iphoneos --show-sdk-path) ICC=$(xcrun --sdk iphoneos --find clang) ISDKF="-arch armv7 -isysroot $ISDKP" -make HOST_CC="clang -m32 -arch i386" CROSS="$(dirname $ICC)/" \ - TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS +make DEFAULT_CC=clang HOST_CC="clang -m32 -arch i386" \ + CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS # iOS/ARM64 ISDKP=$(xcrun --sdk iphoneos --show-sdk-path) ICC=$(xcrun --sdk iphoneos --find clang) ISDKF="-arch arm64 -isysroot $ISDKP" -make CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS +make DEFAULT_CC=clang CROSS="$(dirname $ICC)/" \ + TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS </pre> <h3 id="consoles">Cross-compiling for consoles</h3> @@ -560,14 +592,11 @@ intend to load Lua/C modules at runtime. </li> <li> If you're building a 64 bit application on OSX which links directly or -indirectly against LuaJIT, you need to link your main executable -with these flags: +indirectly against LuaJIT which is not built for <tt>LJ_GC64</tt> mode, +you need to link your main executable with these flags: <pre class="code"> -pagezero_size 10000 -image_base 100000000 </pre> -Also, it's recommended to <tt>rebase</tt> all (self-compiled) shared libraries -which are loaded at runtime on OSX/x64 (e.g. C extension modules for Lua). -See: <tt>man rebase</tt> </li> </ul> <p>Additional hints for initializing LuaJIT using the C API functions:</p> @@ -653,7 +682,7 @@ to me (the upstream) and not you (the package maintainer), anyway. </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2015 Mike Pall +Copyright © 2005-2017 Mike Pall <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/luajit-2.1/doc/luajit.html b/luajit-2.1/doc/luajit.html index 8a653e2..ef5b824 100644 --- a/luajit-2.1/doc/luajit.html +++ b/luajit-2.1/doc/luajit.html @@ -4,7 +4,7 @@ <title>LuaJIT</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> <meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall"> +<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -152,7 +152,7 @@ Lua is a powerful, dynamic and light-weight programming language. It may be embedded or used as a general-purpose, stand-alone language. </p> <p> -LuaJIT is Copyright © 2005-2015 Mike Pall, released under the +LuaJIT is Copyright © 2005-2017 Mike Pall, released under the <a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">»</span> MIT open source license</a>. </p> <p> @@ -169,10 +169,10 @@ LuaJIT is Copyright © 2005-2015 Mike Pall, released under the <tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr> </table> <table class="feature compiler"> -<tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr> +<tr><td>GCC</td><td>Clang<br>LLVM</td><td>MSVC</td></tr> </table> <table class="feature cpu"> -<tr><td>x86</td><td>x64</td><td>ARM</td><td>ARM64</td><td>PPC</td><td>MIPS</td></tr> +<tr><td>x86<br>x64</td><td>ARM<br>ARM64</td><td>PPC</td><td>MIPS32<br>MIPS64</td></tr> </table> <table class="feature fcompat"> <tr><td>Lua 5.1<br>API+ABI</td><td>+ JIT</td><td>+ BitOp</td><td>+ FFI</td><td>Drop-in<br>DLL/.so</td></tr> @@ -226,7 +226,7 @@ Please select a sub-topic in the navigation bar to learn more about LuaJIT. </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2015 Mike Pall +Copyright © 2005-2017 Mike Pall <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/luajit-2.1/doc/running.html b/luajit-2.1/doc/running.html index 5ee67c9..64f0491 100644 --- a/luajit-2.1/doc/running.html +++ b/luajit-2.1/doc/running.html @@ -4,7 +4,7 @@ <title>Running LuaJIT</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> <meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall"> +<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -299,7 +299,7 @@ Here are the parameters and their default settings: </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2015 Mike Pall +Copyright © 2005-2017 Mike Pall <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/luajit-2.1/doc/status.html b/luajit-2.1/doc/status.html index 91ed9cb..cad6ca6 100644 --- a/luajit-2.1/doc/status.html +++ b/luajit-2.1/doc/status.html @@ -4,7 +4,7 @@ <title>Status</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> <meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall"> +<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -91,24 +91,29 @@ hooks for non-Lua functions) and shows slightly different behavior in LuaJIT (no per-coroutine hooks, no tail call counting). </li> <li> -Some checks are missing in the JIT-compiled code for obscure situations -with <b>open upvalues aliasing</b> one of the SSA slots later on (or -vice versa). Bonus points, if you can find a real world test case for -this. -</li> -<li> Currently some <b>out-of-memory</b> errors from <b>on-trace code</b> are not handled correctly. The error may fall through an on-trace <tt>pcall</tt> or it may be passed on to the function set with <tt>lua_atpanic</tt> on x64. This issue will be fixed with the new garbage collector. </li> +<li> +LuaJIT on 64 bit systems provides a <b>limited range</b> of 47 bits for the +<b>legacy <tt>lightuserdata</tt></b> data type. +This is only relevant on x64 systems which use the negative part of the +virtual address space in user mode, e.g. Solaris/x64, and on ARM64 systems +configured with a 48 bit or 52 bit VA. +Avoid using <tt>lightuserdata</tt> to hold pointers that may point outside +of that range, e.g. variables on the stack. In general, avoid this data +type for new code and replace it with (much more performant) FFI bindings. +FFI cdata pointers can address the full 64 bit range. +</li> </ul> <br class="flush"> </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2015 Mike Pall +Copyright © 2005-2017 Mike Pall <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/luajit-2.1/dynasm/dasm_arm.h b/luajit-2.1/dynasm/dasm_arm.h index 57e0116..a43f7c6 100644 --- a/luajit-2.1/dynasm/dasm_arm.h +++ b/luajit-2.1/dynasm/dasm_arm.h @@ -1,6 +1,6 @@ /* ** DynASM ARM encoding engine. -** Copyright (C) 2005-2015 Mike Pall. All rights reserved. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ diff --git a/luajit-2.1/dynasm/dasm_arm.lua b/luajit-2.1/dynasm/dasm_arm.lua index 6a1d1d5..32f595a 100644 --- a/luajit-2.1/dynasm/dasm_arm.lua +++ b/luajit-2.1/dynasm/dasm_arm.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM ARM module. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ diff --git a/luajit-2.1/dynasm/dasm_arm64.h b/luajit-2.1/dynasm/dasm_arm64.h index d912e61..47e1e07 100644 --- a/luajit-2.1/dynasm/dasm_arm64.h +++ b/luajit-2.1/dynasm/dasm_arm64.h @@ -1,6 +1,6 @@ /* ** DynASM ARM64 encoding engine. -** Copyright (C) 2005-2015 Mike Pall. All rights reserved. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ diff --git a/luajit-2.1/dynasm/dasm_arm64.lua b/luajit-2.1/dynasm/dasm_arm64.lua index c1e3a81..8a5f735 100644 --- a/luajit-2.1/dynasm/dasm_arm64.lua +++ b/luajit-2.1/dynasm/dasm_arm64.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM ARM64 module. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ diff --git a/luajit-2.1/dynasm/dasm_mips.h b/luajit-2.1/dynasm/dasm_mips.h index 2f4c2d2..4b49fd8 100644 --- a/luajit-2.1/dynasm/dasm_mips.h +++ b/luajit-2.1/dynasm/dasm_mips.h @@ -1,6 +1,6 @@ /* ** DynASM MIPS encoding engine. -** Copyright (C) 2005-2015 Mike Pall. All rights reserved. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ @@ -21,7 +21,7 @@ enum { /* The following actions need a buffer position. */ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, /* The following actions also have an argument. */ - DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, + DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS, DASM__MAX }; @@ -231,7 +231,7 @@ void dasm_put(Dst_DECL, int start, ...) *pl = -pos; /* Label exists now. */ b[pos++] = ofs; /* Store pass1 offset estimate. */ break; - case DASM_IMM: + case DASM_IMM: case DASM_IMMS: #ifdef DASM_CHECKS CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); #endif @@ -299,7 +299,7 @@ int dasm_link(Dst_DECL, size_t *szp) case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; case DASM_REL_LG: case DASM_REL_PC: pos++; break; case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; - case DASM_IMM: pos++; break; + case DASM_IMM: case DASM_IMMS: pos++; break; } } stop: (void)0; @@ -356,7 +356,7 @@ int dasm_encode(Dst_DECL, void *buffer) if (ins & 2048) n = n - (int)((char *)cp - base); else - n = (n + (int)base) & 0x0fffffff; + n = (n + (int)(size_t)base) & 0x0fffffff; patchrel: CK((n & 3) == 0 && ((n + ((ins & 2048) ? 0x00020000 : 0)) >> @@ -367,6 +367,9 @@ int dasm_encode(Dst_DECL, void *buffer) ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); break; case DASM_LABEL_PC: break; + case DASM_IMMS: + cp[-1] |= ((n>>3) & 4); n &= 0x1f; + /* fallthrough */ case DASM_IMM: cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); break; diff --git a/luajit-2.1/dynasm/dasm_mips.lua b/luajit-2.1/dynasm/dasm_mips.lua index ef38343..78a4e34 100644 --- a/luajit-2.1/dynasm/dasm_mips.lua +++ b/luajit-2.1/dynasm/dasm_mips.lua @@ -1,17 +1,19 @@ ------------------------------------------------------------------------------ --- DynASM MIPS module. +-- DynASM MIPS32/MIPS64 module. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ +local mips64 = mips64 + -- Module information: local _info = { - arch = "mips", - description = "DynASM MIPS module", + arch = mips64 and "mips64" or "mips", + description = "DynASM MIPS32/MIPS64 module", version = "1.4.0", vernum = 10400, - release = "2015-10-18", + release = "2016-05-24", author = "Mike Pall", license = "MIT", } @@ -27,7 +29,8 @@ local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char local match, gmatch = _s.match, _s.gmatch local concat, sort = table.concat, table.sort local bit = bit or require("bit") -local band, shl, sar, tohex = bit.band, bit.lshift, bit.arshift, bit.tohex +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local tohex = bit.tohex -- Inherited tables and callbacks. local g_opt, g_arch @@ -38,7 +41,7 @@ local wline, werror, wfatal, wwarn local action_names = { "STOP", "SECTION", "ESC", "REL_EXT", "ALIGN", "REL_LG", "LABEL_LG", - "REL_PC", "LABEL_PC", "IMM", + "REL_PC", "LABEL_PC", "IMM", "IMMS", } -- Maximum number of section buffer positions for dasm_put(). @@ -251,6 +254,10 @@ local map_op = { bnel_3 = "54000000STB", blezl_2 = "58000000SB", bgtzl_2 = "5c000000SB", + daddi_3 = mips64 and "60000000TSI", + daddiu_3 = mips64 and "64000000TSI", + ldl_2 = mips64 and "68000000TO", + ldr_2 = mips64 and "6c000000TO", lb_2 = "80000000TO", lh_2 = "84000000TO", lwl_2 = "88000000TO", @@ -258,23 +265,30 @@ local map_op = { lbu_2 = "90000000TO", lhu_2 = "94000000TO", lwr_2 = "98000000TO", + lwu_2 = mips64 and "9c000000TO", sb_2 = "a0000000TO", sh_2 = "a4000000TO", swl_2 = "a8000000TO", sw_2 = "ac000000TO", + sdl_2 = mips64 and "b0000000TO", + sdr_2 = mips64 and "b1000000TO", swr_2 = "b8000000TO", cache_2 = "bc000000NO", ll_2 = "c0000000TO", lwc1_2 = "c4000000HO", pref_2 = "cc000000NO", ldc1_2 = "d4000000HO", + ld_2 = mips64 and "dc000000TO", sc_2 = "e0000000TO", swc1_2 = "e4000000HO", + scd_2 = mips64 and "f0000000TO", sdc1_2 = "f4000000HO", + sd_2 = mips64 and "fc000000TO", -- Opcode SPECIAL. nop_0 = "00000000", sll_3 = "00000000DTA", + sextw_2 = "00000000DT", movf_2 = "00000001DS", movf_3 = "00000001DSC", movt_2 = "00010001DS", @@ -285,6 +299,7 @@ local map_op = { sllv_3 = "00000004DTS", srlv_3 = "00000006DTS", rotrv_3 = "00000046DTS", + drotrv_3 = mips64 and "00000056DTS", srav_3 = "00000007DTS", jr_1 = "00000008S", jalr_1 = "0000f809S", @@ -300,15 +315,22 @@ local map_op = { mthi_1 = "00000011S", mflo_1 = "00000012D", mtlo_1 = "00000013S", + dsllv_3 = mips64 and "00000014DTS", + dsrlv_3 = mips64 and "00000016DTS", + dsrav_3 = mips64 and "00000017DTS", mult_2 = "00000018ST", multu_2 = "00000019ST", div_2 = "0000001aST", divu_2 = "0000001bST", + dmult_2 = mips64 and "0000001cST", + dmultu_2 = mips64 and "0000001dST", + ddiv_2 = mips64 and "0000001eST", + ddivu_2 = mips64 and "0000001fST", add_3 = "00000020DST", - move_2 = "00000021DS", + move_2 = mips64 and "00000025DS" or "00000021DS", addu_3 = "00000021DST", sub_3 = "00000022DST", - negu_2 = "00000023DT", + negu_2 = mips64 and "0000002fDT" or "00000023DT", subu_3 = "00000023DST", and_3 = "00000024DST", or_3 = "00000025DST", @@ -317,6 +339,10 @@ local map_op = { nor_3 = "00000027DST", slt_3 = "0000002aDST", sltu_3 = "0000002bDST", + dadd_3 = mips64 and "0000002cDST", + daddu_3 = mips64 and "0000002dDST", + dsub_3 = mips64 and "0000002eDST", + dsubu_3 = mips64 and "0000002fDST", tge_2 = "00000030ST", tge_3 = "00000030STZ", tgeu_2 = "00000031ST", @@ -329,6 +355,14 @@ local map_op = { teq_3 = "00000034STZ", tne_2 = "00000036ST", tne_3 = "00000036STZ", + dsll_3 = mips64 and "00000038DTa", + dsrl_3 = mips64 and "0000003aDTa", + drotr_3 = mips64 and "0020003aDTa", + dsra_3 = mips64 and "0000003bDTa", + dsll32_3 = mips64 and "0000003cDTA", + dsrl32_3 = mips64 and "0000003eDTA", + drotr32_3 = mips64 and "0020003eDTA", + dsra32_3 = mips64 and "0000003fDTA", -- Opcode REGIMM. bltz_2 = "04000000SB", @@ -356,13 +390,24 @@ local map_op = { msubu_2 = "70000005ST", clz_2 = "70000020DS=", clo_2 = "70000021DS=", + dclz_2 = mips64 and "70000024DS=", + dclo_2 = mips64 and "70000025DS=", sdbbp_0 = "7000003f", sdbbp_1 = "7000003fY", -- Opcode SPECIAL3. ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 + dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32 + dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1 + dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1 + zextw_2 = mips64 and "7c00f803TS", ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1 + dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33 + dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33 + dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1 wsbh_2 = "7c0000a0DT", + dsbh_2 = mips64 and "7c0000a4DT", + dshd_2 = mips64 and "7c000164DT", seb_2 = "7c000420DT", seh_2 = "7c000620DT", rdhwr_2 = "7c00003bTD", @@ -370,8 +415,12 @@ local map_op = { -- Opcode COP0. mfc0_2 = "40000000TD", mfc0_3 = "40000000TDW", + dmfc0_2 = mips64 and "40200000TD", + dmfc0_3 = mips64 and "40200000TDW", mtc0_2 = "40800000TD", mtc0_3 = "40800000TDW", + dmtc0_2 = mips64 and "40a00000TD", + dmtc0_3 = mips64 and "40a00000TDW", rdpgpr_2 = "41400000DT", di_0 = "41606000", di_1 = "41606000T", @@ -388,9 +437,11 @@ local map_op = { -- Opcode COP1. mfc1_2 = "44000000TG", + dmfc1_2 = mips64 and "44200000TG", cfc1_2 = "44400000TG", mfhc1_2 = "44600000TG", mtc1_2 = "44800000TG", + dmtc1_2 = mips64 and "44a00000TG", ctc1_2 = "44c00000TG", mthc1_2 = "44e00000TG", @@ -633,7 +684,7 @@ local function parse_fpr(expr) werror("bad register name `"..expr.."'") end -local function parse_imm(imm, bits, shift, scale, signed) +local function parse_imm(imm, bits, shift, scale, signed, action) local n = tonumber(imm) if n then local m = sar(n, scale) @@ -651,7 +702,8 @@ local function parse_imm(imm, bits, shift, scale, signed) match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then werror("expected immediate operand, got register") else - waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) + waction(action or "IMM", + (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm) return 0 end end @@ -763,6 +815,9 @@ map_op[".template__"] = function(params, template, nparams) n = n + 1 elseif p == "A" then op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 + elseif p == "a" then + local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1 + op = op + band(m, 0x7c0) + band(shr(m, 9), 4) elseif p == "M" then op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1 elseif p == "N" then diff --git a/luajit-2.1/dynasm/dasm_mips64.lua b/luajit-2.1/dynasm/dasm_mips64.lua new file mode 100644 index 0000000..5636b23 --- /dev/null +++ b/luajit-2.1/dynasm/dasm_mips64.lua @@ -0,0 +1,12 @@ +------------------------------------------------------------------------------ +-- DynASM MIPS64 module. +-- +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ +-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module. +-- All the interesting stuff is there. +------------------------------------------------------------------------------ + +mips64 = true -- Using a global is an ugly, but effective solution. +return require("dasm_mips") diff --git a/luajit-2.1/dynasm/dasm_ppc.h b/luajit-2.1/dynasm/dasm_ppc.h index 332c64d..3a7ee9b 100644 --- a/luajit-2.1/dynasm/dasm_ppc.h +++ b/luajit-2.1/dynasm/dasm_ppc.h @@ -1,6 +1,6 @@ /* ** DynASM PPC/PPC64 encoding engine. -** Copyright (C) 2005-2015 Mike Pall. All rights reserved. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ diff --git a/luajit-2.1/dynasm/dasm_ppc.lua b/luajit-2.1/dynasm/dasm_ppc.lua index 1e9bcca..f73974d 100644 --- a/luajit-2.1/dynasm/dasm_ppc.lua +++ b/luajit-2.1/dynasm/dasm_ppc.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM PPC/PPC64 module. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. -- -- Support for various extensions contributed by Caio Souza Oliveira. diff --git a/luajit-2.1/dynasm/dasm_proto.h b/luajit-2.1/dynasm/dasm_proto.h index 93ca065..59d9e2b 100644 --- a/luajit-2.1/dynasm/dasm_proto.h +++ b/luajit-2.1/dynasm/dasm_proto.h @@ -1,6 +1,6 @@ /* ** DynASM encoding engine prototypes. -** Copyright (C) 2005-2015 Mike Pall. All rights reserved. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ diff --git a/luajit-2.1/dynasm/dasm_x64.lua b/luajit-2.1/dynasm/dasm_x64.lua index b1b6202..e8bdeb3 100644 --- a/luajit-2.1/dynasm/dasm_x64.lua +++ b/luajit-2.1/dynasm/dasm_x64.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM x64 module. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ -- This module just sets 64 bit mode for the combined x86/x64 module. diff --git a/luajit-2.1/dynasm/dasm_x86.h b/luajit-2.1/dynasm/dasm_x86.h index 175febe..bc63635 100644 --- a/luajit-2.1/dynasm/dasm_x86.h +++ b/luajit-2.1/dynasm/dasm_x86.h @@ -1,6 +1,6 @@ /* ** DynASM x86 encoding engine. -** Copyright (C) 2005-2015 Mike Pall. All rights reserved. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ @@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...) dasm_State *D = Dst_REF; dasm_ActList p = D->actionlist + start; dasm_Section *sec = D->section; - int pos = sec->pos, ofs = sec->ofs, mrm = 4; + int pos = sec->pos, ofs = sec->ofs, mrm = -1; int *b; if (pos >= sec->epos) { @@ -193,7 +193,7 @@ void dasm_put(Dst_DECL, int start, ...) b[pos++] = n; switch (action) { case DASM_DISP: - if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; } + if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; } case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ case DASM_IMM_D: ofs += 4; break; @@ -203,10 +203,17 @@ void dasm_put(Dst_DECL, int start, ...) case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; case DASM_SPACE: p++; ofs += n; break; case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ - case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG); - if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue; + case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG); + if (*p < 0x40 && p[1] == DASM_DISP) mrm = n; + if (*p < 0x20 && (n&7) == 4) ofs++; + switch ((*p++ >> 3) & 3) { + case 3: n |= b[pos-3]; + case 2: n |= b[pos-2]; + case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; } + } + continue; } - mrm = 4; + mrm = -1; } else { int *pl, n; switch (action) { @@ -393,7 +400,22 @@ int dasm_encode(Dst_DECL, void *buffer) case DASM_IMM_W: dasmw(n); break; case DASM_VREG: { int t = *p++; - if (t >= 5) n <<= 4; else if (t >= 2) n <<= 3; + unsigned char *ex = cp - (t&7); + if ((n & 8) && t < 0xa0) { + if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6); + n &= 7; + } else if (n & 0x10) { + if (*ex & 0x80) { + *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2; + } + while (++ex < cp) ex[-1] = *ex; + if (mark) mark--; + cp--; + n &= 7; + } + if (t >= 0xc0) n <<= 4; + else if (t >= 0x40) n <<= 3; + else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; } cp[-1] ^= n; break; } diff --git a/luajit-2.1/dynasm/dasm_x86.lua b/luajit-2.1/dynasm/dasm_x86.lua index 1fa80b5..4c031e2 100644 --- a/luajit-2.1/dynasm/dasm_x86.lua +++ b/luajit-2.1/dynasm/dasm_x86.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM x86/x64 module. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ @@ -41,7 +41,7 @@ local action_names = { -- int arg, 1 buffer pos: "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", -- action arg (1 byte), int arg, 1 buffer pos (reg/num): - "VREG", "SPACE", -- !x64: VREG support NYI. + "VREG", "SPACE", -- ptrdiff_t arg, 1 buffer pos (address): !x64 "SETLABEL", "REL_A", -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): @@ -83,6 +83,21 @@ local actargs = { 0 } -- Current number of section buffer positions for dasm_put(). local secpos = 1 +-- VREG kind encodings, pre-shifted by 5 bits. +local map_vreg = { + ["modrm.rm.m"] = 0x00, + ["modrm.rm.r"] = 0x20, + ["opcode"] = 0x20, + ["sib.base"] = 0x20, + ["sib.index"] = 0x40, + ["modrm.reg"] = 0x80, + ["vex.v"] = 0xa0, + ["imm.hi"] = 0xc0, +} + +-- Current number of VREG actions contributing to REX/VEX shrinkage. +local vreg_shrink_count = 0 + ------------------------------------------------------------------------------ -- Compute action numbers for action names. @@ -134,6 +149,21 @@ local function waction(action, a, num) if a or num then secpos = secpos + (num or 1) end end +-- Optionally add a VREG action. +local function wvreg(kind, vreg, psz, sk, defer) + if not vreg then return end + waction("VREG", vreg) + local b = assert(map_vreg[kind], "bad vreg kind `"..vreg.."'") + if b < (sk or 0) then + vreg_shrink_count = vreg_shrink_count + 1 + end + if not defer then + b = b + vreg_shrink_count * 8 + vreg_shrink_count = 0 + end + wputxb(b + (psz or 0)) +end + -- Add call to embedded DynASM C code. local function wcall(func, args) wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) @@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"}) mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) map_reg_valid_index[map_archdef.esp] = false if x64 then map_reg_valid_index[map_archdef.rsp] = false end +if x64 then map_reg_needrex[map_archdef.Rb] = true end map_archdef["Ra"] = "@"..addrsize -- FP registers (internally tword sized, but use "f" as operand size). @@ -463,16 +494,24 @@ local function wputszarg(sz, n) end -- Put multi-byte opcode with operand-size dependent modifications. -local function wputop(sz, op, rex, vex) +local function wputop(sz, op, rex, vex, vregr, vregxb) + local psz, sk = 0, nil if vex then local tail if vex.m == 1 and band(rex, 11) == 0 then - wputb(0xc5) + if x64 and vregxb then + sk = map_vreg["modrm.reg"] + else + wputb(0xc5) tail = shl(bxor(band(rex, 4), 4), 5) - else + psz = 3 + end + end + if not tail then wputb(0xc4) wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m) tail = shl(band(rex, 8), 4) + psz = 4 end local reg, vreg = 0, nil if vex.v then @@ -482,12 +521,18 @@ local function wputop(sz, op, rex, vex) end if sz == "y" or vex.l then tail = tail + 4 end wputb(tail + shl(bxor(reg, 15), 3) + vex.p) - if vreg then waction("VREG", vreg); wputxb(4) end + wvreg("vex.v", vreg) rex = 0 if op >= 256 then werror("bad vex opcode") end + else + if rex ~= 0 then + if not x64 then werror("bad operand size") end + elseif (vregr or vregxb) and x64 then + rex = 0x10 + sk = map_vreg["vex.v"] + end end local r - if rex ~= 0 and not x64 then werror("bad operand size") end if sz == "w" then wputb(102) end -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end @@ -496,20 +541,20 @@ local function wputop(sz, op, rex, vex) if rex ~= 0 then local opc3 = band(op, 0xffff00) if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then - wputb(64 + band(rex, 15)); rex = 0 + wputb(64 + band(rex, 15)); rex = 0; psz = 2 end end - wputb(shr(op, 16)); op = band(op, 0xffff) + wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1 end if op >= 256 then local b = shr(op, 8) - if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end - wputb(b) - op = band(op, 255) + if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end + wputb(b); op = band(op, 255); psz = psz + 1 end - if rex ~= 0 then wputb(64 + band(rex, 15)) end + if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end if sz == "b" then op = op - 1 end wputb(op) + return psz, sk end -- Put ModRM or SIB formatted byte. @@ -519,7 +564,7 @@ local function wputmodrm(m, s, rm, vs, vrm) end -- Put ModRM/SIB plus optional displacement. -local function wputmrmsib(t, imark, s, vsreg) +local function wputmrmsib(t, imark, s, vsreg, psz, sk) local vreg, vxreg local reg, xreg = t.reg, t.xreg if reg and reg < 0 then reg = 0; vreg = t.vreg end @@ -529,8 +574,8 @@ local function wputmrmsib(t, imark, s, vsreg) -- Register mode. if sub(t.mode, 1, 1) == "r" then wputmodrm(3, s, reg) - if vsreg then waction("VREG", vsreg); wputxb(2) end - if vreg then waction("VREG", vreg); wputxb(0) end + wvreg("modrm.reg", vsreg, psz+1, sk, vreg) + wvreg("modrm.rm.r", vreg, psz+1, sk) return end @@ -544,21 +589,22 @@ local function wputmrmsib(t, imark, s, vsreg) -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) wputmodrm(0, s, 4) if imark == "I" then waction("MARK") end - if vsreg then waction("VREG", vsreg); wputxb(2) end + wvreg("modrm.reg", vsreg, psz+1, sk, vxreg) wputmodrm(t.xsc, xreg, 5) - if vxreg then waction("VREG", vxreg); wputxb(3) end + wvreg("sib.index", vxreg, psz+2, sk) else -- Pure 32 bit displacement. if x64 and tdisp ~= "table" then wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp) + wvreg("modrm.reg", vsreg, psz+1, sk) if imark == "I" then waction("MARK") end wputmodrm(0, 4, 5) else riprel = x64 wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp) + wvreg("modrm.reg", vsreg, psz+1, sk) if imark == "I" then waction("MARK") end end - if vsreg then waction("VREG", vsreg); wputxb(2) end end if riprel then -- Emit rip-relative displacement. if match("UWSiI", imark) then @@ -586,16 +632,16 @@ local function wputmrmsib(t, imark, s, vsreg) if xreg or band(reg, 7) == 4 then wputmodrm(m or 2, s, 4) -- ModRM. if m == nil or imark == "I" then waction("MARK") end - if vsreg then waction("VREG", vsreg); wputxb(2) end + wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg) wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. - if vxreg then waction("VREG", vxreg); wputxb(3) end - if vreg then waction("VREG", vreg); wputxb(1) end + wvreg("sib.index", vxreg, psz+2, sk, vreg) + wvreg("sib.base", vreg, psz+2, sk) else wputmodrm(m or 2, s, reg) -- ModRM. if (imark == "I" and (m == 1 or m == 2)) or (m == nil and (vsreg or vreg)) then waction("MARK") end - if vsreg then waction("VREG", vsreg); wputxb(2) end - if vreg then waction("VREG", vreg); wputxb(1) end + wvreg("modrm.reg", vsreg, psz+1, sk, vreg) + wvreg("modrm.rm.m", vreg, psz+1, sk) end -- Put displacement. @@ -1521,6 +1567,12 @@ local map_op = { -- AVX, AVX2 integer ops -- In general, xmm requires AVX, ymm requires AVX2. + vaesdec_3 = "rrmo:660F38VDErM", + vaesdeclast_3 = "rrmo:660F38VDFrM", + vaesenc_3 = "rrmo:660F38VDCrM", + vaesenclast_3 = "rrmo:660F38VDDrM", + vaesimc_2 = "rmo:660F38uDBrM", + vaeskeygenassist_3 = "rmio:660F3AuDFrMU", vlddqu_2 = "rxoy:F20FuF0rM", vmaskmovdqu_2 = "rro:660FuF7rM", vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm", @@ -1621,6 +1673,10 @@ local map_op = { vpsravd_3 = "rrmoy:660F38V46rM", vpsrlvd_3 = "rrmoy:660F38V45rM", vpsrlvq_3 = "rrmoy:660F38VX45rM", + + -- Intel ADX + adcx_2 = "rmqd:660F38F6rM", + adox_2 = "rmqd:F30F38F6rM", } ------------------------------------------------------------------------------ @@ -1761,10 +1817,11 @@ local function dopattern(pat, args, sz, op, needrex) if t.xreg and t.xreg > 7 then rex = rex + 2 end if s > 7 then rex = rex + 4 end if needrex then rex = rex + 16 end - wputop(szov, opcode, rex, vex); opcode = nil + local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg) + opcode = nil local imark = sub(pat, -1) -- Force a mark (ugly). -- Put ModRM/SIB with regno/last digit as spare. - wputmrmsib(t, imark, s, addin and addin.vreg) + wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk) addin = nil elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix local b = band(opcode, 255); opcode = shr(opcode, 8) @@ -1791,8 +1848,8 @@ local function dopattern(pat, args, sz, op, needrex) if szov == "q" and rex == 0 then rex = rex + 8 end if needrex then rex = rex + 16 end if addin and addin.reg == -1 then - wputop(szov, opcode - 7, rex, vex) - waction("VREG", addin.vreg); wputxb(0) + local psz, sk = wputop(szov, opcode - 7, rex, vex, true) + wvreg("opcode", addin.vreg, psz, sk) else if addin and addin.reg > 7 then rex = rex + 1 end wputop(szov, opcode, rex, vex) @@ -1836,7 +1893,7 @@ local function dopattern(pat, args, sz, op, needrex) local reg = a.reg if reg < 0 then wputb(0) - waction("VREG", a.vreg); wputxb(5) + wvreg("imm.hi", a.vreg) else wputb(shl(reg, 4)) end @@ -1988,8 +2045,8 @@ if x64 then rex = a.reg > 7 and 9 or 8 end end - wputop(sz, opcode, rex) - if vreg then waction("VREG", vreg); wputxb(0) end + local psz, sk = wputop(sz, opcode, rex, nil, vreg) + wvreg("opcode", vreg, psz, sk) waction("IMM_D", format("(unsigned int)(%s)", op64)) waction("IMM_D", format("(unsigned int)((%s)>>32)", op64)) end diff --git a/luajit-2.1/dynasm/dynasm.lua b/luajit-2.1/dynasm/dynasm.lua index 145fb0c..5ec21a7 100644 --- a/luajit-2.1/dynasm/dynasm.lua +++ b/luajit-2.1/dynasm/dynasm.lua @@ -2,7 +2,7 @@ -- DynASM. A dynamic assembler for code generation engines. -- Originally designed and implemented for LuaJIT. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- See below for full copyright notice. ------------------------------------------------------------------------------ @@ -17,7 +17,7 @@ local _info = { url = "http://luajit.org/dynasm.html", license = "MIT", copyright = [[ -Copyright (C) 2005-2015 Mike Pall. All rights reserved. +Copyright (C) 2005-2017 Mike Pall. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/luajit-2.1/etc/luajit.1 b/luajit-2.1/etc/luajit.1 index fd38b0a..0d263db 100644 --- a/luajit-2.1/etc/luajit.1 +++ b/luajit-2.1/etc/luajit.1 @@ -74,7 +74,7 @@ luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end" Runs some nested loops and shows the resulting traces. .SH COPYRIGHT .PP -\fBLuaJIT\fR is Copyright \(co 2005-2015 Mike Pall. +\fBLuaJIT\fR is Copyright \(co 2005-2017 Mike Pall. .br \fBLuaJIT\fR is open source software, released under the MIT license. .SH SEE ALSO diff --git a/luajit-2.1/etc/luajit.pc b/luajit-2.1/etc/luajit.pc index c99057f..0fdd1ef 100644 --- a/luajit-2.1/etc/luajit.pc +++ b/luajit-2.1/etc/luajit.pc @@ -2,7 +2,7 @@ majver=2 minver=1 relver=0 -version=${majver}.${minver}.${relver}-beta1 +version=${majver}.${minver}.${relver}-beta2 abiver=5.1 prefix=/usr/local diff --git a/luajit-2.1/src/Makefile b/luajit-2.1/src/Makefile index 9845f6a..f56465d 100644 --- a/luajit-2.1/src/Makefile +++ b/luajit-2.1/src/Makefile @@ -7,7 +7,7 @@ # Also works with MinGW and Cygwin on Windows. # Please check msvcbuild.bat for building with MSVC on Windows. # -# Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +# Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ############################################################################## MAJVER= 2 @@ -110,6 +110,9 @@ XCFLAGS= #XCFLAGS+= -DLUAJIT_NUMMODE=1 #XCFLAGS+= -DLUAJIT_NUMMODE=2 # +# Enable GC64 mode for x64. +#XCFLAGS+= -DLUAJIT_ENABLE_GC64 +# ############################################################################## ############################################################################## @@ -121,8 +124,8 @@ XCFLAGS= # # Use the system provided memory allocator (realloc) instead of the # bundled memory allocator. This is slower, but sometimes helpful for -# debugging. This option cannot be enabled on x64, since realloc usually -# doesn't return addresses in the right address range. +# debugging. This option cannot be enabled on x64 without GC64, since +# realloc usually doesn't return addresses in the right address range. # OTOH this option is mandatory for Valgrind's memcheck tool on x64 and # the only way to get useful results from it for all other architectures. #XCFLAGS+= -DLUAJIT_USE_SYSMALLOC @@ -166,10 +169,6 @@ else HOST_SYS= Windows HOST_MSYS= cygwin endif - # Use Clang for OSX host. - ifeq (Darwin,$(HOST_SYS)) - DEFAULT_CC= clang - endif endif ############################################################################## @@ -210,7 +209,7 @@ TARGET_CC= $(STATIC_CC) TARGET_STCC= $(STATIC_CC) TARGET_DYNCC= $(DYNAMIC_CC) TARGET_LD= $(CROSS)$(CC) -TARGET_AR= $(CROSS)ar rcus +TARGET_AR= $(CROSS)ar rcus 2>/dev/null TARGET_STRIP= $(CROSS)strip TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) @@ -243,6 +242,9 @@ ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) TARGET_LJARCH= arm else ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) + ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) + TARGET_ARCH= -D__AARCH64EB__=1 + endif TARGET_LJARCH= arm64 else ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) @@ -257,7 +259,11 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) TARGET_ARCH= -D__MIPSEL__=1 endif - TARGET_LJARCH= mips + ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= mips64 + else + TARGET_LJARCH= mips + endif else $(error Unsupported target architecture) endif @@ -310,7 +316,6 @@ ifeq (Darwin,$(TARGET_SYS)) export MACOSX_DEPLOYMENT_TARGET=10.4 endif TARGET_STRIP+= -x - TARGET_AR+= 2>/dev/null TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC TARGET_DYNXLDOPTS= TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) @@ -321,7 +326,6 @@ ifeq (Darwin,$(TARGET_SYS)) else ifeq (iOS,$(TARGET_SYS)) TARGET_STRIP+= -x - TARGET_AR+= 2>/dev/null TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC TARGET_DYNXLDOPTS= TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) @@ -388,6 +392,11 @@ DASM_XFLAGS= DASM_AFLAGS= DASM_ARCH= $(TARGET_LJARCH) +ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D ENDIAN_LE +else + DASM_AFLAGS+= -D ENDIAN_BE +endif ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D P64 endif @@ -473,7 +482,7 @@ LJLIB_C= $(LJLIB_O:.o=.c) LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ - lj_strfmt.o lj_api.o lj_profile.o \ + lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \ lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ @@ -622,7 +631,7 @@ $(MINILUA_T): $(MINILUA_O) $(E) "HOSTLINK $@" $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS) -host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) +host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua $(E) "DYNASM $@" $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC) diff --git a/luajit-2.1/src/Makefile.dep b/luajit-2.1/src/Makefile.dep index 9aefb23..2b1cb5e 100644 --- a/luajit-2.1/src/Makefile.dep +++ b/luajit-2.1/src/Makefile.dep @@ -3,8 +3,8 @@ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ - lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \ - lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ + lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \ + lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ lj_strfmt.h lj_lib.h lj_libdef.h lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \ @@ -94,7 +94,7 @@ lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_crecord.h lj_strfmt.h lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \ - lj_ccallback.h + lj_ccallback.h lj_buf.h lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \ lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h @@ -163,7 +163,7 @@ lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \ - lj_jit.h lj_ircall.h lj_iropt.h lj_vm.h + lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \ lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \ @@ -188,6 +188,8 @@ lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h +lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \ + lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_char.h lj_strscan.h lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ @@ -213,19 +215,19 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \ lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \ lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \ - lj_strfmt.c lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \ - lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \ - lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \ - lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \ - lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \ - lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \ - lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \ - lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ - lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ - lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ - lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ - lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ - lib_init.c + lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \ + lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \ + lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \ + lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \ + lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \ + lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \ + lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \ + lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \ + lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \ + lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \ + lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \ + lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \ + lib_ffi.c lib_init.c luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ diff --git a/luajit-2.1/src/host/buildvm.c b/luajit-2.1/src/host/buildvm.c index 324dd26..de23fab 100644 --- a/luajit-2.1/src/host/buildvm.c +++ b/luajit-2.1/src/host/buildvm.c @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** This is a tool to build the hand-tuned assembler code required for ** LuaJIT's bytecode interpreter. It supports a variety of output formats @@ -110,7 +110,7 @@ static const char *sym_decorate(BuildCtx *ctx, if (p) { #if LJ_TARGET_X86ORX64 if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) - name[0] = '@'; + name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */ else *p = '\0'; #elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE diff --git a/luajit-2.1/src/host/buildvm.h b/luajit-2.1/src/host/buildvm.h index 5588555..b90428d 100644 --- a/luajit-2.1/src/host/buildvm.h +++ b/luajit-2.1/src/host/buildvm.h @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _BUILDVM_H diff --git a/luajit-2.1/src/host/buildvm_asm.c b/luajit-2.1/src/host/buildvm_asm.c index 9b7ae53..ffd1490 100644 --- a/luajit-2.1/src/host/buildvm_asm.c +++ b/luajit-2.1/src/host/buildvm_asm.c @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder: Assembler source code emitter. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "buildvm.h" @@ -93,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n) { int i; for (i = 0; i < n; i += 4) { + uint32_t ins = *(uint32_t *)(p+i); +#if LJ_TARGET_ARM64 && LJ_BE + ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */ +#endif if ((i & 15) == 0) - fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i)); + fprintf(ctx->fp, "\t.long 0x%08x", ins); else - fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i)); + fprintf(ctx->fp, ",0x%08x", ins); if ((i & 15) == 12) putc('\n', ctx->fp); } if ((n & 15) != 0) putc('\n', ctx->fp); @@ -214,7 +218,8 @@ static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc case BUILD_machasm: fprintf(ctx->fp, "\n\t.private_extern %s\n" - "%s:\n", name, name); + "\t.no_dead_strip %s\n" + "%s:\n", name, name, name); break; default: break; @@ -261,11 +266,20 @@ void emit_asm(BuildCtx *ctx) #if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND /* This should really be moved into buildvm_arm.dasc. */ +#if LJ_ARCH_HASFPU + fprintf(ctx->fp, + ".fnstart\n" + ".save {r5, r6, r7, r8, r9, r10, r11, lr}\n" + ".vsave {d8-d15}\n" + ".save {r4}\n" + ".pad #28\n"); +#else fprintf(ctx->fp, ".fnstart\n" ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n" ".pad #28\n"); #endif +#endif #if LJ_TARGET_MIPS fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n"); #endif diff --git a/luajit-2.1/src/host/buildvm_fold.c b/luajit-2.1/src/host/buildvm_fold.c index daed7ec..d579f4d 100644 --- a/luajit-2.1/src/host/buildvm_fold.c +++ b/luajit-2.1/src/host/buildvm_fold.c @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder: IR folding hash table generator. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "buildvm.h" @@ -9,7 +9,7 @@ /* Context for the folding hash table generator. */ static int lineno; -static int funcidx; +static uint32_t funcidx; static uint32_t foldkeys[BUILD_MAX_FOLD]; static uint32_t nkeys; diff --git a/luajit-2.1/src/host/buildvm_lib.c b/luajit-2.1/src/host/buildvm_lib.c index e928673..2956fdb 100644 --- a/luajit-2.1/src/host/buildvm_lib.c +++ b/luajit-2.1/src/host/buildvm_lib.c @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder: library definition compiler. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "buildvm.h" diff --git a/luajit-2.1/src/host/buildvm_libbc.h b/luajit-2.1/src/host/buildvm_libbc.h index 45f8f8c..b2600bd 100644 --- a/luajit-2.1/src/host/buildvm_libbc.h +++ b/luajit-2.1/src/host/buildvm_libbc.h @@ -15,7 +15,12 @@ static const uint8_t libbc_code[] = { 8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14, 0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2, 0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4, -2,0,76,3,2,0,75,0,1,0,0,2,0 +2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16, +3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3, +0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0, +41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128, +18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79, +6,252,127,76,4,2,0,0 #else 0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, 0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, @@ -28,7 +33,12 @@ static const uint8_t libbc_code[] = { 8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14, 0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2, 0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4, -2,0,76,3,2,0,75,0,1,0,0,2,0 +2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16, +3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3, +0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0, +41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128, +18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79, +6,252,127,76,4,2,0,0 #endif }; @@ -40,6 +50,7 @@ static const struct { const char *name; int ofs; } libbc_map[] = { {"table_foreach",136}, {"table_getn",207}, {"table_remove",226}, -{NULL,355} +{"table_move",355}, +{NULL,502} }; diff --git a/luajit-2.1/src/host/buildvm_peobj.c b/luajit-2.1/src/host/buildvm_peobj.c index 4279f50..2eb2bb7 100644 --- a/luajit-2.1/src/host/buildvm_peobj.c +++ b/luajit-2.1/src/host/buildvm_peobj.c @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder: PE object emitter. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Only used for building on Windows, since we cannot assume the presence ** of a suitable assembler. The host and target byte order must match. @@ -109,6 +109,8 @@ enum { #if LJ_TARGET_X64 PEOBJ_SECT_PDATA, PEOBJ_SECT_XDATA, +#elif LJ_TARGET_X86 + PEOBJ_SECT_SXDATA, #endif PEOBJ_SECT_RDATA_Z, PEOBJ_NSECTIONS @@ -208,6 +210,13 @@ void emit_peobj(BuildCtx *ctx) sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ pesect[PEOBJ_SECT_XDATA].flags = 0x40300040; +#elif LJ_TARGET_X86 + memcpy(pesect[PEOBJ_SECT_SXDATA].name, ".sxdata", sizeof(".sxdata")-1); + pesect[PEOBJ_SECT_SXDATA].ofs = sofs; + sofs += (pesect[PEOBJ_SECT_SXDATA].size = 4); + pesect[PEOBJ_SECT_SXDATA].relocofs = sofs; + /* Flags: 40 = read, 30 = align4, 02 = lnk_info, 40 = initialized data. */ + pesect[PEOBJ_SECT_SXDATA].flags = 0x40300240; #endif memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1); @@ -232,7 +241,7 @@ void emit_peobj(BuildCtx *ctx) nrsym = ctx->nrelocsym; pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; #if LJ_TARGET_X64 - pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win64. */ + pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */ #endif /* Write PE object header and all sections. */ @@ -312,6 +321,19 @@ void emit_peobj(BuildCtx *ctx) reloc.type = PEOBJ_RELOC_ADDR32NB; owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); } +#elif LJ_TARGET_X86 + /* Write .sxdata section. */ + for (i = 0; i < nrsym; i++) { + if (!strcmp(ctx->relocsym[i], "_lj_err_unwind_win")) { + uint32_t symidx = 1+2+i; + owrite(ctx, &symidx, 4); + break; + } + } + if (i == nrsym) { + fprintf(stderr, "Error: extern lj_err_unwind_win not used\n"); + exit(1); + } #endif /* Write .rdata$Z section. */ @@ -333,8 +355,10 @@ void emit_peobj(BuildCtx *ctx) #if LJ_TARGET_X64 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); - emit_peobj_sym(ctx, "lj_err_unwind_win64", 0, + emit_peobj_sym(ctx, "lj_err_unwind_win", 0, PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); +#elif LJ_TARGET_X86 + emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_SXDATA); #endif emit_peobj_sym(ctx, ctx->beginsym, 0, diff --git a/luajit-2.1/src/host/genlibbc.lua b/luajit-2.1/src/host/genlibbc.lua index 4398d8e..6f5a05c 100644 --- a/luajit-2.1/src/host/genlibbc.lua +++ b/luajit-2.1/src/host/genlibbc.lua @@ -2,7 +2,7 @@ -- Lua script to dump the bytecode of the library functions written in Lua. -- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT. ---------------------------------------------------------------------------- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- diff --git a/luajit-2.1/src/host/genminilua.lua b/luajit-2.1/src/host/genminilua.lua index cd0d946..50feff0 100644 --- a/luajit-2.1/src/host/genminilua.lua +++ b/luajit-2.1/src/host/genminilua.lua @@ -2,7 +2,7 @@ -- Lua script to generate a customized, minified version of Lua. -- The resulting 'minilua' is used for the build process of LuaJIT. ---------------------------------------------------------------------------- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- @@ -157,11 +157,11 @@ local function merge_includes(src) if includes[name] then return "" end includes[name] = true local fp = assert(io.open(LUA_SOURCE..name, "r")) - local src = fp:read("*a") + local inc = fp:read("*a") assert(fp:close()) - src = gsub(src, "#ifndef%s+%w+_h\n#define%s+%w+_h\n", "") - src = gsub(src, "#endif%s*$", "") - return merge_includes(src) + inc = gsub(inc, "#ifndef%s+%w+_h\n#define%s+%w+_h\n", "") + inc = gsub(inc, "#endif%s*$", "") + return merge_includes(inc) end) end @@ -300,6 +300,7 @@ local function strip_unused3(src) src = gsub(src, "if%([^\n]*hookmask[^\n]*&&\n[^\n]*%b{}\n", "") src = gsub(src, "(twoto%b()%()", "%1(size_t)") src = gsub(src, "i<sizenode", "i<(int)sizenode") + src = gsub(src, "cast%(unsigned int,key%-1%)", "cast(unsigned int,key)-1") return gsub(src, "\n\n+", "\n") end diff --git a/luajit-2.1/src/host/minilua.c b/luajit-2.1/src/host/minilua.c index aee192a..7915028 100644 --- a/luajit-2.1/src/host/minilua.c +++ b/luajit-2.1/src/host/minilua.c @@ -1606,7 +1606,7 @@ luaC_barriert(L,t,key); return gval(mp); } static const TValue*luaH_getnum(Table*t,int key){ -if(cast(unsigned int,key-1)<cast(unsigned int,t->sizearray)) +if(cast(unsigned int,key)-1<cast(unsigned int,t->sizearray)) return&t->array[key-1]; else{ lua_Number nk=cast_num(key); diff --git a/luajit-2.1/src/jit/bc.lua b/luajit-2.1/src/jit/bc.lua index 320039f..193cf01 100644 --- a/luajit-2.1/src/jit/bc.lua +++ b/luajit-2.1/src/jit/bc.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT bytecode listing module. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- diff --git a/luajit-2.1/src/jit/bcsave.lua b/luajit-2.1/src/jit/bcsave.lua index 38fc61f..c17c88e 100644 --- a/luajit-2.1/src/jit/bcsave.lua +++ b/luajit-2.1/src/jit/bcsave.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT module to save/list bytecode. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- @@ -63,8 +63,8 @@ local map_type = { } local map_arch = { - x86 = true, x64 = true, arm = true, arm64 = true, ppc = true, - mips = true, mipsel = true, + x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true, + ppc = true, mips = true, mipsel = true, } local map_os = { @@ -125,12 +125,12 @@ extern "C" #ifdef _WIN32 __declspec(dllexport) #endif -const char %s%s[] = { +const unsigned char %s%s[] = { ]], LJBC_PREFIX, ctx.modname)) else fp:write(string.format([[ #define %s%s_SIZE %d -static const char %s%s[] = { +static const unsigned char %s%s[] = { ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) end local t, n, m = {}, 0, 0 @@ -200,7 +200,7 @@ typedef struct { ]] local symname = LJBC_PREFIX..ctx.modname local is64, isbe = false, false - if ctx.arch == "x64" or ctx.arch == "arm64" then + if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then is64 = true elseif ctx.arch == "ppc" or ctx.arch == "mips" then isbe = true @@ -237,9 +237,9 @@ typedef struct { hdr.eendian = isbe and 2 or 1 hdr.eversion = 1 hdr.type = f16(1) - hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) + hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) if ctx.arch == "mips" or ctx.arch == "mipsel" then - hdr.flags = 0x50001006 + hdr.flags = f32(0x50001006) end hdr.version = f32(1) hdr.shofs = fofs(ffi.offsetof(o, "sect")) diff --git a/luajit-2.1/src/jit/dis_arm.lua b/luajit-2.1/src/jit/dis_arm.lua index dfcbeee..c2dd776 100644 --- a/luajit-2.1/src/jit/dis_arm.lua +++ b/luajit-2.1/src/jit/dis_arm.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT ARM disassembler module. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- This is a helper module used by the LuaJIT machine code dumper module. @@ -12,7 +12,7 @@ local type = type local sub, byte, format = string.sub, string.byte, string.format -local match, gmatch, gsub = string.match, string.gmatch, string.gsub +local match, gmatch = string.match, string.gmatch local concat = table.concat local bit = require("bit") local band, bor, ror, tohex = bit.band, bit.bor, bit.ror, bit.tohex diff --git a/luajit-2.1/src/jit/dis_arm64.lua b/luajit-2.1/src/jit/dis_arm64.lua new file mode 100644 index 0000000..a717332 --- /dev/null +++ b/luajit-2.1/src/jit/dis_arm64.lua @@ -0,0 +1,1216 @@ +---------------------------------------------------------------------------- +-- LuaJIT ARM64 disassembler module. +-- +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +-- +-- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +-- Sponsored by Cisco Systems, Inc. +---------------------------------------------------------------------------- +-- This is a helper module used by the LuaJIT machine code dumper module. +-- +-- It disassembles most user-mode AArch64 instructions. +-- NYI: Advanced SIMD and VFP instructions. +------------------------------------------------------------------------------ + +local type = type +local sub, byte, format = string.sub, string.byte, string.format +local match, gmatch, gsub = string.match, string.gmatch, string.gsub +local concat = table.concat +local bit = require("bit") +local band, bor, bxor, tohex = bit.band, bit.bor, bit.bxor, bit.tohex +local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift +local ror = bit.ror + +------------------------------------------------------------------------------ +-- Opcode maps +------------------------------------------------------------------------------ + +local map_adr = { -- PC-relative addressing. + shift = 31, mask = 1, + [0] = "adrDBx", "adrpDBx" +} + +local map_addsubi = { -- Add/subtract immediate. + shift = 29, mask = 3, + [0] = "add|movDNIg", "adds|cmnD0NIg", "subDNIg", "subs|cmpD0NIg", +} + +local map_logi = { -- Logical immediate. + shift = 31, mask = 1, + [0] = { + shift = 22, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig" + }, + false -- unallocated + }, + { + shift = 29, mask = 3, + [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig" + } +} + +local map_movwi = { -- Move wide immediate. + shift = 31, mask = 1, + [0] = { + shift = 22, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg" + }, false -- unallocated + }, + { + shift = 29, mask = 3, + [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg" + }, +} + +local map_bitf = { -- Bitfield. + shift = 31, mask = 1, + [0] = { + shift = 22, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12w", + "bfm|bfi|bfxilDN13w", + "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12w" + } + }, + { + shift = 22, mask = 1, + { + shift = 29, mask = 3, + [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12x", + "bfm|bfi|bfxilDN13x", + "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12x" + } + } +} + +local map_datai = { -- Data processing - immediate. + shift = 23, mask = 7, + [0] = map_adr, map_adr, map_addsubi, false, + map_logi, map_movwi, map_bitf, + { + shift = 15, mask = 0x1c0c1, + [0] = "extr|rorDNM4w", [0x10080] = "extr|rorDNM4x", + [0x10081] = "extr|rorDNM4x" + } +} + +local map_logsr = { -- Logical, shifted register. + shift = 31, mask = 1, + [0] = { + shift = 15, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = { + shift = 21, mask = 7, + [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", + "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" + }, + { + shift = 21, mask = 7, + [0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", + "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" + }, + { + shift = 21, mask = 7, + [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", + "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" + }, + { + shift = 21, mask = 7, + [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", + "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" + } + }, + false -- unallocated + }, + { + shift = 29, mask = 3, + [0] = { + shift = 21, mask = 7, + [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", + "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" + }, + { + shift = 21, mask = 7, + [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", + "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" + }, + { + shift = 21, mask = 7, + [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", + "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" + }, + { + shift = 21, mask = 7, + [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", + "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" + } + } +} + +local map_assh = { + shift = 31, mask = 1, + [0] = { + shift = 15, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = { + shift = 22, mask = 3, + [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg" + }, + { + shift = 22, mask = 3, + [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", + "adds|cmnD0NMSg", "adds|cmnD0NMg" + }, + { + shift = 22, mask = 3, + [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg" + }, + { + shift = 22, mask = 3, + [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg", + "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg" + }, + }, + false -- unallocated + }, + { + shift = 29, mask = 3, + [0] = { + shift = 22, mask = 3, + [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg" + }, + { + shift = 22, mask = 3, + [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", "adds|cmnD0NMSg", + "adds|cmnD0NMg" + }, + { + shift = 22, mask = 3, + [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg" + }, + { + shift = 22, mask = 3, + [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg", + "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg" + } + } +} + +local map_addsubsh = { -- Add/subtract, shifted register. + shift = 22, mask = 3, + [0] = map_assh, map_assh, map_assh +} + +local map_addsubex = { -- Add/subtract, extended register. + shift = 22, mask = 3, + [0] = { + shift = 29, mask = 3, + [0] = "addDNMXg", "adds|cmnD0NMXg", "subDNMXg", "subs|cmpD0NMzXg", + } +} + +local map_addsubc = { -- Add/subtract, with carry. + shift = 10, mask = 63, + [0] = { + shift = 29, mask = 3, + [0] = "adcDNMg", "adcsDNMg", "sbc|ngcDN0Mg", "sbcs|ngcsDN0Mg", + } +} + +local map_ccomp = { + shift = 4, mask = 1, + [0] = { + shift = 10, mask = 3, + [0] = { -- Conditional compare register. + shift = 29, mask = 3, + "ccmnNMVCg", false, "ccmpNMVCg", + }, + [2] = { -- Conditional compare immediate. + shift = 29, mask = 3, + "ccmnN5VCg", false, "ccmpN5VCg", + } + } +} + +local map_csel = { -- Conditional select. + shift = 11, mask = 1, + [0] = { + shift = 10, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = "cselDNMzCg", false, "csinv|cinv|csetmDNMcg", false, + }, + { + shift = 29, mask = 3, + [0] = "csinc|cinc|csetDNMcg", false, "csneg|cnegDNMcg", false, + } + } +} + +local map_data1s = { -- Data processing, 1 source. + shift = 29, mask = 1, + [0] = { + shift = 31, mask = 1, + [0] = { + shift = 10, mask = 0x7ff, + [0] = "rbitDNg", "rev16DNg", "revDNw", false, "clzDNg", "clsDNg" + }, + { + shift = 10, mask = 0x7ff, + [0] = "rbitDNg", "rev16DNg", "rev32DNx", "revDNx", "clzDNg", "clsDNg" + } + } +} + +local map_data2s = { -- Data processing, 2 sources. + shift = 29, mask = 1, + [0] = { + shift = 10, mask = 63, + false, "udivDNMg", "sdivDNMg", false, false, false, false, "lslDNMg", + "lsrDNMg", "asrDNMg", "rorDNMg" + } +} + +local map_data3s = { -- Data processing, 3 sources. + shift = 29, mask = 7, + [0] = { + shift = 21, mask = 7, + [0] = { + shift = 15, mask = 1, + [0] = "madd|mulDNMA0g", "msub|mnegDNMA0g" + } + }, false, false, false, + { + shift = 15, mask = 1, + [0] = { + shift = 21, mask = 7, + [0] = "madd|mulDNMA0g", "smaddl|smullDxNMwA0x", "smulhDNMx", false, + false, "umaddl|umullDxNMwA0x", "umulhDNMx" + }, + { + shift = 21, mask = 7, + [0] = "msub|mnegDNMA0g", "smsubl|smneglDxNMwA0x", false, false, + false, "umsubl|umneglDxNMwA0x" + } + } +} + +local map_datar = { -- Data processing, register. + shift = 28, mask = 1, + [0] = { + shift = 24, mask = 1, + [0] = map_logsr, + { + shift = 21, mask = 1, + [0] = map_addsubsh, map_addsubex + } + }, + { + shift = 21, mask = 15, + [0] = map_addsubc, false, map_ccomp, false, map_csel, false, + { + shift = 30, mask = 1, + [0] = map_data2s, map_data1s + }, + false, map_data3s, map_data3s, map_data3s, map_data3s, map_data3s, + map_data3s, map_data3s, map_data3s + } +} + +local map_lrl = { -- Load register, literal. + shift = 26, mask = 1, + [0] = { + shift = 30, mask = 3, + [0] = "ldrDwB", "ldrDxB", "ldrswDxB" + }, + { + shift = 30, mask = 3, + [0] = "ldrDsB", "ldrDdB" + } +} + +local map_lsriind = { -- Load/store register, immediate pre/post-indexed. + shift = 30, mask = 3, + [0] = { + shift = 26, mask = 1, + [0] = { + shift = 22, mask = 3, + [0] = "strbDwzL", "ldrbDwzL", "ldrsbDxzL", "ldrsbDwzL" + } + }, + { + shift = 26, mask = 1, + [0] = { + shift = 22, mask = 3, + [0] = "strhDwzL", "ldrhDwzL", "ldrshDxzL", "ldrshDwzL" + } + }, + { + shift = 26, mask = 1, + [0] = { + shift = 22, mask = 3, + [0] = "strDwzL", "ldrDwzL", "ldrswDxzL" + }, + { + shift = 22, mask = 3, + [0] = "strDszL", "ldrDszL" + } + }, + { + shift = 26, mask = 1, + [0] = { + shift = 22, mask = 3, + [0] = "strDxzL", "ldrDxzL" + }, + { + shift = 22, mask = 3, + [0] = "strDdzL", "ldrDdzL" + } + } +} + +local map_lsriro = { + shift = 21, mask = 1, + [0] = { -- Load/store register immediate. + shift = 10, mask = 3, + [0] = { -- Unscaled immediate. + shift = 26, mask = 1, + [0] = { + shift = 30, mask = 3, + [0] = { + shift = 22, mask = 3, + [0] = "sturbDwK", "ldurbDwK" + }, + { + shift = 22, mask = 3, + [0] = "sturhDwK", "ldurhDwK" + }, + { + shift = 22, mask = 3, + [0] = "sturDwK", "ldurDwK" + }, + { + shift = 22, mask = 3, + [0] = "sturDxK", "ldurDxK" + } + } + }, map_lsriind, false, map_lsriind + }, + { -- Load/store register, register offset. + shift = 10, mask = 3, + [2] = { + shift = 26, mask = 1, + [0] = { + shift = 30, mask = 3, + [0] = { + shift = 22, mask = 3, + [0] = "strbDwO", "ldrbDwO", "ldrsbDxO", "ldrsbDwO" + }, + { + shift = 22, mask = 3, + [0] = "strhDwO", "ldrhDwO", "ldrshDxO", "ldrshDwO" + }, + { + shift = 22, mask = 3, + [0] = "strDwO", "ldrDwO", "ldrswDxO" + }, + { + shift = 22, mask = 3, + [0] = "strDxO", "ldrDxO" + } + }, + { + shift = 30, mask = 3, + [2] = { + shift = 22, mask = 3, + [0] = "strDsO", "ldrDsO" + }, + [3] = { + shift = 22, mask = 3, + [0] = "strDdO", "ldrDdO" + } + } + } + } +} + +local map_lsp = { -- Load/store register pair, offset. + shift = 22, mask = 1, + [0] = { + shift = 30, mask = 3, + [0] = { + shift = 26, mask = 1, + [0] = "stpDzAzwP", "stpDzAzsP", + }, + { + shift = 26, mask = 1, + "stpDzAzdP" + }, + { + shift = 26, mask = 1, + [0] = "stpDzAzxP" + } + }, + { + shift = 30, mask = 3, + [0] = { + shift = 26, mask = 1, + [0] = "ldpDzAzwP", "ldpDzAzsP", + }, + { + shift = 26, mask = 1, + [0] = "ldpswDAxP", "ldpDzAzdP" + }, + { + shift = 26, mask = 1, + [0] = "ldpDzAzxP" + } + } +} + +local map_ls = { -- Loads and stores. + shift = 24, mask = 0x31, + [0x10] = map_lrl, [0x30] = map_lsriro, + [0x20] = { + shift = 23, mask = 3, + map_lsp, map_lsp, map_lsp + }, + [0x21] = { + shift = 23, mask = 3, + map_lsp, map_lsp, map_lsp + }, + [0x31] = { + shift = 26, mask = 1, + [0] = { + shift = 30, mask = 3, + [0] = { + shift = 22, mask = 3, + [0] = "strbDwzU", "ldrbDwzU" + }, + { + shift = 22, mask = 3, + [0] = "strhDwzU", "ldrhDwzU" + }, + { + shift = 22, mask = 3, + [0] = "strDwzU", "ldrDwzU" + }, + { + shift = 22, mask = 3, + [0] = "strDxzU", "ldrDxzU" + } + }, + { + shift = 30, mask = 3, + [2] = { + shift = 22, mask = 3, + [0] = "strDszU", "ldrDszU" + }, + [3] = { + shift = 22, mask = 3, + [0] = "strDdzU", "ldrDdzU" + } + } + }, +} + +local map_datafp = { -- Data processing, SIMD and FP. + shift = 28, mask = 7, + { -- 001 + shift = 24, mask = 1, + [0] = { + shift = 21, mask = 1, + { + shift = 10, mask = 3, + [0] = { + shift = 12, mask = 1, + [0] = { + shift = 13, mask = 1, + [0] = { + shift = 14, mask = 1, + [0] = { + shift = 15, mask = 1, + [0] = { -- FP/int conversion. + shift = 31, mask = 1, + [0] = { + shift = 16, mask = 0xff, + [0x20] = "fcvtnsDwNs", [0x21] = "fcvtnuDwNs", + [0x22] = "scvtfDsNw", [0x23] = "ucvtfDsNw", + [0x24] = "fcvtasDwNs", [0x25] = "fcvtauDwNs", + [0x26] = "fmovDwNs", [0x27] = "fmovDsNw", + [0x28] = "fcvtpsDwNs", [0x29] = "fcvtpuDwNs", + [0x30] = "fcvtmsDwNs", [0x31] = "fcvtmuDwNs", + [0x38] = "fcvtzsDwNs", [0x39] = "fcvtzuDwNs", + [0x60] = "fcvtnsDwNd", [0x61] = "fcvtnuDwNd", + [0x62] = "scvtfDdNw", [0x63] = "ucvtfDdNw", + [0x64] = "fcvtasDwNd", [0x65] = "fcvtauDwNd", + [0x68] = "fcvtpsDwNd", [0x69] = "fcvtpuDwNd", + [0x70] = "fcvtmsDwNd", [0x71] = "fcvtmuDwNd", + [0x78] = "fcvtzsDwNd", [0x79] = "fcvtzuDwNd" + }, + { + shift = 16, mask = 0xff, + [0x20] = "fcvtnsDxNs", [0x21] = "fcvtnuDxNs", + [0x22] = "scvtfDsNx", [0x23] = "ucvtfDsNx", + [0x24] = "fcvtasDxNs", [0x25] = "fcvtauDxNs", + [0x28] = "fcvtpsDxNs", [0x29] = "fcvtpuDxNs", + [0x30] = "fcvtmsDxNs", [0x31] = "fcvtmuDxNs", + [0x38] = "fcvtzsDxNs", [0x39] = "fcvtzuDxNs", + [0x60] = "fcvtnsDxNd", [0x61] = "fcvtnuDxNd", + [0x62] = "scvtfDdNx", [0x63] = "ucvtfDdNx", + [0x64] = "fcvtasDxNd", [0x65] = "fcvtauDxNd", + [0x66] = "fmovDxNd", [0x67] = "fmovDdNx", + [0x68] = "fcvtpsDxNd", [0x69] = "fcvtpuDxNd", + [0x70] = "fcvtmsDxNd", [0x71] = "fcvtmuDxNd", + [0x78] = "fcvtzsDxNd", [0x79] = "fcvtzuDxNd" + } + } + }, + { -- FP data-processing, 1 source. + shift = 31, mask = 1, + [0] = { + shift = 22, mask = 3, + [0] = { + shift = 15, mask = 63, + [0] = "fmovDNf", "fabsDNf", "fnegDNf", + "fsqrtDNf", false, "fcvtDdNs", false, false, + "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf", + "frintaDNf", false, "frintxDNf", "frintiDNf", + }, + { + shift = 15, mask = 63, + [0] = "fmovDNf", "fabsDNf", "fnegDNf", + "fsqrtDNf", "fcvtDsNd", false, false, false, + "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf", + "frintaDNf", false, "frintxDNf", "frintiDNf", + } + } + } + }, + { -- FP compare. + shift = 31, mask = 1, + [0] = { + shift = 14, mask = 3, + [0] = { + shift = 23, mask = 1, + [0] = { + shift = 0, mask = 31, + [0] = "fcmpNMf", [8] = "fcmpNZf", + [16] = "fcmpeNMf", [24] = "fcmpeNZf", + } + } + } + } + }, + { -- FP immediate. + shift = 31, mask = 1, + [0] = { + shift = 5, mask = 31, + [0] = { + shift = 23, mask = 1, + [0] = "fmovDFf" + } + } + } + }, + { -- FP conditional compare. + shift = 31, mask = 1, + [0] = { + shift = 23, mask = 1, + [0] = { + shift = 4, mask = 1, + [0] = "fccmpNMVCf", "fccmpeNMVCf" + } + } + }, + { -- FP data-processing, 2 sources. + shift = 31, mask = 1, + [0] = { + shift = 23, mask = 1, + [0] = { + shift = 12, mask = 15, + [0] = "fmulDNMf", "fdivDNMf", "faddDNMf", "fsubDNMf", + "fmaxDNMf", "fminDNMf", "fmaxnmDNMf", "fminnmDNMf", + "fnmulDNMf" + } + } + }, + { -- FP conditional select. + shift = 31, mask = 1, + [0] = { + shift = 23, mask = 1, + [0] = "fcselDNMCf" + } + } + } + }, + { -- FP data-processing, 3 sources. + shift = 31, mask = 1, + [0] = { + shift = 15, mask = 1, + [0] = { + shift = 21, mask = 5, + [0] = "fmaddDNMAf", "fnmaddDNMAf" + }, + { + shift = 21, mask = 5, + [0] = "fmsubDNMAf", "fnmsubDNMAf" + } + } + } + } +} + +local map_br = { -- Branches, exception generating and system instructions. + shift = 29, mask = 7, + [0] = "bB", + { -- Compare & branch, immediate. + shift = 24, mask = 3, + [0] = "cbzDBg", "cbnzDBg", "tbzDTBw", "tbnzDTBw" + }, + { -- Conditional branch, immediate. + shift = 24, mask = 3, + [0] = { + shift = 4, mask = 1, + [0] = { + shift = 0, mask = 15, + [0] = "beqB", "bneB", "bhsB", "bloB", "bmiB", "bplB", "bvsB", "bvcB", + "bhiB", "blsB", "bgeB", "bltB", "bgtB", "bleB", "balB" + } + } + }, false, "blB", + { -- Compare & branch, immediate. + shift = 24, mask = 3, + [0] = "cbzDBg", "cbnzDBg", "tbzDTBx", "tbnzDTBx" + }, + { + shift = 24, mask = 3, + [0] = { -- Exception generation. + shift = 0, mask = 0xe0001f, + [0x200000] = "brkW" + }, + { -- System instructions. + shift = 0, mask = 0x3fffff, + [0x03201f] = "nop" + }, + { -- Unconditional branch, register. + shift = 0, mask = 0xfffc1f, + [0x1f0000] = "brNx", [0x3f0000] = "blrNx", + [0x5f0000] = "retNx" + }, + } +} + +local map_init = { + shift = 25, mask = 15, + [0] = false, false, false, false, map_ls, map_datar, map_ls, map_datafp, + map_datai, map_datai, map_br, map_br, map_ls, map_datar, map_ls, map_datafp +} + +------------------------------------------------------------------------------ + +local map_regs = { x = {}, w = {}, d = {}, s = {} } + +for i=0,30 do + map_regs.x[i] = "x"..i + map_regs.w[i] = "w"..i + map_regs.d[i] = "d"..i + map_regs.s[i] = "s"..i +end +map_regs.x[31] = "sp" +map_regs.w[31] = "wsp" +map_regs.d[31] = "d31" +map_regs.s[31] = "s31" + +local map_cond = { + [0] = "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", "gt", "le", "al", +} + +local map_shift = { [0] = "lsl", "lsr", "asr", } + +local map_extend = { + [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx", +} + +------------------------------------------------------------------------------ + +-- Output a nicely formatted line with an opcode and operands. +local function putop(ctx, text, operands) + local pos = ctx.pos + local extra = "" + if ctx.rel then + local sym = ctx.symtab[ctx.rel] + if sym then + extra = "\t->"..sym + end + end + if ctx.hexdump > 0 then + ctx.out(format("%08x %s %-5s %s%s\n", + ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)) + else + ctx.out(format("%08x %-5s %s%s\n", + ctx.addr+pos, text, concat(operands, ", "), extra)) + end + ctx.pos = pos + 4 +end + +-- Fallback for unknown opcodes. +local function unknown(ctx) + return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) +end + +local function match_reg(p, pat, regnum) + return map_regs[match(pat, p.."%w-([xwds])")][regnum] +end + +local function fmt_hex32(x) + if x < 0 then + return tohex(x) + else + return format("%x", x) + end +end + +local imm13_rep = { 0x55555555, 0x11111111, 0x01010101, 0x00010001, 0x00000001 } + +local function decode_imm13(op) + local imms = band(rshift(op, 10), 63) + local immr = band(rshift(op, 16), 63) + if band(op, 0x00400000) == 0 then + local len = 5 + if imms >= 56 then + if imms >= 60 then len = 1 else len = 2 end + elseif imms >= 48 then len = 3 elseif imms >= 32 then len = 4 end + local l = lshift(1, len)-1 + local s = band(imms, l) + local r = band(immr, l) + local imm = ror(rshift(-1, 31-s), r) + if len ~= 5 then imm = band(imm, lshift(1, l)-1) + rshift(imm, 31-l) end + imm = imm * imm13_rep[len] + local ix = fmt_hex32(imm) + if rshift(op, 31) ~= 0 then + return ix..tohex(imm) + else + return ix + end + else + local lo, hi = -1, 0 + if imms < 32 then lo = rshift(-1, 31-imms) else hi = rshift(-1, 63-imms) end + if immr ~= 0 then + lo, hi = ror(lo, immr), ror(hi, immr) + local x = immr == 32 and 0 or band(bxor(lo, hi), lshift(-1, 32-immr)) + lo, hi = bxor(lo, x), bxor(hi, x) + if immr >= 32 then lo, hi = hi, lo end + end + if hi ~= 0 then + return fmt_hex32(hi)..tohex(lo) + else + return fmt_hex32(lo) + end + end +end + +local function parse_immpc(op, name) + if name == "b" or name == "bl" then + return arshift(lshift(op, 6), 4) + elseif name == "adr" or name == "adrp" then + local immlo = band(rshift(op, 29), 3) + local immhi = lshift(arshift(lshift(op, 8), 13), 2) + return bor(immhi, immlo) + elseif name == "tbz" or name == "tbnz" then + return lshift(arshift(lshift(op, 13), 18), 2) + else + return lshift(arshift(lshift(op, 8), 13), 2) + end +end + +local function parse_fpimm8(op) + local sign = band(op, 0x100000) == 0 and 1 or -1 + local exp = bxor(rshift(arshift(lshift(op, 12), 5), 24), 0x80) - 131 + local frac = 16+band(rshift(op, 13), 15) + return sign * frac * 2^exp +end + +local function prefer_bfx(sf, uns, imms, immr) + if imms < immr or imms == 31 or imms == 63 then + return false + end + if immr == 0 then + if sf == 0 and (imms == 7 or imms == 15) then + return false + end + if sf ~= 0 and uns == 0 and (imms == 7 or imms == 15 or imms == 31) then + return false + end + end + return true +end + +-- Disassemble a single instruction. +local function disass_ins(ctx) + local pos = ctx.pos + local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) + local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) + local operands = {} + local suffix = "" + local last, name, pat + local map_reg + ctx.op = op + ctx.rel = nil + last = nil + local opat + opat = map_init[band(rshift(op, 25), 15)] + while type(opat) ~= "string" do + if not opat then return unknown(ctx) end + opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ + end + name, pat = match(opat, "^([a-z0-9]*)(.*)") + local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") + if altname then pat = pat2 end + if sub(pat, 1, 1) == "." then + local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)") + suffix = suffix..s2 + pat = p2 + end + + local rt = match(pat, "[gf]") + if rt then + if rt == "g" then + map_reg = band(op, 0x80000000) ~= 0 and map_regs.x or map_regs.w + else + map_reg = band(op, 0x400000) ~= 0 and map_regs.d or map_regs.s + end + end + + local second0, immr + + for p in gmatch(pat, ".") do + local x = nil + if p == "D" then + local regnum = band(op, 31) + x = rt and map_reg[regnum] or match_reg(p, pat, regnum) + elseif p == "N" then + local regnum = band(rshift(op, 5), 31) + x = rt and map_reg[regnum] or match_reg(p, pat, regnum) + elseif p == "M" then + local regnum = band(rshift(op, 16), 31) + x = rt and map_reg[regnum] or match_reg(p, pat, regnum) + elseif p == "A" then + local regnum = band(rshift(op, 10), 31) + x = rt and map_reg[regnum] or match_reg(p, pat, regnum) + elseif p == "B" then + local addr = ctx.addr + pos + parse_immpc(op, name) + ctx.rel = addr + x = "0x"..tohex(addr) + elseif p == "T" then + x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31)) + elseif p == "V" then + x = band(op, 15) + elseif p == "C" then + x = map_cond[band(rshift(op, 12), 15)] + elseif p == "c" then + local rn = band(rshift(op, 5), 31) + local rm = band(rshift(op, 16), 31) + local cond = band(rshift(op, 12), 15) + local invc = bxor(cond, 1) + x = map_cond[cond] + if altname and cond ~= 14 and cond ~= 15 then + local a1, a2 = match(altname, "([^|]*)|(.*)") + if rn == rm then + local n = #operands + operands[n] = nil + x = map_cond[invc] + if rn ~= 31 then + if a1 then name = a1 else name = altname end + else + operands[n-1] = nil + name = a2 + end + end + end + elseif p == "W" then + x = band(rshift(op, 5), 0xffff) + elseif p == "Y" then + x = band(rshift(op, 5), 0xffff) + local hw = band(rshift(op, 21), 3) + if altname and (hw == 0 or x ~= 0) then + name = altname + end + elseif p == "L" then + local rn = map_regs.x[band(rshift(op, 5), 31)] + local imm9 = arshift(lshift(op, 11), 23) + if band(op, 0x800) ~= 0 then + x = "["..rn..", #"..imm9.."]!" + else + x = "["..rn.."], #"..imm9 + end + elseif p == "U" then + local rn = map_regs.x[band(rshift(op, 5), 31)] + local sz = band(rshift(op, 30), 3) + local imm12 = lshift(arshift(lshift(op, 10), 20), sz) + if imm12 ~= 0 then + x = "["..rn..", #"..imm12.."]" + else + x = "["..rn.."]" + end + elseif p == "K" then + local rn = map_regs.x[band(rshift(op, 5), 31)] + local imm9 = arshift(lshift(op, 11), 23) + if imm9 ~= 0 then + x = "["..rn..", #"..imm9.."]" + else + x = "["..rn.."]" + end + elseif p == "O" then + local rn, rm = map_regs.x[band(rshift(op, 5), 31)] + local m = band(rshift(op, 13), 1) + if m == 0 then + rm = map_regs.w[band(rshift(op, 16), 31)] + else + rm = map_regs.x[band(rshift(op, 16), 31)] + end + x = "["..rn..", "..rm + local opt = band(rshift(op, 13), 7) + local s = band(rshift(op, 12), 1) + local sz = band(rshift(op, 30), 3) + -- extension to be applied + if opt == 3 then + if s == 0 then x = x.."]" + else x = x..", lsl #"..sz.."]" end + elseif opt == 2 or opt == 6 or opt == 7 then + if s == 0 then x = x..", "..map_extend[opt].."]" + else x = x..", "..map_extend[opt].." #"..sz.."]" end + else + x = x.."]" + end + elseif p == "P" then + local opcv, sh = rshift(op, 26), 2 + if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end + local imm7 = lshift(arshift(lshift(op, 10), 25), sh) + local rn = map_regs.x[band(rshift(op, 5), 31)] + local ind = band(rshift(op, 23), 3) + if ind == 1 then + x = "["..rn.."], #"..imm7 + elseif ind == 2 then + if imm7 == 0 then + x = "["..rn.."]" + else + x = "["..rn..", #"..imm7.."]" + end + elseif ind == 3 then + x = "["..rn..", #"..imm7.."]!" + end + elseif p == "I" then + local shf = band(rshift(op, 22), 3) + local imm12 = band(rshift(op, 10), 0x0fff) + local rn, rd = band(rshift(op, 5), 31), band(op, 31) + if altname == "mov" and shf == 0 and imm12 == 0 and (rn == 31 or rd == 31) then + name = altname + x = nil + elseif shf == 0 then + x = imm12 + elseif shf == 1 then + x = imm12..", lsl #12" + end + elseif p == "i" then + x = "#0x"..decode_imm13(op) + elseif p == "1" then + immr = band(rshift(op, 16), 63) + x = immr + elseif p == "2" then + x = band(rshift(op, 10), 63) + if altname then + local a1, a2, a3, a4, a5, a6 = + match(altname, "([^|]*)|([^|]*)|([^|]*)|([^|]*)|([^|]*)|(.*)") + local sf = band(rshift(op, 26), 32) + local uns = band(rshift(op, 30), 1) + if prefer_bfx(sf, uns, x, immr) then + name = a2 + x = x - immr + 1 + elseif immr == 0 and x == 7 then + local n = #operands + operands[n] = nil + if sf ~= 0 then + operands[n-1] = gsub(operands[n-1], "x", "w") + end + last = operands[n-1] + name = a6 + x = nil + elseif immr == 0 and x == 15 then + local n = #operands + operands[n] = nil + if sf ~= 0 then + operands[n-1] = gsub(operands[n-1], "x", "w") + end + last = operands[n-1] + name = a5 + x = nil + elseif x == 31 or x == 63 then + if x == 31 and immr == 0 and name == "sbfm" then + name = a4 + local n = #operands + operands[n] = nil + if sf ~= 0 then + operands[n-1] = gsub(operands[n-1], "x", "w") + end + last = operands[n-1] + else + name = a3 + end + x = nil + elseif band(x, 31) ~= 31 and immr == x+1 and name == "ubfm" then + name = a4 + last = "#"..(sf+32 - immr) + operands[#operands] = last + x = nil + elseif x < immr then + name = a1 + last = "#"..(sf+32 - immr) + operands[#operands] = last + x = x + 1 + end + end + elseif p == "3" then + x = band(rshift(op, 10), 63) + if altname then + local a1, a2 = match(altname, "([^|]*)|(.*)") + if x < immr then + name = a1 + local sf = band(rshift(op, 26), 32) + last = "#"..(sf+32 - immr) + operands[#operands] = last + x = x + 1 + elseif x >= immr then + name = a2 + x = x - immr + 1 + end + end + elseif p == "4" then + x = band(rshift(op, 10), 63) + local rn = band(rshift(op, 5), 31) + local rm = band(rshift(op, 16), 31) + if altname and rn == rm then + local n = #operands + operands[n] = nil + last = operands[n-1] + name = altname + end + elseif p == "5" then + x = band(rshift(op, 16), 31) + elseif p == "S" then + x = band(rshift(op, 10), 63) + if x == 0 then x = nil + else x = map_shift[band(rshift(op, 22), 3)].." #"..x end + elseif p == "X" then + local opt = band(rshift(op, 13), 7) + -- Width specifier <R>. + if opt ~= 3 and opt ~= 7 then + last = map_regs.w[band(rshift(op, 16), 31)] + operands[#operands] = last + end + x = band(rshift(op, 10), 7) + -- Extension. + if opt == 2 + band(rshift(op, 31), 1) and + band(rshift(op, second0 and 5 or 0), 31) == 31 then + if x == 0 then x = nil + else x = "lsl #"..x end + else + if x == 0 then x = map_extend[band(rshift(op, 13), 7)] + else x = map_extend[band(rshift(op, 13), 7)].." #"..x end + end + elseif p == "R" then + x = band(rshift(op,21), 3) + if x == 0 then x = nil + else x = "lsl #"..x*16 end + elseif p == "z" then + local n = #operands + if operands[n] == "sp" then operands[n] = "xzr" + elseif operands[n] == "wsp" then operands[n] = "wzr" + end + elseif p == "Z" then + x = 0 + elseif p == "F" then + x = parse_fpimm8(op) + elseif p == "g" or p == "f" or p == "x" or p == "w" or + p == "d" or p == "s" then + -- These are handled in D/N/M/A. + elseif p == "0" then + if last == "sp" or last == "wsp" then + local n = #operands + operands[n] = nil + last = operands[n-1] + if altname then + local a1, a2 = match(altname, "([^|]*)|(.*)") + if not a1 then + name = altname + elseif second0 then + name, altname = a2, a1 + else + name, altname = a1, a2 + end + end + end + second0 = true + else + assert(false) + end + if x then + last = x + if type(x) == "number" then x = "#"..x end + operands[#operands+1] = x + end + end + + return putop(ctx, name..suffix, operands) +end + +------------------------------------------------------------------------------ + +-- Disassemble a block of code. +local function disass_block(ctx, ofs, len) + if not ofs then ofs = 0 end + local stop = len and ofs+len or #ctx.code + ctx.pos = ofs + ctx.rel = nil + while ctx.pos < stop do disass_ins(ctx) end +end + +-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). +local function create(code, addr, out) + local ctx = {} + ctx.code = code + ctx.addr = addr or 0 + ctx.out = out or io.write + ctx.symtab = {} + ctx.disass = disass_block + ctx.hexdump = 8 + return ctx +end + +-- Simple API: disassemble code (a string) at address and output via out. +local function disass(code, addr, out) + create(code, addr, out):disass() +end + +-- Return register name for RID. +local function regname(r) + if r < 32 then return map_regs.x[r] end + return map_regs.d[r-32] +end + +-- Public module functions. +return { + create = create, + disass = disass, + regname = regname +} + diff --git a/luajit-2.1/src/jit/dis_arm64be.lua b/luajit-2.1/src/jit/dis_arm64be.lua new file mode 100644 index 0000000..7eb389e --- /dev/null +++ b/luajit-2.1/src/jit/dis_arm64be.lua @@ -0,0 +1,12 @@ +---------------------------------------------------------------------------- +-- LuaJIT ARM64BE disassembler wrapper module. +-- +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- ARM64 instructions are always little-endian. So just forward to the +-- common ARM64 disassembler module. All the interesting stuff is there. +------------------------------------------------------------------------------ + +return require((string.match(..., ".*%.") or "").."dis_arm64") + diff --git a/luajit-2.1/src/jit/dis_mips.lua b/luajit-2.1/src/jit/dis_mips.lua index 9466f45..a12b8e6 100644 --- a/luajit-2.1/src/jit/dis_mips.lua +++ b/luajit-2.1/src/jit/dis_mips.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT MIPS disassembler module. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT/X license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- This is a helper module used by the LuaJIT machine code dumper module. @@ -11,8 +11,8 @@ ------------------------------------------------------------------------------ local type = type -local sub, byte, format = string.sub, string.byte, string.format -local match, gmatch, gsub = string.match, string.gmatch, string.gsub +local byte, format = string.byte, string.format +local match, gmatch = string.match, string.gmatch local concat = table.concat local bit = require("bit") local band, bor, tohex = bit.band, bit.bor, bit.tohex @@ -34,15 +34,17 @@ local map_special = { "jrS", "jalrD1S", "movzDST", "movnDST", "syscallY", "breakY", false, "sync", "mfhiD", "mthiS", "mfloD", "mtloS", - false, false, false, false, + "dsllvDST", false, "dsrlvDST", "dsravDST", "multST", "multuST", "divST", "divuST", - false, false, false, false, + "dmultST", "dmultuST", "ddivST", "ddivuST", "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", - "andDST", "orDST", "xorDST", "nor|notDST0", + "andDST", "or|moveDST0", "xorDST", "nor|notDST0", false, false, "sltDST", "sltuDST", - false, false, false, false, + "daddDST", "dadduDST", "dsubDST", "dsubuDST", "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", - "teqSTZ", false, "tneSTZ", + "teqSTZ", false, "tneSTZ", false, + "dsllDTA", false, "dsrlDTA", "dsraDTA", + "dsll32DTA", false, "dsrl32DTA", "dsra32DTA", } local map_special2 = { @@ -60,11 +62,17 @@ local map_bshfl = { [24] = "sehDT", } +local map_dbshfl = { + shift = 6, mask = 31, + [2] = "dsbhDT", + [5] = "dshdDT", +} + local map_special3 = { shift = 0, mask = 63, - [0] = "extTSAK", [4] = "insTSAL", - [32] = map_bshfl, - [59] = "rdhwrTD", + [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK", + [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL", + [32] = map_bshfl, [36] = map_dbshfl, [59] = "rdhwrTD", } local map_regimm = { @@ -178,8 +186,8 @@ local map_cop1bc = { local map_cop1 = { shift = 21, mask = 31, - [0] = "mfc1TG", false, "cfc1TG", "mfhc1TG", - "mtc1TG", false, "ctc1TG", "mthc1TG", + [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG", + "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG", map_cop1bc, false, false, false, false, false, false, false, map_cop1s, map_cop1d, false, false, @@ -213,16 +221,16 @@ local map_pri = { "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU", map_cop0, map_cop1, false, map_cop1x, "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB", - false, false, false, false, - map_special2, false, false, map_special3, + "daddiTSI", "daddiuTSI", false, false, + map_special2, "jalxJ", false, map_special3, "lbTSO", "lhTSO", "lwlTSO", "lwTSO", "lbuTSO", "lhuTSO", "lwrTSO", false, "sbTSO", "shTSO", "swlTSO", "swTSO", false, false, "swrTSO", "cacheNSO", "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO", - false, "ldc1HSO", "ldc2TSO", false, + false, "ldc1HSO", "ldc2TSO", "ldTSO", "scTSO", "swc1HSO", "swc2TSO", false, - false, "sdc1HSO", "sdc2TSO", false, + false, "sdc1HSO", "sdc2TSO", "sdTSO", } ------------------------------------------------------------------------------ @@ -306,6 +314,8 @@ local function disass_ins(ctx) x = "f"..band(rshift(op, 21), 31) elseif p == "A" then x = band(rshift(op, 6), 31) + elseif p == "E" then + x = band(rshift(op, 6), 31) + 32 elseif p == "M" then x = band(rshift(op, 11), 31) elseif p == "N" then @@ -315,8 +325,12 @@ local function disass_ins(ctx) if x == 0 then x = nil end elseif p == "K" then x = band(rshift(op, 11), 31) + 1 + elseif p == "P" then + x = band(rshift(op, 11), 31) + 33 elseif p == "L" then x = band(rshift(op, 11), 31) - last + 1 + elseif p == "Q" then + x = band(rshift(op, 11), 31) - last + 33 elseif p == "I" then x = arshift(lshift(op, 16), 16) elseif p == "U" then @@ -330,11 +344,12 @@ local function disass_ins(ctx) elseif p == "B" then x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4 ctx.rel = x - x = "0x"..tohex(x) + x = format("0x%08x", x) elseif p == "J" then - x = band(ctx.addr + ctx.pos, 0xf0000000) + band(op, 0x03ffffff)*4 + local a = ctx.addr + ctx.pos + x = a - band(a, 0x0fffffff) + band(op, 0x03ffffff)*4 ctx.rel = x - x = "0x"..tohex(x) + x = format("0x%08x", x) elseif p == "V" then x = band(rshift(op, 8), 7) if x == 0 then x = nil end diff --git a/luajit-2.1/src/jit/dis_mips64.lua b/luajit-2.1/src/jit/dis_mips64.lua new file mode 100644 index 0000000..c437492 --- /dev/null +++ b/luajit-2.1/src/jit/dis_mips64.lua @@ -0,0 +1,17 @@ +---------------------------------------------------------------------------- +-- LuaJIT MIPS64 disassembler wrapper module. +-- +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- This module just exports the big-endian functions from the +-- MIPS disassembler module. All the interesting stuff is there. +------------------------------------------------------------------------------ + +local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") +return { + create = dis_mips.create, + disass = dis_mips.disass, + regname = dis_mips.regname +} + diff --git a/luajit-2.1/src/jit/dis_mips64el.lua b/luajit-2.1/src/jit/dis_mips64el.lua new file mode 100644 index 0000000..2b1470a --- /dev/null +++ b/luajit-2.1/src/jit/dis_mips64el.lua @@ -0,0 +1,17 @@ +---------------------------------------------------------------------------- +-- LuaJIT MIPS64EL disassembler wrapper module. +-- +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- This module just exports the little-endian functions from the +-- MIPS disassembler module. All the interesting stuff is there. +------------------------------------------------------------------------------ + +local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") +return { + create = dis_mips.create_el, + disass = dis_mips.disass_el, + regname = dis_mips.regname +} + diff --git a/luajit-2.1/src/jit/dis_mipsel.lua b/luajit-2.1/src/jit/dis_mipsel.lua index f06ffe8..f69b11f 100644 --- a/luajit-2.1/src/jit/dis_mipsel.lua +++ b/luajit-2.1/src/jit/dis_mipsel.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT MIPSEL disassembler wrapper module. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- This module just exports the little-endian functions from the diff --git a/luajit-2.1/src/jit/dis_ppc.lua b/luajit-2.1/src/jit/dis_ppc.lua index e077d7a..2aeb1b2 100644 --- a/luajit-2.1/src/jit/dis_ppc.lua +++ b/luajit-2.1/src/jit/dis_ppc.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT PPC disassembler module. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT/X license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- This is a helper module used by the LuaJIT machine code dumper module. @@ -13,7 +13,7 @@ ------------------------------------------------------------------------------ local type = type -local sub, byte, format = string.sub, string.byte, string.format +local byte, format = string.byte, string.format local match, gmatch, gsub = string.match, string.gmatch, string.gsub local concat = table.concat local bit = require("bit") diff --git a/luajit-2.1/src/jit/dis_x64.lua b/luajit-2.1/src/jit/dis_x64.lua index 15d5524..d5714ee 100644 --- a/luajit-2.1/src/jit/dis_x64.lua +++ b/luajit-2.1/src/jit/dis_x64.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT x64 disassembler wrapper module. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- This module just exports the 64 bit functions from the combined diff --git a/luajit-2.1/src/jit/dis_x86.lua b/luajit-2.1/src/jit/dis_x86.lua index 49bbcad..4371233 100644 --- a/luajit-2.1/src/jit/dis_x86.lua +++ b/luajit-2.1/src/jit/dis_x86.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT x86/x64 disassembler module. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- This is a helper module used by the LuaJIT machine code dumper module. @@ -158,8 +158,8 @@ local map_opc2 = { "||punpcklqdqXrvm","||punpckhqdqXrvm", "movPrVSm","movqMrm|movdquXrm|movdqaXrm", --7x -"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu", -"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu", +"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu", +"pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu", "pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|", "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", nil,nil, @@ -239,8 +239,12 @@ nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm", --8x [0x8c] = "||pmaskmovXrvVSm", [0x8e] = "||pmaskmovVSmXvr", +--Dx +[0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm", +[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm", --Fx [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", +[0xf7] = "| sarxVrmv| shlxVrmv| shrxVrmv", }, ["3a"] = { -- [66] 0f 3a xx @@ -262,12 +266,16 @@ nil,nil,nil,nil, [0x40] = "||dppsXrvmu", [0x41] = "||dppdXrvmu", [0x42] = "||mpsadbwXrvmu", +[0x44] = "||pclmulqdqXrvmu", [0x46] = "||perm2i128Xrvmu", [0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb", [0x4c] = "||pblendvbXrvmb", --6x [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", +[0xdf] = "||aeskeygenassistXrmu", +--Fx +[0xf0] = "||| rorxVrmu", }, } @@ -409,8 +417,8 @@ local function putop(ctx, text, operands) (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "").. (ctx.vexl and "l" or "") if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end - if t ~= "" then text = ctx.rex.."."..t.." "..text - elseif ctx.rex == "vex" then text = "v"..text end + if t ~= "" then text = ctx.rex.."."..t.." "..gsub(text, "^ ", "") + elseif ctx.rex == "vex" then text = gsub("v"..text, "^v ", "") end ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false ctx.rex = false; ctx.vexl = false; ctx.vexv = false end @@ -810,7 +818,7 @@ map_act = { m = b%32; b = (b-m)/32 local nb = b%2; b = (b-nb)/2 if nb == 0 then ctx.rexb = true end - local nx = b%2; b = (b-nx)/2 + local nx = b%2 if nx == 0 then ctx.rexx = true end b = byte(ctx.code, pos, pos) if not b then return incomplete(ctx) end diff --git a/luajit-2.1/src/jit/dump.lua b/luajit-2.1/src/jit/dump.lua index b1cdcfe..2bea652 100644 --- a/luajit-2.1/src/jit/dump.lua +++ b/luajit-2.1/src/jit/dump.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT compiler dump module. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- @@ -63,9 +63,9 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr local bit = require("bit") -local band, shl, shr, tohex = bit.band, bit.lshift, bit.rshift, bit.tohex +local band, shr, tohex = bit.band, bit.rshift, bit.tohex local sub, gsub, format = string.sub, string.gsub, string.format -local byte, char, rep = string.byte, string.char, string.rep +local byte, rep = string.byte, string.rep local type, tostring = type, tostring local stdout, stderr = io.stdout, io.stderr @@ -85,7 +85,7 @@ local nexitsym = 0 local function fillsymtab_tr(tr, nexit) local t = {} symtabmt.__index = t - if jit.arch == "mips" or jit.arch == "mipsel" then + if jit.arch:sub(1, 4) == "mips" then t[traceexitstub(tr, 0)] = "exit" return end @@ -213,7 +213,7 @@ local colortype_ansi = { "\027[35m%s\027[m", } -local function colorize_text(s, t) +local function colorize_text(s) return s end @@ -310,15 +310,17 @@ local function fmtfunc(func, pc) end end -local function formatk(tr, idx) +local function formatk(tr, idx, sn) local k, t, slot = tracek(tr, idx) local tn = type(k) local s if tn == "number" then - if k == 2^52+2^51 then + if band(sn or 0, 0x30000) ~= 0 then + s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz" + elseif k == 2^52+2^51 then s = "bias" else - s = format("%+.14g", k) + s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k) end elseif tn == "string" then s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) @@ -331,11 +333,13 @@ local function formatk(tr, idx) s = format("userdata:%p", k) else s = format("[%p]", k) - if s == "[0x00000000]" then s = "NULL" end + if s == "[NULL]" then s = "NULL" end end elseif t == 21 then -- int64_t s = sub(tostring(k), 1, -3) if sub(s, 1, 1) ~= "-" then s = "+"..s end + elseif sn == 0x1057fff then -- SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL) + return "----" -- Special case for LJ_FR2 slot 1. else s = tostring(k) -- For primitives. end @@ -354,7 +358,7 @@ local function printsnap(tr, snap) n = n + 1 local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS if ref < 0 then - out:write(formatk(tr, ref)) + out:write(formatk(tr, ref, sn)) elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) else @@ -552,7 +556,7 @@ local function dump_trace(what, tr, func, pc, otr, oex) if what == "start" then if dumpmode.H then out:write('<pre class="ljdump">\n') end out:write("---- TRACE ", tr, " ", what) - if otr then out:write(" ", otr, "/", oex) end + if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end out:write(" ", fmtfunc(func, pc), "\n") elseif what == "stop" or what == "abort" then out:write("---- TRACE ", tr, " ", what) @@ -651,7 +655,8 @@ end local function dumpon(opt, outfile) if active then dumpoff() end - local colormode = os.getenv("COLORTERM") and "A" or "T" + local term = os.getenv("TERM") + local colormode = (term and term:match("color") or os.getenv("COLORTERM")) and "A" or "T" if opt then opt = gsub(opt, "[TAH]", function(mode) colormode = mode; return ""; end) end diff --git a/luajit-2.1/src/jit/p.lua b/luajit-2.1/src/jit/p.lua index 97d4ccd..7be1058 100644 --- a/luajit-2.1/src/jit/p.lua +++ b/luajit-2.1/src/jit/p.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT profiler. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- @@ -120,7 +120,7 @@ end -- Show top N list. local function prof_top(count1, count2, samples, indent) local t, n = {}, 0 - for k, v in pairs(count1) do + for k in pairs(count1) do n = n + 1 t[n] = k end @@ -156,6 +156,7 @@ local function prof_annotate(count1, samples) ms = math.max(ms, v) if pct >= prof_min then local file, line = k:match("^(.*):(%d+)$") + if not file then file = k; line = 0 end local fl = files[file] if not fl then fl = {}; files[file] = fl; files[#files+1] = file end line = tonumber(line) diff --git a/luajit-2.1/src/jit/v.lua b/luajit-2.1/src/jit/v.lua index 157c34b..934de98 100644 --- a/luajit-2.1/src/jit/v.lua +++ b/luajit-2.1/src/jit/v.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- Verbose mode of the LuaJIT compiler. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- @@ -99,7 +99,7 @@ end local function dump_trace(what, tr, func, pc, otr, oex) if what == "start" then startloc = fmtfunc(func, pc) - startex = otr and "("..otr.."/"..oex..") " or "" + startex = otr and "("..otr.."/"..(oex == -1 and "stitch" or oex)..") " or "" else if what == "abort" then local loc = fmtfunc(func, pc) diff --git a/luajit-2.1/src/jit/zone.lua b/luajit-2.1/src/jit/zone.lua index 69f0f16..fa702c4 100644 --- a/luajit-2.1/src/jit/zone.lua +++ b/luajit-2.1/src/jit/zone.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT profiler zones. -- --- Copyright (C) 2005-2015 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- diff --git a/luajit-2.1/src/lauxlib.h b/luajit-2.1/src/lauxlib.h index fed1491..a44f027 100644 --- a/luajit-2.1/src/lauxlib.h +++ b/luajit-2.1/src/lauxlib.h @@ -15,9 +15,6 @@ #include "lua.h" -#define luaL_getn(L,i) ((int)lua_objlen(L, i)) -#define luaL_setn(L,i,j) ((void)0) /* no op! */ - /* extra error code for `luaL_load' */ #define LUA_ERRFILE (LUA_ERRERR+1) @@ -58,6 +55,10 @@ LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...); LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def, const char *const lst[]); +/* pre-defined references */ +#define LUA_NOREF (-2) +#define LUA_REFNIL (-1) + LUALIB_API int (luaL_ref) (lua_State *L, int t); LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref); @@ -84,6 +85,11 @@ LUALIB_API int (luaL_loadbufferx) (lua_State *L, const char *buff, size_t sz, const char *name, const char *mode); LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, int level); +LUALIB_API void (luaL_setfuncs) (lua_State *L, const luaL_Reg *l, int nup); +LUALIB_API void (luaL_pushmodule) (lua_State *L, const char *modname, + int sizehint); +LUALIB_API void *(luaL_testudata) (lua_State *L, int ud, const char *tname); +LUALIB_API void (luaL_setmetatable) (lua_State *L, const char *tname); /* @@ -113,6 +119,11 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, #define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n))) +/* From Lua 5.2. */ +#define luaL_newlibtable(L, l) \ + lua_createtable(L, 0, sizeof(l)/sizeof((l)[0]) - 1) +#define luaL_newlib(L, l) (luaL_newlibtable(L, l), luaL_setfuncs(L, l, 0)) + /* ** {====================================================== ** Generic Buffer manipulation @@ -147,21 +158,4 @@ LUALIB_API void (luaL_pushresult) (luaL_Buffer *B); /* }====================================================== */ - -/* compatibility with ref system */ - -/* pre-defined references */ -#define LUA_NOREF (-2) -#define LUA_REFNIL (-1) - -#define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \ - (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0)) - -#define lua_unref(L,ref) luaL_unref(L, LUA_REGISTRYINDEX, (ref)) - -#define lua_getref(L,ref) lua_rawgeti(L, LUA_REGISTRYINDEX, (ref)) - - -#define luaL_reg luaL_Reg - #endif diff --git a/luajit-2.1/src/lib_aux.c b/luajit-2.1/src/lib_aux.c index 4a1b70d..c40565c 100644 --- a/luajit-2.1/src/lib_aux.c +++ b/luajit-2.1/src/lib_aux.c @@ -1,6 +1,6 @@ /* ** Auxiliary library for the Lua/C API. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major parts taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -107,38 +107,36 @@ LUALIB_API const char *luaL_findtable(lua_State *L, int idx, static int libsize(const luaL_Reg *l) { int size = 0; - for (; l->name; l++) size++; + for (; l && l->name; l++) size++; return size; } +LUALIB_API void luaL_pushmodule(lua_State *L, const char *modname, int sizehint) +{ + luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); + lua_getfield(L, -1, modname); + if (!lua_istable(L, -1)) { + lua_pop(L, 1); + if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, sizehint) != NULL) + lj_err_callerv(L, LJ_ERR_BADMODN, modname); + lua_pushvalue(L, -1); + lua_setfield(L, -3, modname); /* _LOADED[modname] = new table. */ + } + lua_remove(L, -2); /* Remove _LOADED table. */ +} + LUALIB_API void luaL_openlib(lua_State *L, const char *libname, const luaL_Reg *l, int nup) { lj_lib_checkfpu(L); if (libname) { - int size = libsize(l); - /* check whether lib already exists */ - luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); - lua_getfield(L, -1, libname); /* get _LOADED[libname] */ - if (!lua_istable(L, -1)) { /* not found? */ - lua_pop(L, 1); /* remove previous result */ - /* try global variable (and create one if it does not exist) */ - if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL) - lj_err_callerv(L, LJ_ERR_BADMODN, libname); - lua_pushvalue(L, -1); - lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */ - } - lua_remove(L, -2); /* remove _LOADED table */ - lua_insert(L, -(nup+1)); /* move library table to below upvalues */ - } - for (; l->name; l++) { - int i; - for (i = 0; i < nup; i++) /* copy upvalues to the top */ - lua_pushvalue(L, -nup); - lua_pushcclosure(L, l->func, nup); - lua_setfield(L, -(nup+2), l->name); + luaL_pushmodule(L, libname, libsize(l)); + lua_insert(L, -(nup + 1)); /* Move module table below upvalues. */ } - lua_pop(L, nup); /* remove upvalues */ + if (l) + luaL_setfuncs(L, l, nup); + else + lua_pop(L, nup); /* Remove upvalues. */ } LUALIB_API void luaL_register(lua_State *L, const char *libname, @@ -147,6 +145,19 @@ LUALIB_API void luaL_register(lua_State *L, const char *libname, luaL_openlib(L, libname, l, 0); } +LUALIB_API void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup) +{ + luaL_checkstack(L, nup, "too many upvalues"); + for (; l->name; l++) { + int i; + for (i = 0; i < nup; i++) /* Copy upvalues to the top. */ + lua_pushvalue(L, -nup); + lua_pushcclosure(L, l->func, nup); + lua_setfield(L, -(nup + 2), l->name); + } + lua_pop(L, nup); /* Remove upvalues. */ +} + LUALIB_API const char *luaL_gsub(lua_State *L, const char *s, const char *p, const char *r) { @@ -302,7 +313,7 @@ static int panic(lua_State *L) #ifdef LUAJIT_USE_SYSMALLOC -#if LJ_64 && !defined(LUAJIT_USE_VALGRIND) +#if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND) #error "Must use builtin allocator for 64 bit target" #endif @@ -334,7 +345,7 @@ LUALIB_API lua_State *luaL_newstate(void) lua_State *L; void *ud = lj_alloc_create(); if (ud == NULL) return NULL; -#if LJ_64 +#if LJ_64 && !LJ_GC64 L = lj_state_newstate(lj_alloc_f, ud); #else L = lua_newstate(lj_alloc_f, ud); @@ -343,7 +354,7 @@ LUALIB_API lua_State *luaL_newstate(void) return L; } -#if LJ_64 +#if LJ_64 && !LJ_GC64 LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) { UNUSED(f); UNUSED(ud); diff --git a/luajit-2.1/src/lib_base.c b/luajit-2.1/src/lib_base.c index ca268b1..3a75787 100644 --- a/luajit-2.1/src/lib_base.c +++ b/luajit-2.1/src/lib_base.c @@ -1,6 +1,6 @@ /* ** Base and coroutine library. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -23,6 +23,7 @@ #include "lj_tab.h" #include "lj_meta.h" #include "lj_state.h" +#include "lj_frame.h" #if LJ_HASFFI #include "lj_ctype.h" #include "lj_cconv.h" @@ -345,7 +346,7 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.) static int load_aux(lua_State *L, int status, int envarg) { - if (status == 0) { + if (status == LUA_OK) { if (tvistab(L->base+envarg-1)) { GCfunc *fn = funcV(L->top-1); GCtab *t = tabV(L->base+envarg-1); @@ -418,7 +419,7 @@ LJLIB_CF(dofile) GCstr *fname = lj_lib_optstr(L, 1); setnilV(L->top); L->top = L->base+1; - if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0) + if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != LUA_OK) lua_error(L); lua_call(L, 0, LUA_MULTRET); return (int)(L->top - L->base) - 1; @@ -495,11 +496,10 @@ LJLIB_CF(print) shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring); for (i = 0; i < nargs; i++) { cTValue *o = &L->base[i]; - char buf[STRFMT_MAXBUF_NUM]; const char *str; size_t size; MSize len; - if (shortcut && (str = lj_strfmt_wstrnum(buf, o, &len)) != NULL) { + if (shortcut && (str = lj_strfmt_wstrnum(L, o, &len)) != NULL) { size = len; } else { copyTV(L, L->top+1, o); @@ -537,7 +537,7 @@ LJLIB_CF(coroutine_status) co = threadV(L->base); if (co == L) s = "running"; else if (co->status == LUA_YIELD) s = "suspended"; - else if (co->status != 0) s = "dead"; + else if (co->status != LUA_OK) s = "dead"; else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal"; else if (co->top == co->base) s = "dead"; else s = "suspended"; @@ -558,6 +558,12 @@ LJLIB_CF(coroutine_running) #endif } +LJLIB_CF(coroutine_isyieldable) +{ + setboolV(L->top++, cframe_canyield(L->cframe)); + return 1; +} + LJLIB_CF(coroutine_create) { lua_State *L1; @@ -577,7 +583,7 @@ LJLIB_ASM(coroutine_yield) static int ffh_resume(lua_State *L, lua_State *co, int wrap) { if (co->cframe != NULL || co->status > LUA_YIELD || - (co->status == 0 && co->top == co->base)) { + (co->status == LUA_OK && co->top == co->base)) { ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD; if (wrap) lj_err_caller(L, em); setboolV(L->base-1-LJ_FR2, 0); diff --git a/luajit-2.1/src/lib_bit.c b/luajit-2.1/src/lib_bit.c index 55cb2a8..c979a44 100644 --- a/luajit-2.1/src/lib_bit.c +++ b/luajit-2.1/src/lib_bit.c @@ -1,6 +1,6 @@ /* ** Bit manipulation library. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lib_bit_c diff --git a/luajit-2.1/src/lib_debug.c b/luajit-2.1/src/lib_debug.c index b610fb4..f112b5b 100644 --- a/luajit-2.1/src/lib_debug.c +++ b/luajit-2.1/src/lib_debug.c @@ -1,6 +1,6 @@ /* ** Debug library. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -283,13 +283,13 @@ LJLIB_CF(debug_setuservalue) /* ------------------------------------------------------------------------ */ -static const char KEY_HOOK = 'h'; +#define KEY_HOOK ((void *)0x3004) static void hookf(lua_State *L, lua_Debug *ar) { static const char *const hooknames[] = {"call", "return", "line", "count", "tail return"}; - lua_pushlightuserdata(L, (void *)&KEY_HOOK); + lua_pushlightuserdata(L, KEY_HOOK); lua_rawget(L, LUA_REGISTRYINDEX); if (lua_isfunction(L, -1)) { lua_pushstring(L, hooknames[(int)ar->event]); @@ -334,7 +334,7 @@ LJLIB_CF(debug_sethook) count = luaL_optint(L, arg+3, 0); func = hookf; mask = makemask(smask, count); } - lua_pushlightuserdata(L, (void *)&KEY_HOOK); + lua_pushlightuserdata(L, KEY_HOOK); lua_pushvalue(L, arg+1); lua_rawset(L, LUA_REGISTRYINDEX); lua_sethook(L, func, mask, count); @@ -349,7 +349,7 @@ LJLIB_CF(debug_gethook) if (hook != NULL && hook != hookf) { /* external hook? */ lua_pushliteral(L, "external hook"); } else { - lua_pushlightuserdata(L, (void *)&KEY_HOOK); + lua_pushlightuserdata(L, KEY_HOOK); lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */ } lua_pushstring(L, unmakemask(mask, buff)); diff --git a/luajit-2.1/src/lib_ffi.c b/luajit-2.1/src/lib_ffi.c index 343d04c..a37b4d4 100644 --- a/luajit-2.1/src/lib_ffi.c +++ b/luajit-2.1/src/lib_ffi.c @@ -1,6 +1,6 @@ /* ** FFI library. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lib_ffi_c @@ -505,10 +505,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.) } if (sz == CTSIZE_INVALID) lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE); - if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) - cd = lj_cdata_new(cts, id, sz); - else - cd = lj_cdata_newv(L, id, sz, ctype_align(info)); + cd = lj_cdata_newx(cts, id, sz, info); setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */ lj_cconv_ct_init(cts, ct, sz, cdataptr(cd), o, (MSize)(L->top - o)); /* Initialize cdata. */ @@ -832,7 +829,7 @@ static GCtab *ffi_finalizer(lua_State *L) settabV(L, L->top++, t); setgcref(t->metatable, obj2gco(t)); setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")), - lj_str_newlit(L, "K")); + lj_str_newlit(L, "k")); t->nomm = (uint8_t)(~(1u<<MM_mode)); return t; } diff --git a/luajit-2.1/src/lib_init.c b/luajit-2.1/src/lib_init.c index 85c194a..2ed370e 100644 --- a/luajit-2.1/src/lib_init.c +++ b/luajit-2.1/src/lib_init.c @@ -1,6 +1,6 @@ /* ** Library initialization. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major parts taken verbatim from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h diff --git a/luajit-2.1/src/lib_io.c b/luajit-2.1/src/lib_io.c index 2aa8347..9763ed4 100644 --- a/luajit-2.1/src/lib_io.c +++ b/luajit-2.1/src/lib_io.c @@ -1,6 +1,6 @@ /* ** I/O library. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -203,13 +203,12 @@ static int io_file_read(lua_State *L, FILE *fp, int start) for (n = start; nargs-- && ok; n++) { if (tvisstr(L->base+n)) { const char *p = strVdata(L->base+n); - if (p[0] != '*') - lj_err_arg(L, n+1, LJ_ERR_INVOPT); - if (p[1] == 'n') + if (p[0] == '*') p++; + if (p[0] == 'n') ok = io_file_readnum(L, fp); - else if ((p[1] & ~0x20) == 'L') - ok = io_file_readline(L, fp, (p[1] == 'l')); - else if (p[1] == 'a') + else if ((p[0] & ~0x20) == 'L') + ok = io_file_readline(L, fp, (p[0] == 'l')); + else if (p[0] == 'a') io_file_readall(L, fp); else lj_err_arg(L, n+1, LJ_ERR_INVFMT); @@ -232,9 +231,8 @@ static int io_file_write(lua_State *L, FILE *fp, int start) cTValue *tv; int status = 1; for (tv = L->base+start; tv < L->top; tv++) { - char buf[STRFMT_MAXBUF_NUM]; MSize len; - const char *p = lj_strfmt_wstrnum(buf, tv, &len); + const char *p = lj_strfmt_wstrnum(L, tv, &len); if (!p) lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING); status = status && (fwrite(p, 1, len, fp) == len); diff --git a/luajit-2.1/src/lib_jit.c b/luajit-2.1/src/lib_jit.c index 178ef24..22ca0a1 100644 --- a/luajit-2.1/src/lib_jit.c +++ b/luajit-2.1/src/lib_jit.c @@ -1,6 +1,6 @@ /* ** JIT library. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lib_jit_c @@ -204,6 +204,7 @@ LJLIB_CF(jit_util_funcinfo) lua_setfield(L, -2, "source"); lj_debug_pushloc(L, pt, pc); lua_setfield(L, -2, "loc"); + setprotoV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "proto")), pt); } else { GCfunc *fn = funcV(L->base); GCtab *t; @@ -668,6 +669,11 @@ static uint32_t jit_cpudetect(lua_State *L) if (fam >= 0x00000f00) /* K8, K10. */ flags |= JIT_F_PREFER_IMUL; } + if (vendor[0] >= 7) { + uint32_t xfeatures[4]; + lj_vm_cpuid(7, xfeatures); + flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; + } #endif } /* Check for required instruction set support on x86 (unnecessary on x64). */ @@ -710,15 +716,19 @@ static uint32_t jit_cpudetect(lua_State *L) #if LJ_HASJIT /* Compile-time MIPS CPU detection. */ #if LJ_ARCH_VERSION >= 20 - flags |= JIT_F_MIPS32R2; + flags |= JIT_F_MIPSXXR2; #endif /* Runtime MIPS CPU detection. */ #if defined(__GNUC__) - if (!(flags & JIT_F_MIPS32R2)) { + if (!(flags & JIT_F_MIPSXXR2)) { int x; +#ifdef __mips16 + x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */ +#else /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */ __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2"); - if (x) flags |= JIT_F_MIPS32R2; /* Either 0x80000000 (R2) or 0 (R1). */ +#endif + if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ } #endif #endif diff --git a/luajit-2.1/src/lib_math.c b/luajit-2.1/src/lib_math.c index 78838fc..ef9dda2 100644 --- a/luajit-2.1/src/lib_math.c +++ b/luajit-2.1/src/lib_math.c @@ -1,6 +1,6 @@ /* ** Math library. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include <math.h> @@ -221,10 +221,6 @@ LUALIB_API int luaopen_math(lua_State *L) rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); rs->valid = 0; /* Use lazy initialization to save some time on startup. */ LJ_LIB_REG(L, LUA_MATHLIBNAME, math); -#if defined(LUA_COMPAT_MOD) && !LJ_52 - lua_getfield(L, -1, "fmod"); - lua_setfield(L, -2, "mod"); -#endif return 1; } diff --git a/luajit-2.1/src/lib_os.c b/luajit-2.1/src/lib_os.c index 37d7d5b..9e78d49 100644 --- a/luajit-2.1/src/lib_os.c +++ b/luajit-2.1/src/lib_os.c @@ -1,6 +1,6 @@ /* ** OS library. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h diff --git a/luajit-2.1/src/lib_package.c b/luajit-2.1/src/lib_package.c index 32ba4d3..6fac43e 100644 --- a/luajit-2.1/src/lib_package.c +++ b/luajit-2.1/src/lib_package.c @@ -1,6 +1,6 @@ /* ** Package library. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -193,8 +193,7 @@ static void **ll_register(lua_State *L, const char *path) lua_pop(L, 1); plib = (void **)lua_newuserdata(L, sizeof(void *)); *plib = NULL; - luaL_getmetatable(L, "_LOADLIB"); - lua_setmetatable(L, -2); + luaL_setmetatable(L, "_LOADLIB"); lua_pushfstring(L, "LOADLIB: %s", path); lua_pushvalue(L, -2); lua_settable(L, LUA_REGISTRYINDEX); @@ -399,8 +398,7 @@ static int lj_cf_package_loader_preload(lua_State *L) /* ------------------------------------------------------------------------ */ -static const int sentinel_ = 0; -#define sentinel ((void *)&sentinel_) +#define sentinel ((void *)0x4004) static int lj_cf_package_require(lua_State *L) { @@ -490,29 +488,19 @@ static void modinit(lua_State *L, const char *modname) static int lj_cf_package_module(lua_State *L) { const char *modname = luaL_checkstring(L, 1); - int loaded = lua_gettop(L) + 1; /* index of _LOADED table */ - lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); - lua_getfield(L, loaded, modname); /* get _LOADED[modname] */ - if (!lua_istable(L, -1)) { /* not found? */ - lua_pop(L, 1); /* remove previous result */ - /* try global variable (and create one if it does not exist) */ - if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL) - lj_err_callerv(L, LJ_ERR_BADMODN, modname); - lua_pushvalue(L, -1); - lua_setfield(L, loaded, modname); /* _LOADED[modname] = new table */ - } - /* check whether table already has a _NAME field */ + int lastarg = (int)(L->top - L->base); + luaL_pushmodule(L, modname, 1); lua_getfield(L, -1, "_NAME"); - if (!lua_isnil(L, -1)) { /* is table an initialized module? */ + if (!lua_isnil(L, -1)) { /* Module already initialized? */ lua_pop(L, 1); - } else { /* no; initialize it */ + } else { lua_pop(L, 1); modinit(L, modname); } lua_pushvalue(L, -1); setfenv(L); - dooptions(L, loaded - 1); - return 0; + dooptions(L, lastarg); + return LJ_52; } static int lj_cf_package_seeall(lua_State *L) @@ -583,13 +571,16 @@ LUALIB_API int luaopen_package(lua_State *L) lj_lib_pushcf(L, lj_cf_package_unloadlib, 1); lua_setfield(L, -2, "__gc"); luaL_register(L, LUA_LOADLIBNAME, package_lib); - lua_pushvalue(L, -1); - lua_replace(L, LUA_ENVIRONINDEX); + lua_copy(L, -1, LUA_ENVIRONINDEX); lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0); for (i = 0; package_loaders[i] != NULL; i++) { lj_lib_pushcf(L, package_loaders[i], 1); lua_rawseti(L, -2, i+1); } +#if LJ_52 + lua_pushvalue(L, -1); + lua_setfield(L, -3, "searchers"); +#endif lua_setfield(L, -2, "loaders"); lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); noenv = lua_toboolean(L, -1); diff --git a/luajit-2.1/src/lib_string.c b/luajit-2.1/src/lib_string.c index a6d9986..76b0730 100644 --- a/luajit-2.1/src/lib_string.c +++ b/luajit-2.1/src/lib_string.c @@ -1,6 +1,6 @@ /* ** String library. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -737,10 +737,6 @@ LUALIB_API int luaopen_string(lua_State *L) GCtab *mt; global_State *g; LJ_LIB_REG(L, LUA_STRLIBNAME, string); -#if defined(LUA_COMPAT_GFIND) && !LJ_52 - lua_getfield(L, -1, "gmatch"); - lua_setfield(L, -2, "gfind"); -#endif mt = lj_tab_new(L, 0, 1); /* NOBARRIER: basemt is a GC root. */ g = G(L); diff --git a/luajit-2.1/src/lib_table.c b/luajit-2.1/src/lib_table.c index 56612ab..0450f1f 100644 --- a/luajit-2.1/src/lib_table.c +++ b/luajit-2.1/src/lib_table.c @@ -1,6 +1,6 @@ /* ** Table library. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -129,6 +129,26 @@ LJLIB_LUA(table_remove) /* end */ +LJLIB_LUA(table_move) /* + function(a1, f, e, t, a2) + CHECK_tab(a1) + CHECK_int(f) + CHECK_int(e) + CHECK_int(t) + if a2 == nil then a2 = a1 end + CHECK_tab(a2) + if e >= f then + local d = t - f + if t > e or t <= f or a2 ~= a1 then + for i=f,e do a2[i+d] = a1[i] end + else + for i=e,f,-1 do a2[i+d] = a1[i] end + end + end + return a2 + end +*/ + LJLIB_CF(table_concat) LJLIB_REC(.) { GCtab *t = lj_lib_checktab(L, 1); diff --git a/luajit-2.1/src/lj.supp b/luajit-2.1/src/lj.supp index acb9e78..217f7c8 100644 --- a/luajit-2.1/src/lj.supp +++ b/luajit-2.1/src/lj.supp @@ -27,15 +27,15 @@ { Optimized string compare Memcheck:Addr4 - fun:lj_str_fastcmp + fun:str_fastcmp } { Optimized string compare Memcheck:Addr1 - fun:lj_str_fastcmp + fun:str_fastcmp } { Optimized string compare Memcheck:Cond - fun:lj_str_fastcmp + fun:str_fastcmp } diff --git a/luajit-2.1/src/lj_alloc.c b/luajit-2.1/src/lj_alloc.c index ddd50ca..95d15d0 100644 --- a/luajit-2.1/src/lj_alloc.c +++ b/luajit-2.1/src/lj_alloc.c @@ -72,13 +72,56 @@ #define IS_DIRECT_BIT (SIZE_T_ONE) + +/* Determine system-specific block allocation method. */ #if LJ_TARGET_WINDOWS #define WIN32_LEAN_AND_MEAN #include <windows.h> +#define LJ_ALLOC_VIRTUALALLOC 1 + #if LJ_64 && !LJ_GC64 +#define LJ_ALLOC_NTAVM 1 +#endif + +#else + +#include <errno.h> +/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */ +#include <sys/mman.h> + +#define LJ_ALLOC_MMAP 1 + +#if LJ_64 + +#define LJ_ALLOC_MMAP_PROBE 1 + +#if LJ_GC64 +#define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */ +#elif LJ_TARGET_X64 && LJ_HASJIT +/* Due to limitations in the x64 compiler backend. */ +#define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */ +#else +#define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */ +#endif + +#endif + +#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT) +#define LJ_ALLOC_MMAP32 1 +#endif + +#if LJ_TARGET_LINUX +#define LJ_ALLOC_MREMAP 1 +#endif + +#endif + +#if LJ_ALLOC_VIRTUALALLOC + +#if LJ_ALLOC_NTAVM /* Undocumented, but hey, that's what we all love so much about Windows. */ typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, size_t *size, ULONG alloctype, ULONG prot); @@ -89,14 +132,15 @@ static PNTAVM ntavm; */ #define NTAVM_ZEROBITS 1 -static void INIT_MMAP(void) +static void init_mmap(void) { ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), "NtAllocateVirtualMemory"); } +#define INIT_MMAP() init_mmap() /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ -static LJ_AINLINE void *CALL_MMAP(size_t size) +static void *CALL_MMAP(size_t size) { DWORD olderr = GetLastError(); void *ptr = NULL; @@ -107,7 +151,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size) } /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ -static LJ_AINLINE void *DIRECT_MMAP(size_t size) +static void *DIRECT_MMAP(size_t size) { DWORD olderr = GetLastError(); void *ptr = NULL; @@ -119,10 +163,8 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size) #else -#define INIT_MMAP() ((void)0) - /* Win32 MMAP via VirtualAlloc */ -static LJ_AINLINE void *CALL_MMAP(size_t size) +static void *CALL_MMAP(size_t size) { DWORD olderr = GetLastError(); void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); @@ -131,7 +173,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size) } /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ -static LJ_AINLINE void *DIRECT_MMAP(size_t size) +static void *DIRECT_MMAP(size_t size) { DWORD olderr = GetLastError(); void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, @@ -143,7 +185,7 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size) #endif /* This function supports releasing coalesed segments */ -static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) +static int CALL_MUNMAP(void *ptr, size_t size) { DWORD olderr = GetLastError(); MEMORY_BASIC_INFORMATION minfo; @@ -163,10 +205,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) return 0; } -#else - -#include <errno.h> -#include <sys/mman.h> +#elif LJ_ALLOC_MMAP #define MMAP_PROT (PROT_READ|PROT_WRITE) #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) @@ -174,107 +213,151 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) #endif #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) -#if LJ_64 && !LJ_GC64 -/* 64 bit mode with 32 bit pointers needs special support for allocating -** memory in the lower 2GB. -*/ - -#if defined(MAP_32BIT) +#if LJ_ALLOC_MMAP_PROBE -#if defined(__sun__) -#define MMAP_REGION_START ((uintptr_t)0x1000) +#ifdef MAP_TRYFIXED +#define MMAP_FLAGS_PROBE (MMAP_FLAGS|MAP_TRYFIXED) #else -/* Actually this only gives us max. 1GB in current Linux kernels. */ -#define MMAP_REGION_START ((uintptr_t)0) +#define MMAP_FLAGS_PROBE MMAP_FLAGS #endif -static LJ_AINLINE void *CALL_MMAP(size_t size) -{ - int olderr = errno; - void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); - errno = olderr; - return ptr; -} +#define LJ_ALLOC_MMAP_PROBE_MAX 30 +#define LJ_ALLOC_MMAP_PROBE_LINEAR 5 -#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || defined(__CYGWIN__) +#define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000) -/* OSX and FreeBSD mmap() use a naive first-fit linear search. -** That's perfect for us. Except that -pagezero_size must be set for OSX, -** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs -** to be reduced to 250MB on FreeBSD. +/* No point in a giant ifdef mess. Just try to open /dev/urandom. +** It doesn't really matter if this fails, since we get some ASLR bits from +** every unsuitable allocation, too. And we prefer linear allocation, anyway. */ -#if LJ_TARGET_OSX || defined(__DragonFly__) -#define MMAP_REGION_START ((uintptr_t)0x10000) -#elif LJ_TARGET_PS4 -#define MMAP_REGION_START ((uintptr_t)0x4000) -#else -#define MMAP_REGION_START ((uintptr_t)0x10000000) -#endif -#define MMAP_REGION_END ((uintptr_t)0x80000000) +#include <fcntl.h> +#include <unistd.h> -#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 -#include <sys/resource.h> -#endif +static uintptr_t mmap_probe_seed(void) +{ + uintptr_t val; + int fd = open("/dev/urandom", O_RDONLY); + if (fd != -1) { + int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val)); + (void)close(fd); + if (ok) return val; + } + return 1; /* Punt. */ +} -static LJ_AINLINE void *CALL_MMAP(size_t size) +static void *mmap_probe(size_t size) { - int olderr = errno; /* Hint for next allocation. Doesn't need to be thread-safe. */ - static uintptr_t alloc_hint = MMAP_REGION_START; - int retry = 0; -#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 - static int rlimit_modified = 0; - if (LJ_UNLIKELY(rlimit_modified == 0)) { - struct rlimit rlim; - rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START; - setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail below. */ - rlimit_modified = 1; - } -#endif - for (;;) { - void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0); - if ((uintptr_t)p >= MMAP_REGION_START && - (uintptr_t)p + size < MMAP_REGION_END) { - alloc_hint = (uintptr_t)p + size; + static uintptr_t hint_addr = 0; + static uintptr_t hint_prng = 0; + int olderr = errno; + int retry; + for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) { + void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0); + uintptr_t addr = (uintptr_t)p; + if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER) { + /* We got a suitable address. Bump the hint address. */ + hint_addr = addr + size; errno = olderr; return p; } - if (p != CMFAIL) munmap(p, size); -#if defined(__sun__) || defined(__DragonFly__) - alloc_hint += 0x1000000; /* Need near-exhaustive linear scan. */ - if (alloc_hint + size < MMAP_REGION_END) continue; -#endif - if (retry) break; - retry = 1; - alloc_hint = MMAP_REGION_START; + if (p != MFAIL) { + munmap(p, size); + } else if (errno == ENOMEM) { + return MFAIL; + } + if (hint_addr) { + /* First, try linear probing. */ + if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) { + hint_addr += 0x1000000; + if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0) + hint_addr = 0; + continue; + } else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) { + /* Next, try a no-hint probe to get back an ASLR address. */ + hint_addr = 0; + continue; + } + } + /* Finally, try pseudo-random probing. */ + if (LJ_UNLIKELY(hint_prng == 0)) { + hint_prng = mmap_probe_seed(); + } + /* The unsuitable address we got has some ASLR PRNG bits. */ + hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1)); + do { /* The PRNG itself is very weak, but see above. */ + hint_prng = hint_prng * 1103515245 + 12345; + hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE; + hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1); + } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER); } errno = olderr; - return CMFAIL; + return MFAIL; } +#endif + +#if LJ_ALLOC_MMAP32 + +#if defined(__sun__) +#define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000) #else +#define LJ_ALLOC_MMAP32_START ((uintptr_t)0) +#endif -#error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS" +static void *mmap_map32(size_t size) +{ +#if LJ_ALLOC_MMAP_PROBE + static int fallback = 0; + if (fallback) + return mmap_probe(size); +#endif + { + int olderr = errno; + void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); + errno = olderr; + /* This only allows 1GB on Linux. So fallback to probing to get 2GB. */ +#if LJ_ALLOC_MMAP_PROBE + if (ptr == MFAIL) { + fallback = 1; + return mmap_probe(size); + } +#endif + return ptr; + } +} #endif +#if LJ_ALLOC_MMAP32 +#define CALL_MMAP(size) mmap_map32(size) +#elif LJ_ALLOC_MMAP_PROBE +#define CALL_MMAP(size) mmap_probe(size) #else - -/* 32 bit mode and GC64 mode is easy. */ -static LJ_AINLINE void *CALL_MMAP(size_t size) +static void *CALL_MMAP(size_t size) { int olderr = errno; void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); errno = olderr; return ptr; } - #endif -#define INIT_MMAP() ((void)0) -#define DIRECT_MMAP(s) CALL_MMAP(s) +#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 + +#include <sys/resource.h> + +static void init_mmap(void) +{ + struct rlimit rlim; + rlim.rlim_cur = rlim.rlim_max = 0x10000; + setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */ +} +#define INIT_MMAP() init_mmap() + +#endif -static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) +static int CALL_MUNMAP(void *ptr, size_t size) { int olderr = errno; int ret = munmap(ptr, size); @@ -282,10 +365,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) return ret; } -#if LJ_TARGET_LINUX +#if LJ_ALLOC_MREMAP /* Need to define _GNU_SOURCE to get the mremap prototype. */ -static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, - int flags) +static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags) { int olderr = errno; ptr = mremap(ptr, osz, nsz, flags); @@ -305,6 +387,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, #endif + +#ifndef INIT_MMAP +#define INIT_MMAP() ((void)0) +#endif + +#ifndef DIRECT_MMAP +#define DIRECT_MMAP(s) CALL_MMAP(s) +#endif + #ifndef CALL_MREMAP #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) #endif diff --git a/luajit-2.1/src/lj_api.c b/luajit-2.1/src/lj_api.c index 042b0d9..d17a575 100644 --- a/luajit-2.1/src/lj_api.c +++ b/luajit-2.1/src/lj_api.c @@ -1,6 +1,6 @@ /* ** Public Lua/C API. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -112,6 +112,13 @@ LUA_API void lua_xmove(lua_State *from, lua_State *to, int n) from->top = f; } +LUA_API const lua_Number *lua_version(lua_State *L) +{ + static const lua_Number version = LUA_VERSION_NUM; + UNUSED(L); + return &version; +} + /* -- Stack manipulation -------------------------------------------------- */ LUA_API int lua_gettop(lua_State *L) @@ -152,30 +159,40 @@ LUA_API void lua_insert(lua_State *L, int idx) copyTV(L, p, L->top); } -LUA_API void lua_replace(lua_State *L, int idx) +static void copy_slot(lua_State *L, TValue *f, int idx) { - api_checknelems(L, 1); if (idx == LUA_GLOBALSINDEX) { - api_check(L, tvistab(L->top-1)); + api_check(L, tvistab(f)); /* NOBARRIER: A thread (i.e. L) is never black. */ - setgcref(L->env, obj2gco(tabV(L->top-1))); + setgcref(L->env, obj2gco(tabV(f))); } else if (idx == LUA_ENVIRONINDEX) { GCfunc *fn = curr_func(L); if (fn->c.gct != ~LJ_TFUNC) lj_err_msg(L, LJ_ERR_NOENV); - api_check(L, tvistab(L->top-1)); - setgcref(fn->c.env, obj2gco(tabV(L->top-1))); - lj_gc_barrier(L, fn, L->top-1); + api_check(L, tvistab(f)); + setgcref(fn->c.env, obj2gco(tabV(f))); + lj_gc_barrier(L, fn, f); } else { TValue *o = index2adr(L, idx); api_checkvalidindex(L, o); - copyTV(L, o, L->top-1); + copyTV(L, o, f); if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ - lj_gc_barrier(L, curr_func(L), L->top-1); + lj_gc_barrier(L, curr_func(L), f); } +} + +LUA_API void lua_replace(lua_State *L, int idx) +{ + api_checknelems(L, 1); + copy_slot(L, L->top - 1, idx); L->top--; } +LUA_API void lua_copy(lua_State *L, int fromidx, int toidx) +{ + copy_slot(L, index2adr(L, fromidx), toidx); +} + LUA_API void lua_pushvalue(lua_State *L, int idx) { copyTV(L, L->top, index2adr(L, idx)); @@ -325,6 +342,22 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx) return 0; } +LUA_API lua_Number lua_tonumberx(lua_State *L, int idx, int *ok) +{ + cTValue *o = index2adr(L, idx); + TValue tmp; + if (LJ_LIKELY(tvisnumber(o))) { + if (ok) *ok = 1; + return numberVnum(o); + } else if (tvisstr(o) && lj_strscan_num(strV(o), &tmp)) { + if (ok) *ok = 1; + return numV(&tmp); + } else { + if (ok) *ok = 0; + return 0; + } +} + LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx) { cTValue *o = index2adr(L, idx); @@ -362,7 +395,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) return 0; if (tvisint(&tmp)) - return (lua_Integer)intV(&tmp); + return intV(&tmp); n = numV(&tmp); } #if LJ_64 @@ -372,6 +405,35 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) #endif } +LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) +{ + cTValue *o = index2adr(L, idx); + TValue tmp; + lua_Number n; + if (LJ_LIKELY(tvisint(o))) { + if (ok) *ok = 1; + return intV(o); + } else if (LJ_LIKELY(tvisnum(o))) { + n = numV(o); + } else { + if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) { + if (ok) *ok = 0; + return 0; + } + if (tvisint(&tmp)) { + if (ok) *ok = 1; + return intV(&tmp); + } + n = numV(&tmp); + } + if (ok) *ok = 1; +#if LJ_64 + return (lua_Integer)n; +#else + return lj_num2int(n); +#endif +} + LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) { cTValue *o = index2adr(L, idx); @@ -858,7 +920,7 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2) lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1])); } -LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) +LUALIB_API void *luaL_testudata(lua_State *L, int idx, const char *tname) { cTValue *o = index2adr(L, idx); if (tvisudata(o)) { @@ -867,8 +929,14 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable)) return uddata(ud); } - lj_err_argtype(L, idx, tname); - return NULL; /* unreachable */ + return NULL; /* value is not a userdata with a metatable */ +} + +LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) +{ + void *p = luaL_testudata(L, idx, tname); + if (!p) lj_err_argtype(L, idx, tname); + return p; } /* -- Object setters ------------------------------------------------------ */ @@ -977,6 +1045,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) return 1; } +LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname) +{ + lua_getfield(L, LUA_REGISTRYINDEX, tname); + lua_setmetatable(L, -2); +} + LUA_API int lua_setfenv(lua_State *L, int idx) { cTValue *o = index2adr(L, idx); @@ -1032,7 +1106,7 @@ static TValue *api_call_base(lua_State *L, int nargs) LUA_API void lua_call(lua_State *L, int nargs, int nresults) { - api_check(L, L->status == 0 || L->status == LUA_ERRERR); + api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); api_checknelems(L, nargs+1); lj_vm_call(L, api_call_base(L, nargs), nresults+1); } @@ -1043,7 +1117,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) uint8_t oldh = hook_save(g); ptrdiff_t ef; int status; - api_check(L, L->status == 0 || L->status == LUA_ERRERR); + api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); api_checknelems(L, nargs+1); if (errfunc == 0) { ef = 0; @@ -1075,7 +1149,7 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) global_State *g = G(L); uint8_t oldh = hook_save(g); int status; - api_check(L, L->status == 0 || L->status == LUA_ERRERR); + api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); status = lj_vm_cpcall(L, func, ud, cpcall); if (status) hook_restore(g, oldh); return status; @@ -1096,6 +1170,11 @@ LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) /* -- Coroutine yield and resume ------------------------------------------ */ +LUA_API int lua_isyieldable(lua_State *L) +{ + return cframe_canyield(L->cframe); +} + LUA_API int lua_yield(lua_State *L, int nresults) { void *cf = L->cframe; @@ -1140,7 +1219,7 @@ LUA_API int lua_resume(lua_State *L, int nargs) { if (L->cframe == NULL && L->status <= LUA_YIELD) return lj_vm_resume(L, - L->status == 0 ? api_call_base(L, nargs) : L->top - nargs, + L->status == LUA_OK ? api_call_base(L, nargs) : L->top - nargs, 0, 0); L->top = L->base; setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP)); diff --git a/luajit-2.1/src/lj_arch.h b/luajit-2.1/src/lj_arch.h index c66a11c..c8d7138 100644 --- a/luajit-2.1/src/lj_arch.h +++ b/luajit-2.1/src/lj_arch.h @@ -1,6 +1,6 @@ /* ** Target architecture selection. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_ARCH_H @@ -25,6 +25,10 @@ #define LUAJIT_ARCH_ppc 5 #define LUAJIT_ARCH_MIPS 6 #define LUAJIT_ARCH_mips 6 +#define LUAJIT_ARCH_MIPS32 6 +#define LUAJIT_ARCH_mips32 6 +#define LUAJIT_ARCH_MIPS64 7 +#define LUAJIT_ARCH_mips64 7 /* Target OS. */ #define LUAJIT_OS_OTHER 0 @@ -47,8 +51,10 @@ #define LUAJIT_TARGET LUAJIT_ARCH_ARM64 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) #define LUAJIT_TARGET LUAJIT_ARCH_PPC +#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64) +#define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) -#define LUAJIT_TARGET LUAJIT_ARCH_MIPS +#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 #else #error "No support for this architecture (yet)" #endif @@ -68,7 +74,10 @@ defined(__NetBSD__) || defined(__OpenBSD__) || \ defined(__DragonFly__)) && !defined(__ORBIS__) #define LUAJIT_OS LUAJIT_OS_BSD -#elif (defined(__sun__) && defined(__svr4__)) || defined(__CYGWIN__) +#elif (defined(__sun__) && defined(__svr4__)) || defined(__HAIKU__) +#define LUAJIT_OS LUAJIT_OS_POSIX +#elif defined(__CYGWIN__) +#define LJ_TARGET_CYGWIN 1 #define LUAJIT_OS LUAJIT_OS_POSIX #else #define LUAJIT_OS LUAJIT_OS_OTHER @@ -137,7 +146,7 @@ #define LJ_ARCH_NAME "x86" #define LJ_ARCH_BITS 32 #define LJ_ARCH_ENDIAN LUAJIT_LE -#if LJ_TARGET_WINDOWS || __CYGWIN__ +#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN #define LJ_ABI_WIN 1 #else #define LJ_ABI_WIN 0 @@ -155,7 +164,7 @@ #define LJ_ARCH_NAME "x64" #define LJ_ARCH_BITS 64 #define LJ_ARCH_ENDIAN LUAJIT_LE -#if LJ_TARGET_WINDOWS || __CYGWIN__ +#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN #define LJ_ABI_WIN 1 #else #define LJ_ABI_WIN 0 @@ -206,9 +215,14 @@ #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 -#define LJ_ARCH_NAME "arm64" #define LJ_ARCH_BITS 64 +#if defined(__AARCH64EB__) +#define LJ_ARCH_NAME "arm64be" +#define LJ_ARCH_ENDIAN LUAJIT_BE +#else +#define LJ_ARCH_NAME "arm64" #define LJ_ARCH_ENDIAN LUAJIT_LE +#endif #define LJ_TARGET_ARM64 1 #define LJ_TARGET_EHRETREG 0 #define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */ @@ -217,7 +231,6 @@ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_TARGET_GC64 1 #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -#define LJ_ARCH_NOJIT 1 /* NYI */ #define LJ_ARCH_VERSION 80 @@ -286,25 +299,60 @@ #define LJ_ARCH_XENON 1 #endif -#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS +#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) +#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 #define LJ_ARCH_NAME "mipsel" +#else +#define LJ_ARCH_NAME "mips64el" +#endif #define LJ_ARCH_ENDIAN LUAJIT_LE #else +#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 #define LJ_ARCH_NAME "mips" +#else +#define LJ_ARCH_NAME "mips64" +#endif #define LJ_ARCH_ENDIAN LUAJIT_BE #endif + +#if !defined(LJ_ARCH_HASFPU) +#ifdef __mips_soft_float +#define LJ_ARCH_HASFPU 0 +#else +#define LJ_ARCH_HASFPU 1 +#endif +#endif + +#if !defined(LJ_ABI_SOFTFP) +#ifdef __mips_soft_float +#define LJ_ABI_SOFTFP 1 +#else +#define LJ_ABI_SOFTFP 0 +#endif +#endif + +#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 #define LJ_ARCH_BITS 32 +#define LJ_TARGET_MIPS32 1 +#else +#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU +#define LJ_ARCH_NOJIT 1 /* NYI */ +#endif +#define LJ_ARCH_BITS 64 +#define LJ_TARGET_MIPS64 1 +#define LJ_TARGET_GC64 1 +#endif #define LJ_TARGET_MIPS 1 #define LJ_TARGET_EHRETREG 4 #define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ #define LJ_TARGET_MASKSHIFT 1 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ -#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -#if _MIPS_ARCH_MIPS32R2 +#if _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2 #define LJ_ARCH_VERSION 20 #else #define LJ_ARCH_VERSION 10 @@ -334,7 +382,7 @@ #endif #elif LJ_TARGET_ARM64 #if __clang__ -#if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5) +#if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__) #error "Need at least Clang 3.5 or newer" #endif #else @@ -366,9 +414,6 @@ #error "Only ARM EABI or iOS 3.0+ ABI is supported" #endif #elif LJ_TARGET_ARM64 -#if defined(__AARCH64EB__) -#error "No support for big-endian ARM64" -#endif #if defined(_ILP32) #error "No support for ILP32 model on ARM64" #endif @@ -385,12 +430,13 @@ #ifdef __NO_FPRS__ #error "No support for PPC/e500 anymore (use LuaJIT 2.0)" #endif -#elif LJ_TARGET_MIPS -#if defined(__mips_soft_float) -#error "No support for MIPS CPUs without FPU" +#elif LJ_TARGET_MIPS32 +#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32)) +#error "Only o32 ABI supported for MIPS32" #endif -#if defined(_LP64) -#error "No support for MIPS64" +#elif LJ_TARGET_MIPS64 +#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64)) +#error "Only n64 ABI supported for MIPS64" #endif #endif #endif @@ -431,7 +477,7 @@ #endif /* Disable or enable the JIT compiler. */ -#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64 +#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) #define LJ_HASJIT 0 #else #define LJ_HASJIT 1 @@ -492,7 +538,7 @@ #endif /* Various workarounds for embedded operating systems or weak C runtimes. */ -#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS +#if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS #define LUAJIT_NO_LOG2 #endif #if defined(__symbian__) || LJ_TARGET_WINDOWS @@ -502,6 +548,11 @@ #define LJ_NO_SYSTEM 1 #endif +#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__ +/* NYI: no support for compact unwind specification, yet. */ +#define LUAJIT_NO_UNWIND 1 +#endif + #if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 #define LJ_NO_UNWIND 1 #endif diff --git a/luajit-2.1/src/lj_asm.c b/luajit-2.1/src/lj_asm.c index 9db950a..c2cf5a9 100644 --- a/luajit-2.1/src/lj_asm.c +++ b/luajit-2.1/src/lj_asm.c @@ -1,6 +1,6 @@ /* ** IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_asm_c @@ -91,7 +91,7 @@ typedef struct ASMState { MCode *realign; /* Realign loop if not NULL. */ #ifdef RID_NUM_KREF - int32_t krefk[RID_NUM_KREF]; + intptr_t krefk[RID_NUM_KREF]; #endif IRRef1 phireg[RID_MAX]; /* PHI register references. */ uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */ @@ -144,7 +144,7 @@ static LJ_AINLINE void checkmclim(ASMState *as) #define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref))) #define ra_krefk(as, ref) (as->krefk[(ref)]) -static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k) +static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k) { IRRef ref = (IRRef)(r - RID_MIN_KREF); as->krefk[ref] = k; @@ -171,6 +171,8 @@ IRFLDEF(FLOFS) #include "lj_emit_x86.h" #elif LJ_TARGET_ARM #include "lj_emit_arm.h" +#elif LJ_TARGET_ARM64 +#include "lj_emit_arm64.h" #elif LJ_TARGET_PPC #include "lj_emit_ppc.h" #elif LJ_TARGET_MIPS @@ -322,7 +324,11 @@ static Reg ra_rematk(ASMState *as, IRRef ref) lua_assert(!rset_test(as->freeset, r)); ra_free(as, r); ra_modified(as, r); +#if LJ_64 + emit_loadu64(as, r, ra_krefk(as, ref)); +#else emit_loadi(as, r, ra_krefk(as, ref)); +#endif return r; } ir = IR(ref); @@ -334,7 +340,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref) RA_DBGX((as, "remat $i $r", ir, r)); #if !LJ_SOFTFP if (ir->o == IR_KNUM) { - emit_loadn(as, r, ir_knum(ir)); + emit_loadk64(as, r, ir); } else #endif if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { @@ -346,6 +352,12 @@ static Reg ra_rematk(ASMState *as, IRRef ref) #if LJ_64 } else if (ir->o == IR_KINT64) { emit_loadu64(as, r, ir_kint64(ir)->u64); +#if LJ_GC64 + } else if (ir->o == IR_KGC) { + emit_loadu64(as, r, (uintptr_t)ir_kgc(ir)); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + emit_loadu64(as, r, (uintptr_t)ir_kptr(ir)); +#endif #endif } else { lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || @@ -518,7 +530,7 @@ static void ra_evictk(ASMState *as) #ifdef RID_NUM_KREF /* Allocate a register for a constant. */ -static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow) { /* First try to find a register which already holds the same constant. */ RegSet pick, work = ~as->freeset & RSET_GPR; @@ -527,9 +539,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) IRRef ref; r = rset_pickbot(work); ref = regcost_ref(as->cost[r]); +#if LJ_64 + if (ref < ASMREF_L) { + if (ra_iskref(ref)) { + if (k == ra_krefk(as, ref)) + return r; + } else { + IRIns *ir = IR(ref); + if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) || +#if LJ_GC64 + (ir->o == IR_KINT && k == ir->i) || + (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) || + ((ir->o == IR_KPTR || ir->o == IR_KKPTR) && + k == (intptr_t)ir_kptr(ir)) +#else + (ir->o != IR_KINT64 && k == ir->i) +#endif + ) + return r; + } + } +#else if (ref < ASMREF_L && k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) return r; +#endif rset_clear(work, r); } pick = as->freeset & allow; @@ -549,7 +583,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) } /* Allocate a specific register for a constant. */ -static void ra_allockreg(ASMState *as, int32_t k, Reg r) +static void ra_allockreg(ASMState *as, intptr_t k, Reg r) { Reg kr = ra_allock(as, k, RID2RSET(r)); if (kr != r) { @@ -619,10 +653,20 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow) return r; } +/* Add a register rename to the IR. */ +static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno) +{ + IRRef ren; + lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno); + ren = tref_ref(lj_ir_emit(as->J)); + as->J->cur.ir[ren].r = (uint8_t)down; + as->J->cur.ir[ren].s = SPS_NONE; +} + /* Rename register allocation and emit move. */ static void ra_rename(ASMState *as, Reg down, Reg up) { - IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]); + IRRef ref = regcost_ref(as->cost[up] = as->cost[down]); IRIns *ir = IR(ref); ir->r = (uint8_t)up; as->cost[down] = 0; @@ -635,11 +679,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up) RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ - lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno); - ren = tref_ref(lj_ir_emit(as->J)); - as->ir = as->T->ir; /* The IR may have been reallocated. */ - IR(ren)->r = (uint8_t)down; - IR(ren)->s = SPS_NONE; + ra_addrename(as, down, ref, as->snapno); } } @@ -689,17 +729,21 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) if (ra_noreg(left)) { if (irref_isk(lref)) { if (ir->o == IR_KNUM) { - cTValue *tv = ir_knum(ir); /* FP remat needs a load except for +0. Still better than eviction. */ - if (tvispzero(tv) || !(as->freeset & RSET_FPR)) { - emit_loadn(as, dest, tv); + if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) { + emit_loadk64(as, dest, ir); return; } #if LJ_64 } else if (ir->o == IR_KINT64) { - emit_loadu64(as, dest, ir_kint64(ir)->u64); + emit_loadk64(as, dest, ir); + return; +#if LJ_GC64 + } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) { + emit_loadk64(as, dest, ir); return; #endif +#endif } else if (ir->o != IR_KPRI) { lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); @@ -941,7 +985,7 @@ static void asm_snap_prep(ASMState *as) } else { /* Process any renames above the highwater mark. */ for (; as->snaprename < as->T->nins; as->snaprename++) { - IRIns *ir = IR(as->snaprename); + IRIns *ir = &as->T->ir[as->snaprename]; if (asm_snap_checkrename(as, ir->op1)) ir->op2 = REF_BIAS-1; /* Kill rename. */ } @@ -1055,7 +1099,7 @@ static void asm_bufhdr(ASMState *as, IRIns *ir) } } else { Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); - /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */ + /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */ emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); } @@ -1472,12 +1516,7 @@ static void asm_phi_fixup(ASMState *as) irt_clearmark(ir->t); /* Left PHI gained a spill slot before the loop? */ if (ra_hasspill(ir->s)) { - IRRef ren; - lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno); - ren = tref_ref(lj_ir_emit(as->J)); - as->ir = as->T->ir; /* The IR may have been reallocated. */ - IR(ren)->r = (uint8_t)r; - IR(ren)->s = SPS_NONE; + ra_addrename(as, r, lref, as->loopsnapno); } } rset_clear(work, r); @@ -1552,6 +1591,8 @@ static void asm_loop(ASMState *as) #include "lj_asm_x86.h" #elif LJ_TARGET_ARM #include "lj_asm_arm.h" +#elif LJ_TARGET_ARM64 +#include "lj_asm_arm64.h" #elif LJ_TARGET_PPC #include "lj_asm_ppc.h" #elif LJ_TARGET_MIPS @@ -1609,16 +1650,24 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_ADD: asm_add(as, ir); break; case IR_SUB: asm_sub(as, ir); break; case IR_MUL: asm_mul(as, ir); break; - case IR_DIV: asm_div(as, ir); break; case IR_MOD: asm_mod(as, ir); break; - case IR_POW: asm_pow(as, ir); break; case IR_NEG: asm_neg(as, ir); break; +#if LJ_SOFTFP + case IR_DIV: case IR_POW: case IR_ABS: + case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: + lua_assert(0); /* Unused for LJ_SOFTFP. */ + break; +#else + case IR_DIV: asm_div(as, ir); break; + case IR_POW: asm_pow(as, ir); break; case IR_ABS: asm_abs(as, ir); break; case IR_ATAN2: asm_atan2(as, ir); break; case IR_LDEXP: asm_ldexp(as, ir); break; + case IR_FPMATH: asm_fpmath(as, ir); break; + case IR_TOBIT: asm_tobit(as, ir); break; +#endif case IR_MIN: asm_min(as, ir); break; case IR_MAX: asm_max(as, ir); break; - case IR_FPMATH: asm_fpmath(as, ir); break; /* Overflow-checking arithmetic ops. */ case IR_ADDOV: asm_addov(as, ir); break; @@ -1663,7 +1712,6 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_OBAR: asm_obar(as, ir); break; /* Type conversions. */ - case IR_TOBIT: asm_tobit(as, ir); break; case IR_CONV: asm_conv(as, ir); break; case IR_TOSTR: asm_tostr(as, ir); break; case IR_STRTO: asm_strto(as, ir); break; @@ -1881,7 +1929,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe) SnapEntry sn = map[n-1]; if ((sn & SNAP_FRAME)) { *gotframe = 1; - return snap_slot(sn); + return snap_slot(sn) - LJ_FR2; } } return 0; @@ -1901,16 +1949,20 @@ static void asm_tail_link(ASMState *as) if (as->T->link == 0) { /* Setup fixed registers for exit to interpreter. */ - const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]); + const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]); int32_t mres; if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; if (bc_isret(bc_op(*retpc))) pc = retpc; } +#if LJ_GC64 + emit_loadu64(as, RID_LPC, u64ptr(pc)); +#else ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); ra_allockreg(as, i32ptr(pc), RID_LPC); - mres = (int32_t)(snap->nslots - baseslot); +#endif + mres = (int32_t)(snap->nslots - baseslot - LJ_FR2); switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break; @@ -1925,6 +1977,11 @@ static void asm_tail_link(ASMState *as) } emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); + if (as->J->ktrace) { /* Patch ktrace slot with the final GCtrace pointer. */ + setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal)); + IR(as->J->ktrace)->o = IR_KGC; + } + /* Sync the interpreter state with the on-trace state. */ asm_stack_restore(as, snap); @@ -1950,17 +2007,22 @@ static void asm_setup_regsp(ASMState *as) ra_setup(as); /* Clear reg/sp for constants. */ - for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) + for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) { ir->prev = REGSP_INIT; + if (irt_is64(ir->t) && ir->o != IR_KNULL) { +#if LJ_GC64 + ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */ +#else + /* Make life easier for backends by putting address of constant in i. */ + ir->i = (int32_t)(intptr_t)(ir+1); +#endif + ir++; + } + } /* REF_BASE is used for implicit references to the BASE register. */ lastir->prev = REGSP_HINT(RID_BASE); - ir = IR(nins-1); - if (ir->o == IR_RENAME) { - do { ir--; nins--; } while (ir->o == IR_RENAME); - T->nins = nins; /* Remove any renames left over from ASM restart. */ - } as->snaprename = nins; as->snapref = nins; as->snapno = T->nsnap; @@ -2143,7 +2205,10 @@ static void asm_setup_regsp(ASMState *as) #endif #if LJ_TARGET_X86ORX64 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ - case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: + case IR_BSHL: case IR_BSHR: case IR_BSAR: + if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */ + break; + case IR_BROL: case IR_BROR: if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { IR(ir->op2)->r = REGSP_HINT(RID_ECX); if (inloop) @@ -2189,14 +2254,25 @@ void lj_asm_trace(jit_State *J, GCtrace *T) ASMState *as = &as_; MCode *origtop; + /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */ + { + IRRef nins = T->nins; + IRIns *ir = &T->ir[nins-1]; + if (ir->o == IR_NOP || ir->o == IR_RENAME) { + do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME); + T->nins = nins; + } + } + /* Ensure an initialized instruction beyond the last one for HIOP checks. */ - J->cur.nins = lj_ir_nextins(J); - J->cur.ir[J->cur.nins].o = IR_NOP; + /* This also allows one RENAME to be added without reallocating curfinal. */ + as->orignins = lj_ir_nextins(J); + J->cur.ir[as->orignins].o = IR_NOP; /* Setup initial state. Copy some fields to reduce indirections. */ as->J = J; as->T = T; - as->ir = T->ir; + J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */ as->flags = J->flags; as->loopref = J->loopref; as->realign = NULL; @@ -2209,12 +2285,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T) as->mclim = as->mcbot + MCLIM_REDZONE; asm_setup_target(as); - do { + /* + ** This is a loop, because the MCode may have to be (re-)assembled + ** multiple times: + ** + ** 1. as->realign is set (and the assembly aborted), if the arch-specific + ** backend wants the MCode to be aligned differently. + ** + ** This is currently only the case on x86/x64, where small loops get + ** an aligned loop body plus a short branch. Not much effort is wasted, + ** because the abort happens very quickly and only once. + ** + ** 2. The IR is immovable, since the MCode embeds pointers to various + ** constants inside the IR. But RENAMEs may need to be added to the IR + ** during assembly, which might grow and reallocate the IR. We check + ** at the end if the IR (in J->cur.ir) has actually grown, resize the + ** copy (in J->curfinal.ir) and try again. + ** + ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have + ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to + ** always have one spare slot in the IR (see above), which means we + ** have to redo the assembly for only ~2% of all traces. + ** + ** Very, very rarely, this needs to be done repeatedly, since the + ** location of constants inside the IR (actually, reachability from + ** a global pointer) may affect register allocation and thus the + ** number of RENAMEs. + */ + for (;;) { as->mcp = as->mctop; #ifdef LUA_USE_ASSERT as->mcp_prev = as->mcp; #endif - as->curins = T->nins; + as->ir = J->curfinal->ir; /* Use the copied IR. */ + as->curins = J->cur.nins = as->orignins; + RA_DBG_START(); RA_DBGX((as, "===== STOP =====")); @@ -2242,22 +2347,40 @@ void lj_asm_trace(jit_State *J, GCtrace *T) checkmclim(as); asm_ir(as, ir); } - } while (as->realign); /* Retry in case the MCode needs to be realigned. */ - /* Emit head of trace. */ - RA_DBG_REF(); - checkmclim(as); - if (as->gcsteps > 0) { - as->curins = as->T->snap[0].ref; - asm_snap_prep(as); /* The GC check is a guard. */ - asm_gc_check(as); + if (as->realign && J->curfinal->nins >= T->nins) + continue; /* Retry in case only the MCode needs to be realigned. */ + + /* Emit head of trace. */ + RA_DBG_REF(); + checkmclim(as); + if (as->gcsteps > 0) { + as->curins = as->T->snap[0].ref; + asm_snap_prep(as); /* The GC check is a guard. */ + asm_gc_check(as); + as->curins = as->stopins; + } + ra_evictk(as); + if (as->parent) + asm_head_side(as); + else + asm_head_root(as); + asm_phi_fixup(as); + + if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */ + lua_assert(J->curfinal->nk == T->nk); + memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins, + (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */ + T->nins = J->curfinal->nins; + break; /* Done. */ + } + + /* Otherwise try again with a bigger IR. */ + lj_trace_free(J2G(J), J->curfinal); + J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */ + J->curfinal = lj_trace_alloc(J->L, T); + as->realign = NULL; } - ra_evictk(as); - if (as->parent) - asm_head_side(as); - else - asm_head_root(as); - asm_phi_fixup(as); RA_DBGX((as, "===== START ====")); RA_DBG_FLUSH(); @@ -2270,6 +2393,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T) if (!as->loopref) asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); +#if LJ_TARGET_MCODE_FIXUP + asm_mcode_fixup(T->mcode, T->szmcode); +#endif lj_mcode_sync(T->mcode, origtop); } diff --git a/luajit-2.1/src/lj_asm.h b/luajit-2.1/src/lj_asm.h index 85f2976..2819481 100644 --- a/luajit-2.1/src/lj_asm.h +++ b/luajit-2.1/src/lj_asm.h @@ -1,6 +1,6 @@ /* ** IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_ASM_H diff --git a/luajit-2.1/src/lj_asm_arm.h b/luajit-2.1/src/lj_asm_arm.h index 81843ca..37bfa40 100644 --- a/luajit-2.1/src/lj_asm_arm.h +++ b/luajit-2.1/src/lj_asm_arm.h @@ -1,6 +1,6 @@ /* ** ARM IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* -- Register allocator extensions --------------------------------------- */ @@ -426,7 +426,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) { RegSet drop = RSET_SCRATCH; - int hiop = ((ir+1)->o == IR_HIOP); + int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. */ if (hiop && ra_hasreg((ir+1)->r)) @@ -520,8 +520,6 @@ static void asm_tobit(ASMState *as, IRIns *ir) emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15)); } -#else -#define asm_tobit(as, ir) lua_assert(0) #endif static void asm_conv(ASMState *as, IRIns *ir) @@ -911,7 +909,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir) { - /* NYI: Check that UREFO is still open and not aliasing a slot. */ Reg dest = ra_dest(as, ir, RSET_GPR); if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); @@ -1000,22 +997,26 @@ static ARMIns asm_fxstoreins(IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir) { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); - ARMIns ai = asm_fxloadins(ir); - int32_t ofs; - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); - return; + if (ir->op1 == REF_NIL) { + lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */ + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); + ARMIns ai = asm_fxloadins(ir); + int32_t ofs; + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ + emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); + return; + } } + ofs = field_ofs[ir->op2]; + if ((ai & 0x04000000)) + emit_lso(as, ai, dest, idx, ofs); + else + emit_lsox(as, ai, dest, idx, ofs); } - ofs = field_ofs[ir->op2]; - if ((ai & 0x04000000)) - emit_lso(as, ai, dest, idx, ofs); - else - emit_lsox(as, ai, dest, idx, ofs); } static void asm_fstore(ASMState *as, IRIns *ir) @@ -1372,8 +1373,6 @@ static void asm_fpmath(ASMState *as, IRIns *ir) else asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); } -#else -#define asm_fpmath(as, ir) lua_assert(0) #endif static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) @@ -1492,13 +1491,7 @@ static void asm_mul(ASMState *as, IRIns *ir) #define asm_subov(as, ir) asm_sub(as, ir) #define asm_mulov(as, ir) asm_mul(as, ir) -#if LJ_SOFTFP -#define asm_div(as, ir) lua_assert(0) -#define asm_pow(as, ir) lua_assert(0) -#define asm_abs(as, ir) lua_assert(0) -#define asm_atan2(as, ir) lua_assert(0) -#define asm_ldexp(as, ir) lua_assert(0) -#else +#if !LJ_SOFTFP #define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) #define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) #define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) diff --git a/luajit-2.1/src/lj_asm_arm64.h b/luajit-2.1/src/lj_asm_arm64.h new file mode 100644 index 0000000..8fd92e7 --- /dev/null +++ b/luajit-2.1/src/lj_asm_arm64.h @@ -0,0 +1,2022 @@ +/* +** ARM64 IR assembler (SSA IR -> machine code). +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** +** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +** Sponsored by Cisco Systems, Inc. +*/ + +/* -- Register allocator extensions --------------------------------------- */ + +/* Allocate a register with a hint. */ +static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) +{ + Reg r = IR(ref)->r; + if (ra_noreg(r)) { + if (!ra_hashint(r) && !iscrossref(as, ref)) + ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ + r = ra_allocref(as, ref, allow); + } + ra_noweak(as, r); + return r; +} + +/* Allocate two source registers for three-operand instructions. */ +static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) +{ + IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); + Reg left = irl->r, right = irr->r; + if (ra_hasreg(left)) { + ra_noweak(as, left); + if (ra_noreg(right)) + right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); + else + ra_noweak(as, right); + } else if (ra_hasreg(right)) { + ra_noweak(as, right); + left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); + } else if (ra_hashint(right)) { + right = ra_allocref(as, ir->op2, allow); + left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); + } else { + left = ra_allocref(as, ir->op1, allow); + right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); + } + return left | (right << 8); +} + +/* -- Guard handling ------------------------------------------------------ */ + +/* Setup all needed exit stubs. */ +static void asm_exitstub_setup(ASMState *as, ExitNo nexits) +{ + ExitNo i; + MCode *mxp = as->mctop; + if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) + asm_mclimit(as); + /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ + for (i = nexits-1; (int32_t)i >= 0; i--) + *--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu)); + *--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno)); + mxp--; + *mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu)); + *--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP)); + as->mctop = mxp; +} + +static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) +{ + /* Keep this in-sync with exitstub_trace_addr(). */ + return as->mctop + exitno + 3; +} + +/* Emit conditional branch to exit for guard. */ +static void asm_guardcc(ASMState *as, A64CC cc) +{ + MCode *target = asm_exitstub_addr(as, as->snapno); + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; + *p = A64I_B | ((target-p) & 0x03ffffffu); + emit_cond_branch(as, cc^1, p-1); + return; + } + emit_cond_branch(as, cc, target); +} + +/* Emit test and branch instruction to exit for guard. */ +static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) +{ + MCode *target = asm_exitstub_addr(as, as->snapno); + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; + *p = A64I_B | ((target-p) & 0x03ffffffu); + emit_tnb(as, ai^0x01000000u, r, bit, p-1); + return; + } + emit_tnb(as, ai, r, bit, target); +} + +/* Emit compare and branch instruction to exit for guard. */ +static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r) +{ + MCode *target = asm_exitstub_addr(as, as->snapno); + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; + *p = A64I_B | ((target-p) & 0x03ffffffu); + emit_cnb(as, ai^0x01000000u, r, p-1); + return; + } + emit_cnb(as, ai, r, target); +} + +/* -- Operand fusion ------------------------------------------------------ */ + +/* Limit linear search to this distance. Avoids O(n^2) behavior. */ +#define CONFLICT_SEARCH_LIM 31 + +static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) +{ + if (irref_isk(ref)) { + IRIns *ir = IR(ref); + if (ir->o == IR_KNULL || !irt_is64(ir->t)) { + *k = ir->i; + return 1; + } else if (checki32((int64_t)ir_k64(ir)->u64)) { + *k = (int32_t)ir_k64(ir)->u64; + return 1; + } + } + return 0; +} + +/* Check if there's no conflicting instruction between curins and ref. */ +static int noconflict(ASMState *as, IRRef ref, IROp conflict) +{ + IRIns *ir = as->ir; + IRRef i = as->curins; + if (i > ref + CONFLICT_SEARCH_LIM) + return 0; /* Give up, ref is too far away. */ + while (--i > ref) + if (ir[i].o == conflict) + return 0; /* Conflict found. */ + return 1; /* Ok, no conflict. */ +} + +/* Fuse the array base of colocated arrays. */ +static int32_t asm_fuseabase(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && + !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) + return (int32_t)sizeof(GCtab); + return 0; +} + +#define FUSE_REG 0x40000000 + +/* Fuse array/hash/upvalue reference into register+offset operand. */ +static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, + A64Ins ins) +{ + IRIns *ir = IR(ref); + if (ra_noreg(ir->r)) { + if (ir->o == IR_AREF) { + if (mayfuse(as, ref)) { + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? tab : ir->op1; + ofs += 8*IR(ir->op2)->i; + if (emit_checkofs(ins, ofs)) { + *ofsp = ofs; + return ra_alloc1(as, refa, allow); + } + } else { + Reg base = ra_alloc1(as, ir->op1, allow); + *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base)); + return base; + } + } + } else if (ir->o == IR_HREFK) { + if (mayfuse(as, ref)) { + int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); + if (emit_checkofs(ins, ofs)) { + *ofsp = ofs; + return ra_alloc1(as, ir->op1, allow); + } + } + } else if (ir->o == IR_UREFC) { + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; + int64_t ofs = glofs(as, &uv->tv); + if (emit_checkofs(ins, ofs)) { + *ofsp = (int32_t)ofs; + return RID_GL; + } + } + } + } + *ofsp = 0; + return ra_alloc1(as, ref, allow); +} + +/* Fuse m operand into arithmetic/logic instructions. */ +static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) +{ + IRIns *ir = IR(ref); + if (ra_hasreg(ir->r)) { + ra_noweak(as, ir->r); + return A64F_M(ir->r); + } else if (irref_isk(ref)) { + uint32_t m; + int64_t k = get_k64val(ir); + if ((ai & 0x1f000000) == 0x0a000000) + m = emit_isk13(k, irt_is64(ir->t)); + else + m = emit_isk12(k); + if (m) + return m; + } else if (mayfuse(as, ref)) { + if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) || + (ir->o == IR_ADD && ir->op1 == ir->op2)) { + A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR : + ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL; + int shift = ir->o == IR_ADD ? 1 : + (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); + IRIns *irl = IR(ir->op1); + if (sh == A64SH_LSL && + irl->o == IR_CONV && + irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) && + shift <= 4 && + canfuse(as, irl)) { + Reg m = ra_alloc1(as, irl->op1, allow); + return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift); + } else { + Reg m = ra_alloc1(as, ir->op1, allow); + return A64F_M(m) | A64F_SH(sh, shift); + } + } else if (ir->o == IR_CONV && + ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) { + Reg m = ra_alloc1(as, ir->op1, allow); + return A64F_M(m) | A64F_EX(A64EX_SXTW); + } + } + return A64F_M(ra_allocref(as, ref, allow)); +} + +/* Fuse XLOAD/XSTORE reference into load/store operand. */ +static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, + RegSet allow) +{ + IRIns *ir = IR(ref); + Reg base; + int32_t ofs = 0; + if (ra_noreg(ir->r) && canfuse(as, ir)) { + if (ir->o == IR_ADD) { + if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) { + ref = ir->op1; + } else { + Reg rn, rm; + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irl = IR(lref); + if (mayfuse(as, irl->op1)) { + unsigned int shift = 4; + if (irl->o == IR_BSHL && irref_isk(irl->op2)) { + shift = (IR(irl->op2)->i & 63); + } else if (irl->o == IR_ADD && irl->op1 == irl->op2) { + shift = 1; + } + if ((ai >> 30) == shift) { + lref = irl->op1; + irl = IR(lref); + ai |= A64I_LS_SH; + } + } + if (irl->o == IR_CONV && + irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) && + canfuse(as, irl)) { + lref = irl->op1; + ai |= A64I_LS_SXTWx; + } else { + ai |= A64I_LS_LSLx; + } + rm = ra_alloc1(as, lref, allow); + rn = ra_alloc1(as, rref, rset_exclude(allow, rm)); + emit_dnm(as, (ai^A64I_LS_R), (rd & 31), rn, rm); + return; + } + } else if (ir->o == IR_STRREF) { + if (asm_isk32(as, ir->op2, &ofs)) { + ref = ir->op1; + } else if (asm_isk32(as, ir->op1, &ofs)) { + ref = ir->op2; + } else { + Reg rn = ra_alloc1(as, ir->op1, allow); + IRIns *irr = IR(ir->op2); + uint32_t m; + if (irr+1 == ir && !ra_used(irr) && + irr->o == IR_ADD && irref_isk(irr->op2)) { + ofs = sizeof(GCstr) + IR(irr->op2)->i; + if (emit_checkofs(ai, ofs)) { + Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn)); + m = A64F_M(rm) | A64F_EX(A64EX_SXTW); + goto skipopm; + } + } + m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); + ofs = sizeof(GCstr); + skipopm: + emit_lso(as, ai, rd, rd, ofs); + emit_dn(as, A64I_ADDx^m, rd, rn); + return; + } + ofs += sizeof(GCstr); + if (!emit_checkofs(ai, ofs)) { + Reg rn = ra_alloc1(as, ref, allow); + Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); + emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm); + return; + } + } + } + base = ra_alloc1(as, ref, allow); + emit_lso(as, ai, (rd & 31), base, ofs); +} + +/* Fuse FP multiply-add/sub. */ +static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) +{ + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irm; + if (lref != rref && + ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && + ra_noreg(irm->r)) || + (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && + (rref = lref, ai = air, ra_noreg(irm->r))))) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); + Reg left = ra_alloc2(as, irm, + rset_exclude(rset_exclude(RSET_FPR, dest), add)); + Reg right = (left >> 8); left &= 255; + emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31)); + return 1; + } + return 0; +} + +/* Fuse BAND + BSHL/BSHR into UBFM. */ +static int asm_fuseandshift(ASMState *as, IRIns *ir) +{ + IRIns *irl = IR(ir->op1); + lua_assert(ir->o == IR_BAND); + if (canfuse(as, irl) && irref_isk(ir->op2)) { + uint64_t mask = get_k64val(IR(ir->op2)); + if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) { + int32_t shmask = irt_is64(irl->t) ? 63 : 31; + int32_t shift = (IR(irl->op2)->i & shmask); + int32_t imms = shift; + if (irl->o == IR_BSHL) { + mask >>= shift; + shift = (shmask-shift+1) & shmask; + imms = 0; + } + if (mask && !((mask+1) & mask)) { /* Contiguous 1-bits at the bottom. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, irl->op1, RSET_GPR); + A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw; + imms += 63 - emit_clz64(mask); + if (imms > shmask) imms = shmask; + emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left); + return 1; + } + } + } + return 0; +} + +/* Fuse BOR(BSHL, BSHR) into EXTR/ROR. */ +static int asm_fuseorshift(ASMState *as, IRIns *ir) +{ + IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); + lua_assert(ir->o == IR_BOR); + if (canfuse(as, irl) && canfuse(as, irr) && + ((irl->o == IR_BSHR && irr->o == IR_BSHL) || + (irl->o == IR_BSHL && irr->o == IR_BSHR))) { + if (irref_isk(irl->op2) && irref_isk(irr->op2)) { + IRRef lref = irl->op1, rref = irr->op1; + uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i; + if (irl->o == IR_BSHR) { /* BSHR needs to be the right operand. */ + uint32_t tmp2; + IRRef tmp1 = lref; lref = rref; rref = tmp1; + tmp2 = lshift; lshift = rshift; rshift = tmp2; + } + if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) { + A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw; + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, lref, RSET_GPR); + Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left)); + emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right); + return 1; + } + } + } + return 0; +} + +/* -- Calls --------------------------------------------------------------- */ + +/* Generate a call to a C function. */ +static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) +{ + uint32_t n, nargs = CCI_XNARGS(ci); + int32_t ofs = 0; + Reg gpr, fpr = REGARG_FIRSTFPR; + if ((void *)ci->func) + emit_call(as, (void *)ci->func); + for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) + as->cost[gpr] = REGCOST(~0u, ASMREF_L); + gpr = REGARG_FIRSTGPR; + for (n = 0; n < nargs; n++) { /* Setup args. */ + IRRef ref = args[n]; + IRIns *ir = IR(ref); + if (ref) { + if (irt_isfp(ir->t)) { + if (fpr <= REGARG_LASTFPR) { + lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */ + ra_leftov(as, fpr, ref); + fpr++; + } else { + Reg r = ra_alloc1(as, ref, RSET_FPR); + emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0)); + ofs += 8; + } + } else { + if (gpr <= REGARG_LASTGPR) { + lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ + ra_leftov(as, gpr, ref); + gpr++; + } else { + Reg r = ra_alloc1(as, ref, RSET_GPR); + emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0)); + ofs += 8; + } + } + } + } +} + +/* Setup result reg/sp for call. Evict scratch regs. */ +static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) +{ + RegSet drop = RSET_SCRATCH; + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + ra_evictset(as, drop); /* Evictions must be performed first. */ + if (ra_used(ir)) { + lua_assert(!irt_ispri(ir->t)); + if (irt_isfp(ir->t)) { + if (ci->flags & CCI_CASTU64) { + Reg dest = ra_dest(as, ir, RSET_FPR) & 31; + emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R, + dest, RID_RET); + } else { + ra_destreg(as, ir, RID_FPRET); + } + } else { + ra_destreg(as, ir, RID_RET); + } + } + UNUSED(ci); +} + +static void asm_callx(ASMState *as, IRIns *ir) +{ + IRRef args[CCI_NARGS_MAX*2]; + CCallInfo ci; + IRRef func; + IRIns *irf; + ci.flags = asm_callx_flags(as, ir); + asm_collectargs(as, ir, &ci, args); + asm_setupresult(as, ir, &ci); + func = ir->op2; irf = IR(func); + if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } + if (irref_isk(func)) { /* Call to constant address. */ + ci.func = (ASMFunction)(ir_k64(irf)->u64); + } else { /* Need a non-argument register for indirect calls. */ + Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); + emit_n(as, A64I_BLR, freg); + ci.func = (ASMFunction)(void *)0; + } + asm_gencall(as, &ci, args); +} + +/* -- Returns ------------------------------------------------------------- */ + +/* Return to lower frame. Guard that it goes to the right spot. */ +static void asm_retf(ASMState *as, IRIns *ir) +{ + Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); + void *pc = ir_kptr(IR(ir->op2)); + int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); + as->topslot -= (BCReg)delta; + if ((int32_t)as->topslot < 0) as->topslot = 0; + irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ + /* Need to force a spill on REF_BASE now to update the stack slot. */ + emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE))); + emit_setgl(as, base, jit_base); + emit_addptr(as, base, -8*delta); + asm_guardcc(as, CC_NE); + emit_nm(as, A64I_CMPx, RID_TMP, + ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base))); + emit_lso(as, A64I_LDRx, RID_TMP, base, -8); +} + +/* -- Type conversions ---------------------------------------------------- */ + +static void asm_tointg(ASMState *as, IRIns *ir, Reg left) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); + Reg dest = ra_dest(as, ir, RSET_GPR); + asm_guardcc(as, CC_NE); + emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31)); + emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest); + emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31)); +} + +static void asm_tobit(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_FPR; + Reg left = ra_alloc1(as, ir->op1, allow); + Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); + Reg tmp = ra_scratch(as, rset_clear(allow, right)); + Reg dest = ra_dest(as, ir, RSET_GPR); + emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31)); + emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31)); +} + +static void asm_conv(ASMState *as, IRIns *ir) +{ + IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); + int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); + int stfp = (st == IRT_NUM || st == IRT_FLOAT); + IRRef lref = ir->op1; + lua_assert(irt_type(ir->t) != st); + if (irt_isfp(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ + emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32, + (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31)); + } else { /* Integer to FP conversion. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + A64Ins ai = irt_isfloat(ir->t) ? + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) : + (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) : + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) : + (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32)); + emit_dn(as, ai, (dest & 31), left); + } + } else if (stfp) { /* FP to integer conversion. */ + if (irt_isguard(ir->t)) { + /* Checked conversions are only supported from number to int. */ + lua_assert(irt_isint(ir->t) && st == IRT_NUM); + asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); + } else { + Reg left = ra_alloc1(as, lref, RSET_FPR); + Reg dest = ra_dest(as, ir, RSET_GPR); + A64Ins ai = irt_is64(ir->t) ? + (st == IRT_NUM ? + (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : + (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : + (st == IRT_NUM ? + (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : + (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); + emit_dn(as, ai, dest, (left & 31)); + } + } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, lref, RSET_GPR); + A64Ins ai = st == IRT_I8 ? A64I_SXTBw : + st == IRT_U8 ? A64I_UXTBw : + st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw; + lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + emit_dn(as, ai, dest, left); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + if (irt_is64(ir->t)) { + if (st64 || !(ir->op2 & IRCONV_SEXT)) { + /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */ + ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ + } else { /* 32 to 64 bit sign extension. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_dn(as, A64I_SXTW, dest, left); + } + } else { + if (st64) { + /* This is either a 32 bit reg/reg mov which zeroes the hiword + ** or a load of the loword from a 64 bit address. + */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_dm(as, A64I_MOVw, dest, left); + } else { /* 32/32 bit no-op (cast). */ + ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ + } + } + } +} + +static void asm_strto(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; + IRRef args[2]; + Reg dest = 0, tmp; + int destused = ra_used(ir); + int32_t ofs = 0; + ra_evictset(as, RSET_SCRATCH); + if (destused) { + if (ra_hasspill(ir->s)) { + ofs = sps_scale(ir->s); + destused = 0; + if (ra_hasreg(ir->r)) { + ra_free(as, ir->r); + ra_modified(as, ir->r); + emit_spload(as, ir, ir->r, ofs); + } + } else { + dest = ra_dest(as, ir, RSET_FPR); + } + } + if (destused) + emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); + asm_guardcnb(as, A64I_CBZ, RID_RET); + args[0] = ir->op1; /* GCstr *str */ + args[1] = ASMREF_TMP1; /* TValue *n */ + asm_gencall(as, ci, args); + tmp = ra_releasetmp(as, ASMREF_TMP1); + emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR); +} + +/* -- Memory references --------------------------------------------------- */ + +/* Store tagged value for ref at base+ofs. */ +static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) +{ + RegSet allow = rset_exclude(RSET_GPR, base); + IRIns *ir = IR(ref); + lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs); + } else { + Reg src = ra_alloc1(as, ref, allow); + rset_clear(allow, src); + if (irt_isinteger(ir->t)) { + Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); + emit_lso(as, A64I_STRx, RID_TMP, base, ofs); + emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src); + } else { + Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); + emit_lso(as, A64I_STRx, RID_TMP, base, ofs); + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type); + } + } +} + +/* Get pointer to TValue. */ +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) +{ + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if (irref_isk(ref)) { + /* Use the number constant itself as a TValue. */ + ra_allockreg(as, i64ptr(ir_knum(ir)), dest); + } else { + /* Otherwise force a spill and use the spill slot. */ + emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR); + } + } else { + /* Otherwise use g->tmptv to hold the TValue. */ + asm_tvstore64(as, dest, 0, ref); + ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest); + } +} + +static void asm_aref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg idx, base; + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? tab : ir->op1; + uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i); + if (k) { + base = ra_alloc1(as, refa, RSET_GPR); + emit_dn(as, A64I_ADDx^k, dest, base); + return; + } + } + base = ra_alloc1(as, ir->op1, RSET_GPR); + idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); + emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx); +} + +/* Inlined hash lookup. Specialized for key type and for const keys. +** The equivalent C code is: +** Node *n = hashkey(t, key); +** do { +** if (lj_obj_equal(&n->key, key)) return &n->val; +** } while ((n = nextnode(n))); +** return niltv(L); +*/ +static void asm_href(ASMState *as, IRIns *ir, IROp merge) +{ + RegSet allow = RSET_GPR; + int destused = ra_used(ir); + Reg dest = ra_dest(as, ir, allow); + Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); + Reg key = 0, tmp = RID_TMP; + IRRef refkey = ir->op2; + IRIns *irkey = IR(refkey); + int isk = irref_isk(ir->op2); + IRType1 kt = irkey->t; + uint32_t k = 0; + uint32_t khash; + MCLabel l_end, l_loop, l_next; + rset_clear(allow, tab); + + if (!isk) { + key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); + rset_clear(allow, key); + if (!irt_isstr(kt)) { + tmp = ra_scratch(as, allow); + rset_clear(allow, tmp); + } + } else if (irt_isnum(kt)) { + int64_t val = (int64_t)ir_knum(irkey)->u64; + if (!(k = emit_isk12(val))) { + key = ra_allock(as, val, allow); + rset_clear(allow, key); + } + } else if (!irt_ispri(kt)) { + if (!(k = emit_isk12(irkey->i))) { + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + } + } + + /* Key not found in chain: jump to exit (if merged) or load niltv. */ + l_end = emit_label(as); + as->invmcp = NULL; + if (merge == IR_NE) + asm_guardcc(as, CC_AL); + else if (destused) + emit_loada(as, dest, niltvg(J2G(as->J))); + + /* Follow hash chain until the end. */ + l_loop = --as->mcp; + emit_n(as, A64I_CMPx^A64I_K12^0, dest); + emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); + l_next = emit_label(as); + + /* Type and value comparison. */ + if (merge == IR_EQ) + asm_guardcc(as, CC_EQ); + else + emit_cond_branch(as, CC_EQ, l_end); + + if (irt_isnum(kt)) { + if (isk) { + /* Assumes -0.0 is already canonicalized to +0.0. */ + if (k) + emit_n(as, A64I_CMPx^k, tmp); + else + emit_nm(as, A64I_CMPx, key, tmp); + emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); + } else { + Reg tisnum = ra_allock(as, LJ_TISNUM << 15, allow); + Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key)); + rset_clear(allow, tisnum); + emit_nm(as, A64I_FCMPd, key, ftmp); + emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31)); + emit_cond_branch(as, CC_LO, l_next); + emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp); + emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n)); + } + } else if (irt_isaddr(kt)) { + Reg scr; + if (isk) { + int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; + scr = ra_allock(as, kk, allow); + emit_nm(as, A64I_CMPx, scr, tmp); + emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); + } else { + scr = ra_scratch(as, allow); + emit_nm(as, A64I_CMPx, tmp, scr); + emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64)); + } + rset_clear(allow, scr); + } else { + Reg type, scr; + lua_assert(irt_ispri(kt) && !irt_isnil(kt)); + type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); + scr = ra_scratch(as, rset_clear(allow, type)); + rset_clear(allow, scr); + emit_nm(as, A64I_CMPw, scr, type); + emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key)); + } + + *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE; + if (!isk && irt_isaddr(kt)) { + Reg type = ra_allock(as, (int32_t)irt_toitype(kt), allow); + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type); + rset_clear(allow, type); + } + /* Load main position relative to tab->node into dest. */ + khash = isk ? ir_khash(irkey) : 1; + if (khash == 0) { + emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node)); + } else { + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest); + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest); + emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node)); + if (isk) { + Reg tmphash = ra_allock(as, khash, allow); + emit_dnm(as, A64I_ANDw, dest, dest, tmphash); + emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); + } else if (irt_isstr(kt)) { + /* Fetch of str->hash is cheaper than ra_allock. */ + emit_dnm(as, A64I_ANDw, dest, dest, tmp); + emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash)); + emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); + } else { /* Must match with hash*() in lj_tab.c. */ + emit_dnm(as, A64I_ANDw, dest, dest, tmp); + emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); + emit_dnm(as, A64I_SUBw, dest, dest, tmp); + emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); + emit_dnm(as, A64I_EORw, dest, dest, tmp); + emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest); + emit_dnm(as, A64I_SUBw, tmp, tmp, dest); + emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); + emit_dnm(as, A64I_EORw, tmp, tmp, dest); + if (irt_isnum(kt)) { + emit_dnm(as, A64I_ADDw, dest, dest, dest); + emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); + emit_dm(as, A64I_MOVw, tmp, dest); + emit_dn(as, A64I_FMOV_R_D, dest, (key & 31)); + } else { + checkmclim(as); + emit_dm(as, A64I_MOVw, tmp, key); + emit_dnm(as, A64I_EORw, dest, dest, + ra_allock(as, irt_toitype(kt) << 15, allow)); + emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); + emit_dm(as, A64I_MOVx, dest, key); + } + } + } +} + +static void asm_hrefk(ASMState *as, IRIns *ir) +{ + IRIns *kslot = IR(ir->op2); + IRIns *irkey = IR(kslot->op1); + int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); + int32_t kofs = ofs + (int32_t)offsetof(Node, key); + int bigofs = !emit_checkofs(A64I_LDRx, ofs); + RegSet allow = RSET_GPR; + Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; + Reg node = ra_alloc1(as, ir->op1, allow); + Reg key = ra_scratch(as, rset_clear(allow, node)); + Reg idx = node; + uint64_t k; + lua_assert(ofs % sizeof(Node) == 0); + rset_clear(allow, key); + if (bigofs) { + idx = dest; + rset_clear(allow, dest); + kofs = (int32_t)offsetof(Node, key); + } else if (ra_hasreg(dest)) { + emit_opk(as, A64I_ADDx, dest, node, ofs, allow); + } + asm_guardcc(as, CC_NE); + if (irt_ispri(irkey->t)) { + k = ~((int64_t)~irt_toitype(irkey->t) << 47); + } else if (irt_isnum(irkey->t)) { + k = ir_knum(irkey)->u64; + } else { + k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); + } + emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow)); + emit_lso(as, A64I_LDRx, key, idx, kofs); + if (bigofs) + emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); +} + +static void asm_uref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; + emit_lsptr(as, A64I_LDRx, dest, v); + } else { + Reg uv = ra_scratch(as, RSET_GPR); + Reg func = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->o == IR_UREFC) { + asm_guardcc(as, CC_NE); + emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP); + emit_opk(as, A64I_ADDx, dest, uv, + (int32_t)offsetof(GCupval, tv), RSET_GPR); + emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); + } else { + emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v)); + } + emit_lso(as, A64I_LDRx, uv, func, + (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); + } +} + +static void asm_fref(ASMState *as, IRIns *ir) +{ + UNUSED(as); UNUSED(ir); + lua_assert(!ra_used(ir)); +} + +static void asm_strref(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg dest = ra_dest(as, ir, allow); + Reg base = ra_alloc1(as, ir->op1, allow); + IRIns *irr = IR(ir->op2); + int32_t ofs = sizeof(GCstr); + uint32_t m; + rset_clear(allow, base); + if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) { + emit_dn(as, A64I_ADDx^m, dest, base); + } else { + emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest); + emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow)); + } +} + +/* -- Loads and stores ---------------------------------------------------- */ + +static A64Ins asm_fxloadins(IRIns *ir) +{ + switch (irt_type(ir->t)) { + case IRT_I8: return A64I_LDRB ^ A64I_LS_S; + case IRT_U8: return A64I_LDRB; + case IRT_I16: return A64I_LDRH ^ A64I_LS_S; + case IRT_U16: return A64I_LDRH; + case IRT_NUM: return A64I_LDRd; + case IRT_FLOAT: return A64I_LDRs; + default: return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw; + } +} + +static A64Ins asm_fxstoreins(IRIns *ir) +{ + switch (irt_type(ir->t)) { + case IRT_I8: case IRT_U8: return A64I_STRB; + case IRT_I16: case IRT_U16: return A64I_STRH; + case IRT_NUM: return A64I_STRd; + case IRT_FLOAT: return A64I_STRs; + default: return irt_is64(ir->t) ? A64I_STRx : A64I_STRw; + } +} + +static void asm_fload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg idx; + A64Ins ai = asm_fxloadins(ir); + int32_t ofs; + if (ir->op1 == REF_NIL) { + idx = RID_GL; + ofs = (ir->op2 << 2) - GG_OFS(g); + } else { + idx = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ + emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx); + return; + } + } + ofs = field_ofs[ir->op2]; + } + emit_lso(as, ai, (dest & 31), idx, ofs); +} + +static void asm_fstore(ASMState *as, IRIns *ir) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1(as, ir->op2, RSET_GPR); + IRIns *irf = IR(ir->op1); + Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); + int32_t ofs = field_ofs[irf->op2]; + emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs); + } +} + +static void asm_xload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); + asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); +} + +static void asm_xstore(ASMState *as, IRIns *ir) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + rset_exclude(RSET_GPR, src)); + } +} + +static void asm_ahuvload(ASMState *as, IRIns *ir) +{ + Reg idx, tmp, type; + int32_t ofs = 0; + RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; + lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || + irt_isint(ir->t)); + if (ra_used(ir)) { + Reg dest = ra_dest(as, ir, allow); + tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest; + if (irt_isaddr(ir->t)) { + emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); + } else if (irt_isnum(ir->t)) { + emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); + } else if (irt_isint(ir->t)) { + emit_dm(as, A64I_MOVw, dest, dest); + } + } else { + tmp = ra_scratch(as, gpr); + } + type = ra_scratch(as, rset_clear(gpr, tmp)); + idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx); + /* Always do the type check, even if the load result is unused. */ + asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE); + if (irt_type(ir->t) >= IRT_NUM) { + lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); + emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), + ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp); + } else if (irt_isaddr(ir->t)) { + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type); + emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); + } else if (irt_isnil(ir->t)) { + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); + } else { + emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), + ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp); + } + if (ofs & FUSE_REG) + emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); + else + emit_lso(as, A64I_LDRx, tmp, idx, ofs); +} + +static void asm_ahustore(ASMState *as, IRIns *ir) +{ + if (ir->r != RID_SINK) { + RegSet allow = RSET_GPR; + Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE; + int32_t ofs = 0; + if (irt_isnum(ir->t)) { + src = ra_alloc1(as, ir->op2, RSET_FPR); + idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd); + if (ofs & FUSE_REG) + emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs &31)); + else + emit_lso(as, A64I_STRd, (src & 31), idx, ofs); + } else { + if (!irt_ispri(ir->t)) { + src = ra_alloc1(as, ir->op2, allow); + rset_clear(allow, src); + if (irt_isinteger(ir->t)) + type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow); + else + type = ra_allock(as, irt_toitype(ir->t), allow); + } else { + tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), + A64I_STRx); + if (ofs & FUSE_REG) + emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); + else + emit_lso(as, A64I_STRx, tmp, idx, ofs); + if (ra_hasreg(src)) { + if (irt_isinteger(ir->t)) { + emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src); + } else { + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type); + } + } + } + } +} + +static void asm_sload(ASMState *as, IRIns *ir) +{ + int32_t ofs = 8*((int32_t)ir->op1-2); + IRType1 t = ir->t; + Reg dest = RID_NONE, base; + RegSet allow = RSET_GPR; + lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ + lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { + dest = ra_scratch(as, RSET_FPR); + asm_tointg(as, ir, dest); + t.irt = IRT_NUM; /* Continue with a regular number type check. */ + } else if (ra_used(ir)) { + Reg tmp = RID_NONE; + if ((ir->op2 & IRSLOAD_CONVERT)) + tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); + lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t)); + dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); + base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest)); + if (irt_isaddr(t)) { + emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); + } else if ((ir->op2 & IRSLOAD_CONVERT)) { + if (irt_isint(t)) { + emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31)); + /* If value is already loaded for type check, move it to FPR. */ + if ((ir->op2 & IRSLOAD_TYPECHECK)) + emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest); + else + dest = tmp; + t.irt = IRT_NUM; /* Check for original type. */ + } else { + emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp); + dest = tmp; + t.irt = IRT_INT; /* Check for original type. */ + } + } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { + emit_dm(as, A64I_MOVw, dest, dest); + } + goto dotypecheck; + } + base = ra_alloc1(as, REF_BASE, allow); +dotypecheck: + rset_clear(allow, base); + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + Reg tmp; + if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) { + tmp = dest; + } else { + tmp = ra_scratch(as, allow); + rset_clear(allow, tmp); + } + if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT)) + emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); + /* Need type check, even if the load result is unused. */ + asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE); + if (irt_type(t) >= IRT_NUM) { + lua_assert(irt_isinteger(t) || irt_isnum(t)); + emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), + ra_allock(as, LJ_TISNUM << 15, allow), tmp); + } else if (irt_isnil(t)) { + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); + } else if (irt_ispri(t)) { + emit_nm(as, A64I_CMPx, + ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp); + } else { + Reg type = ra_scratch(as, allow); + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type); + emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); + } + emit_lso(as, A64I_LDRx, tmp, base, ofs); + return; + } + if (ra_hasreg(dest)) { + emit_lso(as, irt_isnum(t) ? A64I_LDRd : + (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, + ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0))); + } +} + +/* -- Allocations --------------------------------------------------------- */ + +#if LJ_HASFFI +static void asm_cnew(ASMState *as, IRIns *ir) +{ + CTState *cts = ctype_ctsG(J2G(as->J)); + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; + IRRef args[4]; + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); + lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); + + as->gcsteps++; + asm_setupresult(as, ir, ci); /* GCcdata * */ + /* Initialize immutable cdata object. */ + if (ir->o == IR_CNEWI) { + int32_t ofs = sizeof(GCcdata); + Reg r = ra_alloc1(as, ir->op2, allow); + lua_assert(sz == 4 || sz == 8); + emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs); + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; + } + + /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ + { + Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow); + emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); + emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); + emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP); + if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1); + } + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ + asm_gencall(as, ci, args); + ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), + ra_releasetmp(as, ASMREF_TMP1)); +} +#else +#define asm_cnew(as, ir) ((void)0) +#endif + +/* -- Write barriers ------------------------------------------------------ */ + +static void asm_tbar(ASMState *as, IRIns *ir) +{ + Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); + Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); + Reg gr = ra_allock(as, i64ptr(J2G(as->J)), + rset_exclude(rset_exclude(RSET_GPR, tab), link)); + Reg mark = RID_TMP; + MCLabel l_end = emit_label(as); + emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist)); + emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); + emit_lso(as, A64I_STRx, tab, gr, + (int32_t)offsetof(global_State, gc.grayagain)); + emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark); + emit_lso(as, A64I_LDRx, link, gr, + (int32_t)offsetof(global_State, gc.grayagain)); + emit_cond_branch(as, CC_EQ, l_end); + emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark); + emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); +} + +static void asm_obar(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; + IRRef args[2]; + MCLabel l_end; + RegSet allow = RSET_GPR; + Reg obj, val, tmp; + /* No need for other object barriers (yet). */ + lua_assert(IR(ir->op1)->o == IR_UREFC); + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ir->op1; /* TValue *tv */ + asm_gencall(as, ci, args); + ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) ); + obj = IR(ir->op1)->r; + tmp = ra_scratch(as, rset_exclude(allow, obj)); + emit_cond_branch(as, CC_EQ, l_end); + emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp); + emit_cond_branch(as, CC_EQ, l_end); + emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP); + val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); + emit_lso(as, A64I_LDRB, tmp, obj, + (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); + emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); +} + +/* -- Arithmetic and logic operations ------------------------------------- */ + +static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31)); +} + +static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); + emit_dn(as, ai, (dest & 31), (left & 31)); +} + +static void asm_fpmath(ASMState *as, IRIns *ir) +{ + IRFPMathOp fpm = (IRFPMathOp)ir->op2; + if (fpm == IRFPM_SQRT) { + asm_fpunary(as, ir, A64I_FSQRTd); + } else if (fpm <= IRFPM_TRUNC) { + asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd : + fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd); + } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { + return; + } else { + asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); + } +} + +static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) +{ + IRIns *ir; + if (irref_isk(rref)) + return 0; /* Don't swap constants to the left. */ + if (irref_isk(lref)) + return 1; /* But swap constants to the right. */ + ir = IR(rref); + if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || + (ir->o == IR_ADD && ir->op1 == ir->op2) || + (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) + return 0; /* Don't swap fusable operands to the left. */ + ir = IR(lref); + if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || + (ir->o == IR_ADD && ir->op1 == ir->op2) || + (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) + return 1; /* But swap fusable operands to the right. */ + return 0; /* Otherwise don't swap. */ +} + +static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai) +{ + IRRef lref = ir->op1, rref = ir->op2; + Reg left, dest = ra_dest(as, ir, RSET_GPR); + uint32_t m; + if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) { + IRRef tmp = lref; lref = rref; rref = tmp; + } + left = ra_hintalloc(as, lref, dest, RSET_GPR); + if (irt_is64(ir->t)) ai |= A64I_X; + m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); + if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */ + asm_guardcc(as, CC_VS); + ai |= A64I_S; + } + emit_dn(as, ai^m, dest, left); +} + +static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai) +{ + if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ + as->flagmcp = NULL; + as->mcp++; + ai |= A64I_S; + } + asm_intop(as, ir, ai); +} + +static void asm_intneg(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left); +} + +/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */ +static void asm_intmul(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + if (irt_isguard(ir->t)) { /* IR_MULOV */ + asm_guardcc(as, CC_NE); + emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */ + emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest); + emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest); + emit_dnm(as, A64I_SMULL, dest, right, left); + } else { + emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right); + } +} + +static void asm_add(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd)) + asm_fparith(as, ir, A64I_FADDd); + return; + } + asm_intop_s(as, ir, A64I_ADDw); +} + +static void asm_sub(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd)) + asm_fparith(as, ir, A64I_FSUBd); + return; + } + asm_intop_s(as, ir, A64I_SUBw); +} + +static void asm_mul(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + asm_fparith(as, ir, A64I_FMULd); + return; + } + asm_intmul(as, ir); +} + +static void asm_div(ASMState *as, IRIns *ir) +{ +#if LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : + IRCALL_lj_carith_divu64); + else +#endif + asm_fparith(as, ir, A64I_FDIVd); +} + +static void asm_pow(ASMState *as, IRIns *ir) +{ +#if LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : + IRCALL_lj_carith_powu64); + else +#endif + asm_callid(as, ir, IRCALL_lj_vm_powi); +} + +#define asm_addov(as, ir) asm_add(as, ir) +#define asm_subov(as, ir) asm_sub(as, ir) +#define asm_mulov(as, ir) asm_mul(as, ir) + +#define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS) +#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) +#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) + +static void asm_mod(ASMState *as, IRIns *ir) +{ +#if LJ_HASFFI + if (!irt_isint(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : + IRCALL_lj_carith_modu64); + else +#endif + asm_callid(as, ir, IRCALL_lj_vm_modi); +} + +static void asm_neg(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + asm_fpunary(as, ir, A64I_FNEGd); + return; + } + asm_intneg(as, ir); +} + +static void asm_band(ASMState *as, IRIns *ir) +{ + A64Ins ai = A64I_ANDw; + if (asm_fuseandshift(as, ir)) + return; + if (as->flagmcp == as->mcp) { + /* Try to drop cmp r, #0. */ + as->flagmcp = NULL; + as->mcp++; + ai = A64I_ANDSw; + } + asm_intop(as, ir, ai); +} + +static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai) +{ + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irl = IR(lref), *irr = IR(rref); + if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) || + (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) { + Reg left, dest = ra_dest(as, ir, RSET_GPR); + uint32_t m; + if (irl->o == IR_BNOT) { + IRRef tmp = lref; lref = rref; rref = tmp; + } + left = ra_alloc1(as, lref, RSET_GPR); + ai |= A64I_ON; + if (irt_is64(ir->t)) ai |= A64I_X; + m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left)); + emit_dn(as, ai^m, dest, left); + } else { + asm_intop(as, ir, ai); + } +} + +static void asm_bor(ASMState *as, IRIns *ir) +{ + if (asm_fuseorshift(as, ir)) + return; + asm_borbxor(as, ir, A64I_ORRw); +} + +#define asm_bxor(as, ir) asm_borbxor(as, ir, A64I_EORw) + +static void asm_bnot(ASMState *as, IRIns *ir) +{ + A64Ins ai = A64I_MVNw; + Reg dest = ra_dest(as, ir, RSET_GPR); + uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); + if (irt_is64(ir->t)) ai |= A64I_X; + emit_d(as, ai^m, dest); +} + +static void asm_bswap(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left); +} + +static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) +{ + int32_t shmask = irt_is64(ir->t) ? 63 : 31; + if (irref_isk(ir->op2)) { /* Constant shifts. */ + Reg left, dest = ra_dest(as, ir, RSET_GPR); + int32_t shift = (IR(ir->op2)->i & shmask); + IRIns *irl = IR(ir->op1); + if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw; + + /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */ + if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) { + if (irl->o == IR_BSHL && irref_isk(irl->op2)) { + int32_t shift2 = (IR(irl->op2)->i & shmask); + shift = ((shift - shift2) & shmask); + shmask -= shift2; + ir = irl; + } + } + + left = ra_alloc1(as, ir->op1, RSET_GPR); + switch (sh) { + case A64SH_LSL: + emit_dn(as, ai | A64F_IMMS(shmask-shift) | + A64F_IMMR((shmask-shift+1)&shmask), dest, left); + break; + case A64SH_LSR: case A64SH_ASR: + emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left); + break; + case A64SH_ROR: + emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left); + break; + } + } else { /* Variable-length shifts. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right); + } +} + +#define asm_bshl(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL) +#define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR) +#define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR) +#define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR) +#define asm_brol(as, ir) lua_assert(0) + +static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right); + emit_nm(as, A64I_CMPw, left, right); +} + +static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc) +{ + Reg dest = (ra_dest(as, ir, RSET_FPR) & 31); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = ((left >> 8) & 31); left &= 31; + emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right); + emit_nm(as, A64I_FCMPd, left, right); +} + +static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc) +{ + if (irt_isnum(ir->t)) + asm_fpmin_max(as, ir, fcc); + else + asm_intmin_max(as, ir, cc); +} + +#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) +#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) + +/* -- Comparisons --------------------------------------------------------- */ + +/* Map of comparisons to flags. ORDER IR. */ +static const uint8_t asm_compmap[IR_ABC+1] = { + /* op FP swp int cc FP cc */ + /* LT */ CC_GE + (CC_HS << 4), + /* GE x */ CC_LT + (CC_HI << 4), + /* LE */ CC_GT + (CC_HI << 4), + /* GT x */ CC_LE + (CC_HS << 4), + /* ULT x */ CC_HS + (CC_LS << 4), + /* UGE */ CC_LO + (CC_LO << 4), + /* ULE x */ CC_HI + (CC_LO << 4), + /* UGT */ CC_LS + (CC_LS << 4), + /* EQ */ CC_NE + (CC_NE << 4), + /* NE */ CC_EQ + (CC_EQ << 4), + /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */ +}; + +/* FP comparisons. */ +static void asm_fpcomp(ASMState *as, IRIns *ir) +{ + Reg left, right; + A64Ins ai; + int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); + if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { + left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31); + right = 0; + ai = A64I_FCMPZd; + } else { + left = ra_alloc2(as, ir, RSET_FPR); + if (swp) { + right = (left & 31); left = ((left >> 8) & 31); + } else { + right = ((left >> 8) & 31); left &= 31; + } + ai = A64I_FCMPd; + } + asm_guardcc(as, (asm_compmap[ir->o] >> 4)); + emit_nm(as, ai, left, right); +} + +/* Integer comparisons. */ +static void asm_intcomp(ASMState *as, IRIns *ir) +{ + A64CC oldcc, cc = (asm_compmap[ir->o] & 15); + A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw; + IRRef lref = ir->op1, rref = ir->op2; + Reg left; + uint32_t m; + int cmpprev0 = 0; + lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || + irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); + if (asm_swapops(as, lref, rref)) { + IRRef tmp = lref; lref = rref; rref = tmp; + if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ + else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ + } + oldcc = cc; + if (irref_isk(rref) && get_k64val(IR(rref)) == 0) { + IRIns *irl = IR(lref); + if (cc == CC_GE) cc = CC_PL; + else if (cc == CC_LT) cc = CC_MI; + else if (cc > CC_NE) goto nocombine; /* Other conds don't work with tst. */ + cmpprev0 = (irl+1 == ir); + /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */ + if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { + IRRef blref = irl->op1, brref = irl->op2; + uint32_t m2 = 0; + Reg bleft; + if (asm_swapops(as, blref, brref)) { + Reg tmp = blref; blref = brref; brref = tmp; + } + if (irref_isk(brref)) { + uint64_t k = get_k64val(IR(brref)); + if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) { + asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, + ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k)); + return; + } + m2 = emit_isk13(k, irt_is64(irl->t)); + } + bleft = ra_alloc1(as, blref, RSET_GPR); + ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); + if (!m2) + m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft)); + asm_guardcc(as, cc); + emit_n(as, ai^m2, bleft); + return; + } + if (cc == CC_EQ || cc == CC_NE) { + /* Combine cmp-bcc into cbz/cbnz. */ + ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ; + if (irt_is64(ir->t)) ai |= A64I_X; + asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR)); + return; + } + } +nocombine: + left = ra_alloc1(as, lref, RSET_GPR); + m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); + asm_guardcc(as, cc); + emit_n(as, ai^m, left); + /* Signed comparison with zero and referencing previous ins? */ + if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE)) + as->flagmcp = as->mcp; /* Allow elimination of the compare. */ +} + +static void asm_comp(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) + asm_fpcomp(as, ir); + else + asm_intcomp(as, ir); +} + +#define asm_equal(as, ir) asm_comp(as, ir) + +/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ + +/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ +static void asm_hiop(ASMState *as, IRIns *ir) +{ + UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on 64 bit. */ +} + +/* -- Profiling ----------------------------------------------------------- */ + +static void asm_prof(ASMState *as, IRIns *ir) +{ + uint32_t k = emit_isk13(HOOK_PROFILE, 0); + lua_assert(k != 0); + UNUSED(ir); + asm_guardcc(as, CC_NE); + emit_n(as, A64I_TSTw^k, RID_TMP); + emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); +} + +/* -- Stack handling ------------------------------------------------------ */ + +/* Check Lua stack size for overflow. Use exit handler as fallback. */ +static void asm_stack_check(ASMState *as, BCReg topslot, + IRIns *irp, RegSet allow, ExitNo exitno) +{ + Reg pbase; + uint32_t k; + if (irp) { + if (!ra_hasspill(irp->s)) { + pbase = irp->r; + lua_assert(ra_hasreg(pbase)); + } else if (allow) { + pbase = rset_pickbot(allow); + } else { + pbase = RID_RET; + emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */ + } + } else { + pbase = RID_BASE; + } + emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); + k = emit_isk12((8*topslot)); + lua_assert(k); + emit_n(as, A64I_CMPx^k, RID_TMP); + emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); + emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, + (int32_t)offsetof(lua_State, maxstack)); + if (irp) { /* Must not spill arbitrary registers in head of side trace. */ + if (ra_hasspill(irp->s)) + emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s)); + emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L)); + if (ra_hasspill(irp->s) && !allow) + emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */ + } else { + emit_getgl(as, RID_TMP, cur_L); + } +} + +/* Restore Lua stack from on-trace state. */ +static void asm_stack_restore(ASMState *as, SnapShot *snap) +{ + SnapEntry *map = &as->T->snapmap[snap->mapofs]; +#ifdef LUA_USE_ASSERT + SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; +#endif + MSize n, nent = snap->nent; + /* Store the value of all modified slots to the Lua stack. */ + for (n = 0; n < nent; n++) { + SnapEntry sn = map[n]; + BCReg s = snap_slot(sn); + int32_t ofs = 8*((int32_t)s-1-LJ_FR2); + IRRef ref = snap_ref(sn); + IRIns *ir = IR(ref); + if ((sn & SNAP_NORESTORE)) + continue; + if (irt_isnum(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs); + } else { + asm_tvstore64(as, RID_BASE, ofs, ref); + } + checkmclim(as); + } + lua_assert(map + nent == flinks); +} + +/* -- GC handling --------------------------------------------------------- */ + +/* Check GC threshold and do one or more GC steps. */ +static void asm_gc_check(ASMState *as) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; + IRRef args[2]; + MCLabel l_end; + Reg tmp1, tmp2; + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ + asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */ + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ASMREF_TMP2; /* MSize steps */ + asm_gencall(as, ci, args); + tmp1 = ra_releasetmp(as, ASMREF_TMP1); + tmp2 = ra_releasetmp(as, ASMREF_TMP2); + emit_loadi(as, tmp2, as->gcsteps); + /* Jump around GC step if GC total < GC threshold. */ + emit_cond_branch(as, CC_LS, l_end); + emit_nm(as, A64I_CMPx, RID_TMP, tmp2); + emit_lso(as, A64I_LDRx, tmp2, tmp1, + (int32_t)offsetof(global_State, gc.threshold)); + emit_lso(as, A64I_LDRx, RID_TMP, tmp1, + (int32_t)offsetof(global_State, gc.total)); + ra_allockreg(as, i64ptr(J2G(as->J)), tmp1); + as->gcsteps = 0; + checkmclim(as); +} + +/* -- Loop handling ------------------------------------------------------- */ + +/* Fixup the loop branch. */ +static void asm_loop_fixup(ASMState *as) +{ + MCode *p = as->mctop; + MCode *target = as->mcp; + if (as->loopinv) { /* Inverted loop branch? */ + uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu : 0x7ffffu; + ptrdiff_t delta = target - (p - 2); + /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */ + p[-2] |= ((uint32_t)delta & mask) << 5; + } else { + ptrdiff_t delta = target - (p - 1); + p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu); + } +} + +/* -- Head of trace ------------------------------------------------------- */ + +/* Reload L register from g->cur_L. */ +static void asm_head_lreg(ASMState *as) +{ + IRIns *ir = IR(ASMREF_L); + if (ra_used(ir)) { + Reg r = ra_dest(as, ir, RSET_GPR); + emit_getgl(as, r, cur_L); + ra_evictk(as); + } +} + +/* Coalesce BASE register for a root trace. */ +static void asm_head_root_base(ASMState *as) +{ + IRIns *ir; + asm_head_lreg(as); + ir = IR(REF_BASE); + if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) + ra_spill(as, ir); + ra_destreg(as, ir, RID_BASE); +} + +/* Coalesce BASE register for a side trace. */ +static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) +{ + IRIns *ir; + asm_head_lreg(as); + ir = IR(REF_BASE); + if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) + ra_spill(as, ir); + if (ra_hasspill(irp->s)) { + rset_clear(allow, ra_dest(as, ir, allow)); + } else { + Reg r = irp->r; + lua_assert(ra_hasreg(r)); + rset_clear(allow, r); + if (r != ir->r && !rset_test(as->freeset, r)) + ra_restore(as, regcost_ref(as->cost[r])); + ra_destreg(as, ir, r); + } + return allow; +} + +/* -- Tail of trace ------------------------------------------------------- */ + +/* Fixup the tail code. */ +static void asm_tail_fixup(ASMState *as, TraceNo lnk) +{ + MCode *p = as->mctop; + MCode *target; + /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ + int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); + if (spadj == 0) { + *--p = A64I_LE(A64I_NOP); + as->mctop = p; + } else { + /* Patch stack adjustment. */ + uint32_t k = emit_isk12(spadj); + lua_assert(k); + p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); + } + /* Patch exit branch. */ + target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; + p[-1] = A64I_B | (((target-p)+1)&0x03ffffffu); +} + +/* Prepare tail of code. */ +static void asm_tail_prep(ASMState *as) +{ + MCode *p = as->mctop - 1; /* Leave room for exit branch. */ + if (as->loopref) { + as->invmcp = as->mcp = p; + } else { + as->mcp = p-1; /* Leave room for stack pointer adjustment. */ + as->invmcp = NULL; + } + *p = 0; /* Prevent load/store merging. */ +} + +/* -- Trace setup --------------------------------------------------------- */ + +/* Ensure there are enough stack slots for call arguments. */ +static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) +{ + IRRef args[CCI_NARGS_MAX*2]; + uint32_t i, nargs = CCI_XNARGS(ci); + int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; + asm_collectargs(as, ir, ci, args); + for (i = 0; i < nargs; i++) { + if (args[i] && irt_isfp(IR(args[i])->t)) { + if (nfpr > 0) nfpr--; else nslots += 2; + } else { + if (ngpr > 0) ngpr--; else nslots += 2; + } + } + if (nslots > as->evenspill) /* Leave room for args in stack slots. */ + as->evenspill = nslots; + return REGSP_HINT(RID_RET); +} + +static void asm_setup_target(ASMState *as) +{ + /* May need extra exit for asm_stack_check on side traces. */ + asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); +} + +#if LJ_BE +/* ARM64 instructions are always little-endian. Swap for ARM64BE. */ +static void asm_mcode_fixup(MCode *mcode, MSize size) +{ + MCode *pe = (MCode *)((char *)mcode + size); + while (mcode < pe) { + MCode ins = *mcode; + *mcode++ = lj_bswap(ins); + } +} +#define LJ_TARGET_MCODE_FIXUP 1 +#endif + +/* -- Trace patching ------------------------------------------------------ */ + +/* Patch exit jumps of existing machine code to a new target. */ +void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) +{ + MCode *p = T->mcode; + MCode *pe = (MCode *)((char *)p + T->szmcode); + MCode *cstart = NULL, *cend = p; + MCode *mcarea = lj_mcode_patch(J, p, 0); + MCode *px = exitstub_trace_addr(T, exitno); + for (; p < pe; p++) { + /* Look for exitstub branch, replace with branch to target. */ + MCode ins = A64I_LE(*p); + if ((ins & 0xff000000u) == 0x54000000u && + ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { + /* Patch bcc exitstub. */ + *p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u)); + cend = p+1; + if (!cstart) cstart = p; + } else if ((ins & 0xfc000000u) == 0x14000000u && + ((ins ^ (px-p)) & 0x03ffffffu) == 0) { + /* Patch b exitstub. */ + *p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu)); + cend = p+1; + if (!cstart) cstart = p; + } else if ((ins & 0x7e000000u) == 0x34000000u && + ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { + /* Patch cbz/cbnz exitstub. */ + *p = A64I_LE((ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u)); + cend = p+1; + if (!cstart) cstart = p; + } else if ((ins & 0x7e000000u) == 0x36000000u && + ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { + /* Patch tbz/tbnz exitstub. */ + *p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u)); + cend = p+1; + if (!cstart) cstart = p; + } + } + lua_assert(cstart != NULL); + lj_mcode_sync(cstart, cend); + lj_mcode_patch(J, mcarea, 1); +} + diff --git a/luajit-2.1/src/lj_asm_mips.h b/luajit-2.1/src/lj_asm_mips.h index adea0e3..affe7d8 100644 --- a/luajit-2.1/src/lj_asm_mips.h +++ b/luajit-2.1/src/lj_asm_mips.h @@ -1,6 +1,6 @@ /* ** MIPS IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* -- Register allocator extensions --------------------------------------- */ @@ -23,7 +23,7 @@ static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) { Reg r = IR(ref)->r; if (ra_noreg(r)) { - if (!(allow & RSET_FPR) && irref_isk(ref) && IR(ref)->i == 0) + if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(IR(ref)) == 0) return RID_ZERO; r = ra_allocref(as, ref, allow); } else { @@ -166,9 +166,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) } else if (ir->o == IR_UREFC) { if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); - int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); - int32_t jgl = (intptr_t)J2G(as->J); - if ((uint32_t)(ofs-jgl) < 65536) { + intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv; + intptr_t jgl = (intptr_t)J2G(as->J); + if ((uintptr_t)(ofs-jgl) < 65536) { *ofsp = ofs-jgl-32768; return RID_JGL; } else { @@ -190,20 +190,21 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, Reg base; if (ra_noreg(ir->r) && canfuse(as, ir)) { if (ir->o == IR_ADD) { - int32_t ofs2; - if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) { + intptr_t ofs2; + if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(IR(ir->op2)), + checki16(ofs2))) { ref = ir->op1; - ofs = ofs2; + ofs = (int32_t)ofs2; } } else if (ir->o == IR_STRREF) { - int32_t ofs2 = 65536; + intptr_t ofs2 = 65536; lua_assert(ofs == 0); ofs = (int32_t)sizeof(GCstr); if (irref_isk(ir->op2)) { - ofs2 = ofs + IR(ir->op2)->i; + ofs2 = ofs + get_kval(IR(ir->op2)); ref = ir->op1; } else if (irref_isk(ir->op1)) { - ofs2 = ofs + IR(ir->op1)->i; + ofs2 = ofs + get_kval(IR(ir->op1)); ref = ir->op2; } if (!checki16(ofs2)) { @@ -211,7 +212,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, Reg right, left = ra_alloc2(as, ir, allow); right = (left >> 8); left &= 255; emit_hsi(as, mi, rt, RID_TMP, ofs); - emit_dst(as, MIPSI_ADDU, RID_TMP, left, right); + emit_dst(as, MIPSI_AADDU, RID_TMP, left, right); return; } ofs = ofs2; @@ -227,28 +228,40 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) { uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = 16; + int32_t ofs = LJ_32 ? 16 : 0; +#if LJ_SOFTFP + Reg gpr = REGARG_FIRSTGPR; +#else Reg gpr, fpr = REGARG_FIRSTFPR; +#endif if ((void *)ci->func) - emit_call(as, (void *)ci->func); + emit_call(as, (void *)ci->func, 1); +#if !LJ_SOFTFP for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) as->cost[gpr] = REGCOST(~0u, ASMREF_L); gpr = REGARG_FIRSTGPR; +#endif for (n = 0; n < nargs; n++) { /* Setup args. */ IRRef ref = args[n]; if (ref) { IRIns *ir = IR(ref); +#if !LJ_SOFTFP if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && !(ci->flags & CCI_VARARG)) { lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ ra_leftov(as, fpr, ref); - fpr += 2; - gpr += irt_isnum(ir->t) ? 2 : 1; - } else { + fpr += LJ_32 ? 2 : 1; + gpr += (LJ_32 && irt_isnum(ir->t)) ? 2 : 1; + } else +#endif + { +#if LJ_32 && !LJ_SOFTFP fpr = REGARG_LASTFPR+1; - if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1; +#endif + if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1; if (gpr <= REGARG_LASTGPR) { lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ +#if !LJ_SOFTFP if (irt_isfp(ir->t)) { RegSet of = as->freeset; Reg r; @@ -257,31 +270,55 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) r = ra_alloc1(as, ref, RSET_FPR); as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); if (irt_isnum(ir->t)) { +#if LJ_32 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ gpr += 2; +#else + emit_tg(as, MIPSI_DMFC1, gpr, r); + gpr++; fpr++; +#endif } else if (irt_isfloat(ir->t)) { emit_tg(as, MIPSI_MFC1, gpr, r); gpr++; +#if LJ_64 + fpr++; +#endif } - } else { + } else +#endif + { ra_leftov(as, gpr, ref); gpr++; +#if LJ_64 + fpr++; +#endif } } else { - Reg r = ra_alloc1z(as, ref, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); +#if LJ_32 if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; emit_spstore(as, ir, r, ofs); ofs += irt_isnum(ir->t) ? 8 : 4; +#else + emit_spstore(as, ir, r, ofs + ((LJ_BE && (LJ_SOFTFP || r < RID_MAX_GPR) && !irt_is64(ir->t)) ? 4 : 0)); + ofs += 8; +#endif } } } else { +#if !LJ_SOFTFP fpr = REGARG_LASTFPR+1; - if (gpr <= REGARG_LASTGPR) +#endif + if (gpr <= REGARG_LASTGPR) { gpr++; - else - ofs += 4; +#if LJ_64 + fpr++; +#endif + } else { + ofs += LJ_32 ? 4 : 8; + } } checkmclim(as); } @@ -291,35 +328,51 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) { RegSet drop = RSET_SCRATCH; - int hiop = ((ir+1)->o == IR_HIOP); +#if LJ_32 + int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); +#endif +#if !LJ_SOFTFP if ((ci->flags & CCI_NOFPRCLOBBER)) drop &= ~RSET_FPR; +#endif if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. */ +#if LJ_32 if (hiop && ra_hasreg((ir+1)->r)) rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ +#endif ra_evictset(as, drop); /* Evictions must be performed first. */ if (ra_used(ir)) { lua_assert(!irt_ispri(ir->t)); - if (irt_isfp(ir->t)) { + if (!LJ_SOFTFP && irt_isfp(ir->t)) { if ((ci->flags & CCI_CASTU64)) { int32_t ofs = sps_scale(ir->s); Reg dest = ir->r; if (ra_hasreg(dest)) { ra_free(as, dest); ra_modified(as, dest); +#if LJ_32 emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1); emit_tg(as, MIPSI_MTC1, RID_RETLO, dest); +#else + emit_tg(as, MIPSI_DMTC1, RID_RET, dest); +#endif } if (ofs) { +#if LJ_32 emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0)); emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4)); +#else + emit_tsi(as, MIPSI_SD, RID_RET, RID_SP, ofs); +#endif } } else { ra_destreg(as, ir, RID_FPRET); } +#if LJ_32 } else if (hiop) { ra_destpair(as, ir); +#endif } else { ra_destreg(as, ir, RID_RET); } @@ -338,7 +391,7 @@ static void asm_callx(ASMState *as, IRIns *ir) func = ir->op2; irf = IR(func); if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(void *)(irf->i); + ci.func = (ASMFunction)(void *)get_kval(irf); } else { /* Need specific register for indirect calls. */ Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); MCode *p = as->mcp; @@ -353,6 +406,7 @@ static void asm_callx(ASMState *as, IRIns *ir) asm_gencall(as, &ci, args); } +#if !LJ_SOFTFP static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) { /* The modified regs must match with the *.dasc implementation. */ @@ -361,9 +415,10 @@ static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ra_evictset(as, drop); ra_destreg(as, ir, RID_FPRET); - emit_call(as, (void *)lj_ir_callinfo[id].func); + emit_call(as, (void *)lj_ir_callinfo[id].func, 0); ra_leftov(as, REGARG_FIRSTFPR, ir->op1); } +#endif /* -- Returns ------------------------------------------------------------- */ @@ -379,12 +434,13 @@ static void asm_retf(ASMState *as, IRIns *ir) emit_setgl(as, base, jit_base); emit_addptr(as, base, -8*delta); asm_guard(as, MIPSI_BNE, RID_TMP, - ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); - emit_tsi(as, MIPSI_LW, RID_TMP, base, -8); + ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); + emit_tsi(as, MIPSI_AL, RID_TMP, base, -8); } /* -- Type conversions ---------------------------------------------------- */ +#if !LJ_SOFTFP static void asm_tointg(ASMState *as, IRIns *ir, Reg left) { Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); @@ -406,15 +462,28 @@ static void asm_tobit(ASMState *as, IRIns *ir) emit_tg(as, MIPSI_MFC1, dest, tmp); emit_fgh(as, MIPSI_ADD_D, tmp, left, right); } +#endif static void asm_conv(ASMState *as, IRIns *ir) { IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); +#if !LJ_SOFTFP int stfp = (st == IRT_NUM || st == IRT_FLOAT); +#endif +#if LJ_64 + int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); +#endif IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); +#if LJ_32 lua_assert(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ +#endif +#if LJ_32 && LJ_SOFTFP + /* FP conversions are handled by SPLIT. */ + lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); + /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ +#else + lua_assert(irt_type(ir->t) != st); if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ @@ -424,26 +493,50 @@ static void asm_conv(ASMState *as, IRIns *ir) /* y = (x ^ 0x8000000) + 2147483648.0 */ Reg left = ra_alloc1(as, lref, RSET_GPR); Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - emit_fgh(as, irt_isfloat(ir->t) ? MIPSI_ADD_S : MIPSI_ADD_D, - dest, dest, tmp); - emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, - dest, dest); if (irt_isfloat(ir->t)) - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), - RSET_GPR); - else - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), - RSET_GPR); + emit_fg(as, MIPSI_CVT_S_D, dest, dest); + /* Must perform arithmetic with doubles to keep the precision. */ + emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); + emit_fg(as, MIPSI_CVT_D_W, dest, dest); + emit_lsptr(as, MIPSI_LDC1, (tmp & 31), + (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); emit_tg(as, MIPSI_MTC1, RID_TMP, dest); emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); +#if LJ_64 + } else if(st == IRT_U64) { /* U64 to FP conversion. */ + /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); + MCLabel l_end = emit_label(as); + if (irt_isfloat(ir->t)) { + emit_fgh(as, MIPSI_ADD_S, dest, dest, tmp); + emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63], + rset_exclude(RSET_GPR, left)); + emit_fg(as, MIPSI_CVT_S_L, dest, dest); + } else { + emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); + emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63], + rset_exclude(RSET_GPR, left)); + emit_fg(as, MIPSI_CVT_D_L, dest, dest); + } + emit_branch(as, MIPSI_BGEZ, left, RID_ZERO, l_end); + emit_tg(as, MIPSI_DMTC1, RID_TMP, dest); + emit_tsml(as, MIPSI_DEXTM, RID_TMP, left, 30, 0); +#endif } else { /* Integer to FP conversion. */ Reg left = ra_alloc1(as, lref, RSET_GPR); +#if LJ_32 emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, dest, dest); emit_tg(as, MIPSI_MTC1, left, dest); +#else + MIPSIns mi = irt_isfloat(ir->t) ? + (st64 ? MIPSI_CVT_S_L : MIPSI_CVT_S_W) : + (st64 ? MIPSI_CVT_D_L : MIPSI_CVT_D_W); + emit_fg(as, mi, dest, dest); + emit_tg(as, st64 ? MIPSI_DMTC1 : MIPSI_MTC1, left, dest); +#endif } } else if (stfp) { /* FP to integer conversion. */ if (irt_isguard(ir->t)) { @@ -454,7 +547,7 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_alloc1(as, lref, RSET_FPR); Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - if (irt_isu32(ir->t)) { + if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); @@ -465,25 +558,65 @@ static void asm_conv(ASMState *as, IRIns *ir) tmp, left, tmp); if (st == IRT_FLOAT) emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), - RSET_GPR); + (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); else emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), - RSET_GPR); + (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); +#if LJ_64 + } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ + MCLabel l_end; + emit_tg(as, MIPSI_DMFC1, dest, tmp); + l_end = emit_label(as); + /* For inputs >= 2^63 add -2^64 and convert again. */ + if (st == IRT_NUM) { + emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp); + emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp); + emit_lsptr(as, MIPSI_LDC1, (tmp & 31), + (void *)&as->J->k64[LJ_K64_M2P64], + rset_exclude(RSET_GPR, dest)); + emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ + emit_branch(as, MIPSI_BC1T, 0, 0, l_end); + emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); + emit_lsptr(as, MIPSI_LDC1, (tmp & 31), + (void *)&as->J->k64[LJ_K64_2P63], + rset_exclude(RSET_GPR, dest)); + } else { + emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp); + emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp); + emit_lsptr(as, MIPSI_LWC1, (tmp & 31), + (void *)&as->J->k32[LJ_K32_M2P64], + rset_exclude(RSET_GPR, dest)); + emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ + emit_branch(as, MIPSI_BC1T, 0, 0, l_end); + emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); + emit_lsptr(as, MIPSI_LWC1, (tmp & 31), + (void *)&as->J->k32[LJ_K32_2P63], + rset_exclude(RSET_GPR, dest)); + } +#endif } else { +#if LJ_32 emit_tg(as, MIPSI_MFC1, dest, tmp); emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, tmp, left); +#else + MIPSIns mi = irt_is64(ir->t) ? + (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) : + (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S); + emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left); + emit_fg(as, mi, left, left); +#endif } } - } else { + } else +#endif + { Reg dest = ra_dest(as, ir, RSET_GPR); if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); if ((ir->op2 & IRCONV_SEXT)) { - if ((as->flags & JIT_F_MIPS32R2)) { + if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); } else { uint32_t shift = st == IRT_I8 ? 24 : 16; @@ -495,8 +628,35 @@ static void asm_conv(ASMState *as, IRIns *ir) (int32_t)(st == IRT_U8 ? 0xff : 0xffff)); } } else { /* 32/64 bit integer conversions. */ +#if LJ_32 /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ +#else + if (irt_is64(ir->t)) { + if (st64) { + /* 64/64 bit no-op (cast)*/ + ra_leftov(as, dest, lref); + } else { + Reg left = ra_alloc1(as, lref, RSET_GPR); + if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ + emit_dta(as, MIPSI_SLL, dest, left, 0); + } else { /* 32 to 64 bit zero extension. */ + emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); + } + } + } else { + if (st64) { + /* This is either a 32 bit reg/reg mov which zeroes the hiword + ** or a load of the loword from a 64 bit address. + */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); + } else { /* 32/32 bit no-op (cast). */ + /* Do nothing, but may need to move regs. */ + ra_leftov(as, dest, lref); + } + } +#endif } } } @@ -505,40 +665,101 @@ static void asm_strto(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; IRRef args[2]; + int32_t ofs = 0; +#if LJ_SOFTFP + ra_evictset(as, RSET_SCRATCH); + if (ra_used(ir)) { + if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && + (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { + int i; + for (i = 0; i < 2; i++) { + Reg r = (ir+i)->r; + if (ra_hasreg(r)) { + ra_free(as, r); + ra_modified(as, r); + emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); + } + } + ofs = sps_scale(ir->s & ~1); + } else { + Reg rhi = ra_dest(as, ir+1, RSET_GPR); + Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); + emit_tsi(as, MIPSI_LW, rhi, RID_SP, ofs+(LJ_BE?0:4)); + emit_tsi(as, MIPSI_LW, rlo, RID_SP, ofs+(LJ_BE?4:0)); + } + } +#else RegSet drop = RSET_SCRATCH; if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ ra_evictset(as, drop); + ofs = sps_scale(ir->s); +#endif asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ args[0] = ir->op1; /* GCstr *str */ args[1] = ASMREF_TMP1; /* TValue *n */ asm_gencall(as, ci, args); /* Store the result to the spill slot or temp slots. */ - emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), - RID_SP, sps_scale(ir->s)); + emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), + RID_SP, ofs); } /* -- Memory references --------------------------------------------------- */ +#if LJ_64 +/* Store tagged value for ref at base+ofs. */ +static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) +{ + RegSet allow = rset_exclude(RSET_GPR, base); + IRIns *ir = IR(ref); + lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + emit_tsi(as, MIPSI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs); + } else { + Reg src = ra_alloc1(as, ref, allow); + Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, + rset_exclude(allow, src)); + emit_tsi(as, MIPSI_SD, RID_TMP, base, ofs); + if (irt_isinteger(ir->t)) { + emit_dst(as, MIPSI_DADDU, RID_TMP, RID_TMP, type); + emit_tsml(as, MIPSI_DEXT, RID_TMP, src, 31, 0); + } else { + emit_dst(as, MIPSI_DADDU, RID_TMP, src, type); + } + } +} +#endif + /* Get pointer to TValue. */ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) { IRIns *ir = IR(ref); if (irt_isnum(ir->t)) { if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ - ra_allockreg(as, i32ptr(ir_knum(ir)), dest); + ra_allockreg(as, igcptr(ir_knum(ir)), dest); else /* Otherwise force a spill and use the spill slot. */ - emit_tsi(as, MIPSI_ADDIU, dest, RID_SP, ra_spill(as, ir)); + emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir)); } else { /* Otherwise use g->tmptv to hold the TValue. */ +#if LJ_32 RegSet allow = rset_exclude(RSET_GPR, dest); Reg type; - emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, offsetof(global_State, tmptv)-32768); + emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768)); if (!irt_ispri(ir->t)) { Reg src = ra_alloc1(as, ref, allow); emit_setgl(as, src, tmptv.gcr); } - type = ra_allock(as, irt_toitype(ir->t), allow); + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) + type = ra_alloc1(as, ref+1, allow); + else + type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); emit_setgl(as, type, tmptv.it); +#else + asm_tvstore64(as, dest, 0, ref); + emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, + (int32_t)(offsetof(global_State, tmptv)-32768)); +#endif } } @@ -553,13 +774,13 @@ static void asm_aref(ASMState *as, IRIns *ir) ofs += 8*IR(ir->op2)->i; if (checki16(ofs)) { base = ra_alloc1(as, refa, RSET_GPR); - emit_tsi(as, MIPSI_ADDIU, dest, base, ofs); + emit_tsi(as, MIPSI_AADDIU, dest, base, ofs); return; } } base = ra_alloc1(as, ir->op1, RSET_GPR); idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - emit_dst(as, MIPSI_ADDU, dest, RID_TMP, base); + emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base); emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); } @@ -580,20 +801,44 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; IRRef refkey = ir->op2; IRIns *irkey = IR(refkey); + int isk = irref_isk(refkey); IRType1 kt = irkey->t; uint32_t khash; MCLabel l_end, l_loop, l_next; rset_clear(allow, tab); +#if LJ_32 && LJ_SOFTFP + if (!isk) { + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + if (irkey[1].o == IR_HIOP) { + if (ra_hasreg((irkey+1)->r)) { + type = tmpnum = (irkey+1)->r; + tmp1 = ra_scratch(as, allow); + rset_clear(allow, tmp1); + ra_noweak(as, tmpnum); + } else { + type = tmpnum = ra_allocref(as, refkey+1, allow); + } + rset_clear(allow, tmpnum); + } else { + type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); + rset_clear(allow, type); + } + } +#else if (irt_isnum(kt)) { key = ra_alloc1(as, refkey, RSET_FPR); tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); } else if (!irt_ispri(kt)) { key = ra_alloc1(as, refkey, allow); rset_clear(allow, key); - type = ra_allock(as, irt_toitype(irkey->t), allow); +#if LJ_32 + type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); rset_clear(allow, type); +#endif } +#endif tmp2 = ra_scratch(as, allow); rset_clear(allow, tmp2); @@ -605,9 +850,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) else if (destused) emit_loada(as, dest, niltvg(J2G(as->J))); /* Follow hash chain until the end. */ - emit_move(as, dest, tmp2); + emit_move(as, dest, tmp1); l_loop = --as->mcp; - emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, next)); + emit_tsi(as, MIPSI_AL, tmp1, dest, (int32_t)offsetof(Node, next)); l_next = emit_label(as); /* Type and value comparison. */ @@ -615,42 +860,70 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); l_end = asm_exitstub_addr(as); } - if (irt_isnum(kt)) { + if (!LJ_SOFTFP && irt_isnum(kt)) { emit_branch(as, MIPSI_BC1T, 0, 0, l_end); emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */ - emit_branch(as, MIPSI_BEQ, tmp2, RID_ZERO, l_next); - emit_tsi(as, MIPSI_SLTIU, tmp2, tmp2, (int32_t)LJ_TISNUM); + emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); + emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); +#if LJ_32 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); } else { if (irt_ispri(kt)) { - emit_branch(as, MIPSI_BEQ, tmp2, type, l_end); + emit_branch(as, MIPSI_BEQ, tmp1, type, l_end); } else { - emit_branch(as, MIPSI_BEQ, tmp1, key, l_end); - emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.gcr)); - emit_branch(as, MIPSI_BNE, tmp2, type, l_next); + emit_branch(as, MIPSI_BEQ, tmp2, key, l_end); + emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); + emit_branch(as, MIPSI_BNE, tmp1, type, l_next); } } - emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.it)); - *l_loop = MIPSI_BNE | MIPSF_S(tmp2) | ((as->mcp-l_loop-1) & 0xffffu); + emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); + *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); +#else + emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); + emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); + emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); + } else if (irt_isaddr(kt)) { + Reg refk = tmp2; + if (isk) { + int64_t k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; + refk = ra_allock(as, k, allow); + rset_clear(allow, refk); + } + emit_branch(as, MIPSI_BEQ, tmp1, refk, l_end); + emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); + } else { + Reg pri = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); + rset_clear(allow, pri); + lua_assert(irt_ispri(kt) && !irt_isnil(kt)); + emit_branch(as, MIPSI_BEQ, tmp1, pri, l_end); + emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); + } + *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); + if (!isk && irt_isaddr(kt)) { + type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); + emit_dst(as, MIPSI_DADDU, tmp2, key, type); + rset_clear(allow, type); + } +#endif /* Load main position relative to tab->node into dest. */ - khash = irref_isk(refkey) ? ir_khash(irkey) : 1; + khash = isk ? ir_khash(irkey) : 1; if (khash == 0) { - emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); + emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); } else { Reg tmphash = tmp1; - if (irref_isk(refkey)) + if (isk) tmphash = ra_allock(as, khash, allow); - emit_dst(as, MIPSI_ADDU, dest, dest, tmp1); + emit_dst(as, MIPSI_AADDU, dest, dest, tmp1); lua_assert(sizeof(Node) == 24); emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash); - emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); + emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); - if (irref_isk(refkey)) { + if (isk) { /* Nothing to do. */ } else if (irt_isstr(kt)) { emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); @@ -660,9 +933,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2); emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); - if (irt_isnum(kt)) { +#if LJ_32 + if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); - if ((as->flags & JIT_F_MIPS32R2)) { + if ((as->flags & JIT_F_MIPSXXR2)) { emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); } else { emit_dst(as, MIPSI_OR, dest, dest, tmp1); @@ -670,13 +944,35 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31); } emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); +#if LJ_SOFTFP + emit_ds(as, MIPSI_MOVE, tmp1, type); + emit_ds(as, MIPSI_MOVE, tmp2, key); +#else emit_tg(as, MIPSI_MFC1, tmp2, key); emit_tg(as, MIPSI_MFC1, tmp1, key+1); +#endif } else { emit_dst(as, MIPSI_XOR, tmp2, key, tmp1); emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31); emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow)); } +#else + emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); + emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); + if (irt_isnum(kt)) { + emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); + emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); + emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0); +#if !LJ_SOFTFP + emit_tg(as, MIPSI_DMFC1, tmp1, key); +#endif + } else { + checkmclim(as); + emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); + emit_dta(as, MIPSI_SLL, tmp2, key, 0); + emit_dst(as, MIPSI_DADDU, tmp1, key, type); + } +#endif } } } @@ -689,17 +985,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir) int32_t kofs = ofs + (int32_t)offsetof(Node, key); Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; Reg node = ra_alloc1(as, ir->op1, RSET_GPR); - Reg key = RID_NONE, type = RID_TMP, idx = node; RegSet allow = rset_exclude(RSET_GPR, node); + Reg idx = node; +#if LJ_32 + Reg key = RID_NONE, type = RID_TMP; int32_t lo, hi; +#else + Reg key = ra_scratch(as, allow); + int64_t k; +#endif lua_assert(ofs % sizeof(Node) == 0); if (ofs > 32736) { idx = dest; rset_clear(allow, dest); kofs = (int32_t)offsetof(Node, key); } else if (ra_hasreg(dest)) { - emit_tsi(as, MIPSI_ADDIU, dest, node, ofs); + emit_tsi(as, MIPSI_AADDIU, dest, node, ofs); } +#if LJ_32 if (!irt_ispri(irkey->t)) { key = ra_scratch(as, allow); rset_clear(allow, key); @@ -718,30 +1021,41 @@ nolo: asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO); if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0)); emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); +#else + if (irt_ispri(irkey->t)) { + lua_assert(!irt_isnil(irkey->t)); + k = ~((int64_t)~irt_toitype(irkey->t) << 47); + } else if (irt_isnum(irkey->t)) { + k = (int64_t)ir_knum(irkey)->u64; + } else { + k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); + } + asm_guard(as, MIPSI_BNE, key, ra_allock(as, k, allow)); + emit_tsi(as, MIPSI_LD, key, idx, kofs); +#endif if (ofs > 32736) - emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow)); + emit_tsi(as, MIPSI_AADDU, dest, node, ra_allock(as, ofs, allow)); } static void asm_uref(ASMState *as, IRIns *ir) { - /* NYI: Check that UREFO is still open and not aliasing a slot. */ Reg dest = ra_dest(as, ir, RSET_GPR); if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_lsptr(as, MIPSI_LW, dest, v, RSET_GPR); + emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR); } else { Reg uv = ra_scratch(as, RSET_GPR); Reg func = ra_alloc1(as, ir->op1, RSET_GPR); if (ir->o == IR_UREFC) { asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_ADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); + emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); } else { - emit_tsi(as, MIPSI_LW, dest, uv, (int32_t)offsetof(GCupval, v)); + emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v)); } - emit_tsi(as, MIPSI_LW, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); + emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) + + (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); } } @@ -753,6 +1067,7 @@ static void asm_fref(ASMState *as, IRIns *ir) static void asm_strref(ASMState *as, IRIns *ir) { +#if LJ_32 Reg dest = ra_dest(as, ir, RSET_GPR); IRRef ref = ir->op2, refk = ir->op1; int32_t ofs = (int32_t)sizeof(GCstr); @@ -784,6 +1099,20 @@ static void asm_strref(ASMState *as, IRIns *ir) else emit_dst(as, MIPSI_ADDU, dest, r, ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); +#else + RegSet allow = RSET_GPR; + Reg dest = ra_dest(as, ir, allow); + Reg base = ra_alloc1(as, ir->op1, allow); + IRIns *irr = IR(ir->op2); + int32_t ofs = sizeof(GCstr); + rset_clear(allow, base); + if (irref_isk(ir->op2) && checki16(ofs + irr->i)) { + emit_tsi(as, MIPSI_DADDIU, dest, base, ofs + irr->i); + } else { + emit_tsi(as, MIPSI_DADDIU, dest, dest, ofs); + emit_dst(as, MIPSI_DADDU, dest, base, ra_alloc1(as, ir->op2, allow)); + } +#endif } /* -- Loads and stores ---------------------------------------------------- */ @@ -795,9 +1124,9 @@ static MIPSIns asm_fxloadins(IRIns *ir) case IRT_U8: return MIPSI_LBU; case IRT_I16: return MIPSI_LH; case IRT_U16: return MIPSI_LHU; - case IRT_NUM: return MIPSI_LDC1; - case IRT_FLOAT: return MIPSI_LWC1; - default: return MIPSI_LW; + case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1; + case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; + default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW; } } @@ -806,26 +1135,32 @@ static MIPSIns asm_fxstoreins(IRIns *ir) switch (irt_type(ir->t)) { case IRT_I8: case IRT_U8: return MIPSI_SB; case IRT_I16: case IRT_U16: return MIPSI_SH; - case IRT_NUM: return MIPSI_SDC1; - case IRT_FLOAT: return MIPSI_SWC1; - default: return MIPSI_SW; + case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1; + case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; + default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW; } } static void asm_fload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); MIPSIns mi = asm_fxloadins(ir); + Reg idx; int32_t ofs; - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs); - return; + if (ir->op1 == REF_NIL) { + idx = RID_JGL; + ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); + } else { + idx = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ + emit_tsi(as, MIPSI_AADDIU, dest, idx, ofs); + return; + } } + ofs = field_ofs[ir->op2]; } - ofs = field_ofs[ir->op2]; lua_assert(!irt_isfp(ir->t)); emit_tsi(as, mi, dest, idx, ofs); } @@ -845,7 +1180,8 @@ static void asm_fstore(ASMState *as, IRIns *ir) static void asm_xload(ASMState *as, IRIns *ir) { - Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + Reg dest = ra_dest(as, ir, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); } @@ -853,7 +1189,8 @@ static void asm_xload(ASMState *as, IRIns *ir) static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) { if (ir->r != RID_SINK) { - Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + Reg src = ra_alloc1z(as, ir->op2, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, rset_exclude(RSET_GPR, src), ofs); } @@ -863,27 +1200,59 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) static void asm_ahuvload(ASMState *as, IRIns *ir) { - IRType1 t = ir->t; + int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); Reg dest = RID_NONE, type = RID_TMP, idx; RegSet allow = RSET_GPR; int32_t ofs = 0; + IRType1 t = ir->t; + if (hiop) { + t.irt = IRT_NUM; + if (ra_used(ir+1)) { + type = ra_dest(as, ir+1, allow); + rset_clear(allow, type); + } + } if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); + lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t)); + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); +#if LJ_64 + if (irt_isaddr(t)) + emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); + else if (irt_isint(t)) + emit_dta(as, MIPSI_SLL, dest, dest, 0); +#endif } idx = asm_fuseahuref(as, ir->op1, &ofs, allow); rset_clear(allow, idx); if (irt_isnum(t)) { - asm_guard(as, MIPSI_BEQ, type, RID_ZERO); - emit_tsi(as, MIPSI_SLTIU, type, type, (int32_t)LJ_TISNUM); - if (ra_hasreg(dest)) - emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); + asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); + emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); } else { - asm_guard(as, MIPSI_BNE, type, ra_allock(as, irt_toitype(t), allow)); - if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); + asm_guard(as, MIPSI_BNE, type, + ra_allock(as, (int32_t)irt_toitype(t), allow)); + } +#if LJ_32 + if (ra_hasreg(dest)) { + if (!LJ_SOFTFP && irt_isnum(t)) + emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); + else + emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); } emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4)); +#else + if (ra_hasreg(dest)) { + if (!LJ_SOFTFP && irt_isnum(t)) { + emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); + dest = type; + } + } else { + dest = type; + } + emit_dta(as, MIPSI_DSRA32, type, dest, 15); + emit_tsi(as, MIPSI_LD, dest, idx, ofs); +#endif } static void asm_ahustore(ASMState *as, IRIns *ir) @@ -893,46 +1262,86 @@ static void asm_ahustore(ASMState *as, IRIns *ir) int32_t ofs = 0; if (ir->r == RID_SINK) return; - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { src = ra_alloc1(as, ir->op2, RSET_FPR); + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + emit_hsi(as, MIPSI_SDC1, src, idx, ofs); } else { +#if LJ_32 if (!irt_ispri(ir->t)) { src = ra_alloc1(as, ir->op2, allow); rset_clear(allow, src); } - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) + type = ra_alloc1(as, (ir+1)->op2, allow); + else + type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); rset_clear(allow, type); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - if (irt_isnum(ir->t)) { - emit_hsi(as, MIPSI_SDC1, src, idx, ofs); - } else { + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); if (ra_hasreg(src)) emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0)); emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4)); +#else + Reg tmp = RID_TMP; + if (irt_ispri(ir->t)) { + tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); + rset_clear(allow, tmp); + } else { + src = ra_alloc1(as, ir->op2, allow); + rset_clear(allow, src); + type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); + rset_clear(allow, type); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + emit_tsi(as, MIPSI_SD, tmp, idx, ofs); + if (ra_hasreg(src)) { + if (irt_isinteger(ir->t)) { + emit_dst(as, MIPSI_DADDU, tmp, tmp, type); + emit_tsml(as, MIPSI_DEXT, tmp, src, 31, 0); + } else { + emit_dst(as, MIPSI_DADDU, tmp, src, type); + } + } +#endif } } static void asm_sload(ASMState *as, IRIns *ir) { - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); - IRType1 t = ir->t; Reg dest = RID_NONE, type = RID_NONE, base; RegSet allow = RSET_GPR; + IRType1 t = ir->t; +#if LJ_32 + int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); + int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); + if (hiop) + t.irt = IRT_NUM; +#else + int32_t ofs = 8*((int32_t)ir->op1-2); +#endif lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); + lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); +#if LJ_32 && LJ_SOFTFP + lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ + if (hiop && ra_used(ir+1)) { + type = ra_dest(as, ir+1, allow); + rset_clear(allow, type); + } +#else if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { dest = ra_scratch(as, RSET_FPR); asm_tointg(as, ir, dest); t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); + } else +#endif + if (ra_used(ir)) { + lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t)); + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); base = ra_alloc1(as, REF_BASE, allow); rset_clear(allow, base); - if ((ir->op2 & IRSLOAD_CONVERT)) { + if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { if (irt_isint(t)) { Reg tmp = ra_scratch(as, RSET_FPR); emit_tg(as, MIPSI_MFC1, dest, tmp); @@ -947,27 +1356,67 @@ static void asm_sload(ASMState *as, IRIns *ir) t.irt = IRT_INT; /* Check for original type. */ } } +#if LJ_64 + else if (irt_isaddr(t)) { + /* Clear type from pointers. */ + emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); + } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { + /* Sign-extend integers. */ + emit_dta(as, MIPSI_SLL, dest, dest, 0); + } +#endif goto dotypecheck; } base = ra_alloc1(as, REF_BASE, allow); rset_clear(allow, base); dotypecheck: - if (irt_isnum(t)) { - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); +#if LJ_32 + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + if (ra_noreg(type)) type = RID_TMP; - } - if (ra_hasreg(dest)) emit_hsi(as, MIPSI_LDC1, dest, base, ofs); - } else { - if ((ir->op2 & IRSLOAD_TYPECHECK)) { + if (irt_isnum(t)) { + asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); + emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); + } else { Reg ktype = ra_allock(as, irt_toitype(t), allow); - asm_guard(as, MIPSI_BNE, RID_TMP, ktype); - type = RID_TMP; + asm_guard(as, MIPSI_BNE, type, ktype); } - if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0)); } - if (ra_hasreg(type)) emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); + if (ra_hasreg(dest)) { + if (!LJ_SOFTFP && irt_isnum(t)) + emit_hsi(as, MIPSI_LDC1, dest, base, ofs); + else + emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0)); + } + if (ra_hasreg(type)) + emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); +#else + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + type = dest < RID_MAX_GPR ? dest : RID_TMP; + if (irt_ispri(t)) { + asm_guard(as, MIPSI_BNE, type, + ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); + } else { + if (irt_isnum(t)) { + asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); + emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); + if (ra_hasreg(dest)) + emit_hsi(as, MIPSI_LDC1, dest, base, ofs); + } else { + asm_guard(as, MIPSI_BNE, RID_TMP, + ra_allock(as, (int32_t)irt_toitype(t), allow)); + } + emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 15); + } + emit_tsi(as, MIPSI_LD, type, base, ofs); + } else if (ra_hasreg(dest)) { + if (irt_isnum(t)) + emit_hsi(as, MIPSI_LDC1, dest, base, ofs); + else + emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base, + ofs ^ ((LJ_BE && irt_isint(t)) ? 4 : 0)); + } +#endif } /* -- Allocations --------------------------------------------------------- */ @@ -994,8 +1443,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) /* Initialize immutable cdata object. */ if (ir->o == IR_CNEWI) { RegSet allow = (RSET_GPR & ~RSET_SCRATCH); +#if LJ_32 int32_t ofs = sizeof(GCcdata); - lua_assert(sz == 4 || sz == 8); if (sz == 8) { ofs += 4; lua_assert((ir+1)->o == IR_HIOP); @@ -1008,6 +1457,11 @@ static void asm_cnew(ASMState *as, IRIns *ir) if (ofs == sizeof(GCcdata)) break; ofs -= 4; if (LJ_BE) ir++; else ir--; } +#else + emit_tsi(as, MIPSI_SD, ra_alloc1(as, ir->op2, allow), + RID_RET, sizeof(GCcdata)); +#endif + lua_assert(sz == 4 || sz == 8); } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; args[0] = ASMREF_L; /* lua_State *L */ @@ -1042,7 +1496,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); Reg link = RID_TMP; MCLabel l_end = emit_label(as); - emit_tsi(as, MIPSI_SW, link, tab, (int32_t)offsetof(GCtab, gclist)); + emit_tsi(as, MIPSI_AS, link, tab, (int32_t)offsetof(GCtab, gclist)); emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); emit_setgl(as, tab, gc.grayagain); emit_getgl(as, link, gc.grayagain); @@ -1065,7 +1519,7 @@ static void asm_obar(ASMState *as, IRIns *ir) args[0] = ASMREF_TMP1; /* global_State *g */ args[1] = ir->op1; /* TValue *tv */ asm_gencall(as, ci, args); - emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); + emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); obj = IR(ir->op1)->r; tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); @@ -1080,6 +1534,7 @@ static void asm_obar(ASMState *as, IRIns *ir) /* -- Arithmetic and logic operations ------------------------------------- */ +#if !LJ_SOFTFP static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi) { Reg dest = ra_dest(as, ir, RSET_FPR); @@ -1106,62 +1561,116 @@ static void asm_fpmath(ASMState *as, IRIns *ir) else asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); } +#endif static void asm_add(ASMState *as, IRIns *ir) { - if (irt_isnum(ir->t)) { + IRType1 t = ir->t; +#if !LJ_SOFTFP + if (irt_isnum(t)) { asm_fparith(as, ir, MIPSI_ADD_D); - } else { + } else +#endif + { Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; + intptr_t k = get_kval(IR(ir->op2)); if (checki16(k)) { - emit_tsi(as, MIPSI_ADDIU, dest, left, k); + emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest, + left, k); return; } } right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dst(as, MIPSI_ADDU, dest, left, right); + emit_dst(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDU : MIPSI_ADDU, dest, + left, right); } } static void asm_sub(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { asm_fparith(as, ir, MIPSI_SUB_D); - } else { + } else +#endif + { Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_alloc2(as, ir, RSET_GPR); right = (left >> 8); left &= 255; - emit_dst(as, MIPSI_SUBU, dest, left, right); + emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, + left, right); } } static void asm_mul(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { asm_fparith(as, ir, MIPSI_MUL_D); - } else { + } else +#endif + { Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_alloc2(as, ir, RSET_GPR); right = (left >> 8); left &= 255; - emit_dst(as, MIPSI_MUL, dest, left, right); + if (LJ_64 && irt_is64(ir->t)) { + emit_dst(as, MIPSI_MFLO, dest, 0, 0); + emit_dst(as, MIPSI_DMULT, 0, left, right); + } else { + emit_dst(as, MIPSI_MUL, dest, left, right); + } } } -#define asm_div(as, ir) asm_fparith(as, ir, MIPSI_DIV_D) -#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) -#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) +static void asm_mod(ASMState *as, IRIns *ir) +{ +#if LJ_64 && LJ_HASFFI + if (!irt_isint(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : + IRCALL_lj_carith_modu64); + else +#endif + asm_callid(as, ir, IRCALL_lj_vm_modi); +} + +#if !LJ_SOFTFP +static void asm_pow(ASMState *as, IRIns *ir) +{ +#if LJ_64 && LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : + IRCALL_lj_carith_powu64); + else +#endif + asm_callid(as, ir, IRCALL_lj_vm_powi); +} + +static void asm_div(ASMState *as, IRIns *ir) +{ +#if LJ_64 && LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : + IRCALL_lj_carith_divu64); + else +#endif + asm_fparith(as, ir, MIPSI_DIV_D); +} +#endif static void asm_neg(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { asm_fpunary(as, ir, MIPSI_NEG_D); - } else { + } else +#endif + { Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left); + emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, + RID_ZERO, left); } } @@ -1172,6 +1681,7 @@ static void asm_neg(ASMState *as, IRIns *ir) static void asm_arithov(ASMState *as, IRIns *ir) { Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); + lua_assert(!irt_is64(ir->t)); if (irref_isk(ir->op2)) { int k = IR(ir->op2)->i; if (ir->o == IR_SUBOV) k = -k; @@ -1219,7 +1729,7 @@ static void asm_mulov(ASMState *as, IRIns *ir) emit_dst(as, MIPSI_MULT, 0, left, right); } -#if LJ_HASFFI +#if LJ_32 && LJ_HASFFI static void asm_add64(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -1321,7 +1831,8 @@ static void asm_bswap(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - if ((as->flags & JIT_F_MIPS32R2)) { +#if LJ_32 + if ((as->flags & JIT_F_MIPSXXR2)) { emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); } else { @@ -1336,6 +1847,15 @@ static void asm_bswap(ASMState *as, IRIns *ir) emit_dta(as, MIPSI_SRL, tmp, left, 24); emit_dta(as, MIPSI_SLL, RID_TMP, left, 24); } +#else + if (irt_is64(ir->t)) { + emit_dst(as, MIPSI_DSHD, dest, 0, RID_TMP); + emit_dst(as, MIPSI_DSBH, RID_TMP, 0, left); + } else { + emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); + emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); + } +#endif } static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) @@ -1343,7 +1863,7 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; + intptr_t k = get_kval(IR(ir->op2)); if (checku16(k)) { emit_tsi(as, mik, dest, left, k); return; @@ -1361,11 +1881,14 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) { Reg dest = ra_dest(as, ir, RSET_GPR); if (irref_isk(ir->op2)) { /* Constant shifts. */ - uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31); - emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), shift); + uint32_t shift = (uint32_t)IR(ir->op2)->i; + if (LJ_64 && irt_is64(ir->t)) mik |= (shift & 32) ? MIPSI_D32 : MIPSI_D; + emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), + (shift & 31)); } else { Reg right, left = ra_alloc2(as, ir, RSET_GPR); right = (left >> 8); left &= 255; + if (LJ_64 && irt_is64(ir->t)) mi |= MIPSI_DV; emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */ } } @@ -1377,7 +1900,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) static void asm_bror(ASMState *as, IRIns *ir) { - if ((as->flags & JIT_F_MIPS32R2)) { + if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); } else { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -1396,9 +1919,25 @@ static void asm_bror(ASMState *as, IRIns *ir) } } +#if LJ_32 && LJ_SOFTFP +static void asm_sfpmin_max(ASMState *as, IRIns *ir) +{ + CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; + IRRef args[4]; + args[0^LJ_BE] = ir->op1; + args[1^LJ_BE] = (ir+1)->op1; + args[2^LJ_BE] = ir->op2; + args[3^LJ_BE] = (ir+1)->op2; + asm_setupresult(as, ir, &ci); + emit_call(as, (void *)ci.func, 0); + ci.func = NULL; + asm_gencall(as, &ci, args); +} +#endif + static void asm_min_max(ASMState *as, IRIns *ir, int ismax) { - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); Reg right, left = ra_alloc2(as, ir, RSET_FPR); right = (left >> 8); left &= 255; @@ -1429,11 +1968,72 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) /* -- Comparisons --------------------------------------------------------- */ +#if LJ_32 && LJ_SOFTFP +/* SFP comparisons. */ +static void asm_sfpcomp(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; + RegSet drop = RSET_SCRATCH; + Reg r; + IRRef args[4]; + args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1; + args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2; + + for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { + if (!rset_test(as->freeset, r) && + regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) + rset_clear(drop, r); + } + ra_evictset(as, drop); + + asm_setupresult(as, ir, ci); + + switch ((IROp)ir->o) { + case IR_LT: + asm_guard(as, MIPSI_BGEZ, RID_RET, 0); + break; + case IR_ULT: + asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); + emit_loadi(as, RID_TMP, 1); + asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); + break; + case IR_GE: + asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); + emit_loadi(as, RID_TMP, 2); + asm_guard(as, MIPSI_BLTZ, RID_RET, 0); + break; + case IR_LE: + asm_guard(as, MIPSI_BGTZ, RID_RET, 0); + break; + case IR_GT: + asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); + emit_loadi(as, RID_TMP, 2); + asm_guard(as, MIPSI_BLEZ, RID_RET, 0); + break; + case IR_UGE: + asm_guard(as, MIPSI_BLTZ, RID_RET, 0); + break; + case IR_ULE: + asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); + emit_loadi(as, RID_TMP, 1); + break; + case IR_UGT: case IR_ABC: + asm_guard(as, MIPSI_BLEZ, RID_RET, 0); + break; + case IR_EQ: case IR_NE: + asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_RET, RID_ZERO); + default: + break; + } + asm_gencall(as, ci, args); +} +#endif + static void asm_comp(ASMState *as, IRIns *ir) { /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ IROp op = ir->o; - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { Reg right, left = ra_alloc2(as, ir, RSET_FPR); right = (left >> 8); left &= 255; asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); @@ -1441,13 +2041,13 @@ static void asm_comp(ASMState *as, IRIns *ir) } else { Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); if (op == IR_ABC) op = IR_UGT; - if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) { + if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(IR(ir->op2)) == 0) { MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); asm_guard(as, mi, left, 0); } else { if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; + intptr_t k = get_kval(IR(ir->op2)); if ((op&2)) k++; if (checki16(k)) { asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); @@ -1466,9 +2066,10 @@ static void asm_comp(ASMState *as, IRIns *ir) static void asm_equal(ASMState *as, IRIns *ir) { - Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR); + Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? + RSET_FPR : RSET_GPR); right = (left >> 8); left &= 255; - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); } else { @@ -1476,7 +2077,7 @@ static void asm_equal(ASMState *as, IRIns *ir) } } -#if LJ_HASFFI +#if LJ_32 && LJ_HASFFI /* 64 bit integer comparisons. */ static void asm_comp64(ASMState *as, IRIns *ir) { @@ -1518,23 +2119,48 @@ static void asm_comp64eq(ASMState *as, IRIns *ir) /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ static void asm_hiop(ASMState *as, IRIns *ir) { -#if LJ_HASFFI +#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP) /* HIOP is marked as a store because it needs its own DCE logic. */ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ as->curins--; /* Always skip the CONV. */ +#if LJ_HASFFI && !LJ_SOFTFP if (usehi || uselo) asm_conv64(as, ir); return; +#endif } else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. */ as->curins--; /* Always skip the loword comparison. */ +#if LJ_SOFTFP + if (!irt_isint(ir->t)) { + asm_sfpcomp(as, ir-1); + return; + } +#endif +#if LJ_HASFFI asm_comp64(as, ir); +#endif return; } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ as->curins--; /* Always skip the loword comparison. */ +#if LJ_SOFTFP + if (!irt_isint(ir->t)) { + asm_sfpcomp(as, ir-1); + return; + } +#endif +#if LJ_HASFFI asm_comp64eq(as, ir); +#endif + return; +#if LJ_SOFTFP + } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { + as->curins--; /* Always skip the loword min/max. */ + if (uselo || usehi) + asm_sfpmin_max(as, ir-1); return; +#endif } else if ((ir-1)->o == IR_XSTORE) { as->curins--; /* Handle both stores here. */ if ((ir-1)->r != RID_SINK) { @@ -1545,14 +2171,27 @@ static void asm_hiop(ASMState *as, IRIns *ir) } if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ switch ((ir-1)->o) { +#if LJ_HASFFI case IR_ADD: as->curins--; asm_add64(as, ir); break; case IR_SUB: as->curins--; asm_sub64(as, ir); break; case IR_NEG: as->curins--; asm_neg64(as, ir); break; +#endif +#if LJ_SOFTFP + case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: + case IR_STRTO: + if (!uselo) + ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ + break; +#endif case IR_CALLN: + case IR_CALLS: case IR_CALLXS: if (!uselo) ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ break; +#if LJ_SOFTFP + case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: +#endif case IR_CNEWI: /* Nothing to do here. Handled by lo op itself. */ break; @@ -1584,46 +2223,64 @@ static void asm_stack_check(ASMState *as, BCReg topslot, Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; ExitNo oldsnap = as->snapno; rset_clear(allow, pbase); +#if LJ_32 tmp = allow ? rset_pickbot(allow) : (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); +#else + tmp = allow ? rset_pickbot(allow) : RID_RET; +#endif as->snapno = exitno; asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); as->snapno = oldsnap; if (allow == RSET_EMPTY) /* Restore temp. register. */ - emit_tsi(as, MIPSI_LW, tmp, RID_SP, 0); + emit_tsi(as, MIPSI_AL, tmp, RID_SP, 0); else ra_modified(as, tmp); emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); - emit_dst(as, MIPSI_SUBU, RID_TMP, tmp, pbase); - emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack)); + emit_dst(as, MIPSI_ASUBU, RID_TMP, tmp, pbase); + emit_tsi(as, MIPSI_AL, tmp, tmp, offsetof(lua_State, maxstack)); if (pbase == RID_TMP) emit_getgl(as, RID_TMP, jit_base); emit_getgl(as, tmp, cur_L); if (allow == RSET_EMPTY) /* Spill temp. register. */ - emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0); + emit_tsi(as, MIPSI_AS, tmp, RID_SP, 0); } /* Restore Lua stack from on-trace state. */ static void asm_stack_restore(ASMState *as, SnapShot *snap) { SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; +#if LJ_32 || defined(LUA_USE_ASSERT) + SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; +#endif MSize n, nent = snap->nent; /* Store the value of all modified slots to the Lua stack. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); + int32_t ofs = 8*((int32_t)s-1-LJ_FR2); IRRef ref = snap_ref(sn); IRIns *ir = IR(ref); if ((sn & SNAP_NORESTORE)) continue; if (irt_isnum(ir->t)) { +#if LJ_SOFTFP + Reg tmp; + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); + lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); + emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); + if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); + emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); +#else Reg src = ra_alloc1(as, ref, RSET_FPR); emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); +#endif } else { - Reg type; +#if LJ_32 RegSet allow = rset_exclude(RSET_GPR, RID_BASE); + Reg type; lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); if (!irt_ispri(ir->t)) { Reg src = ra_alloc1(as, ref, allow); @@ -1633,10 +2290,17 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) if ((sn & (SNAP_CONT|SNAP_FRAME))) { if (s == 0) continue; /* Do not overwrite link to previous frame. */ type = ra_allock(as, (int32_t)(*flinks--), allow); +#if LJ_SOFTFP + } else if ((sn & SNAP_SOFTFPNUM)) { + type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); +#endif } else { type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); } emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4)); +#else + asm_tvstore64(as, RID_BASE, ofs, ref); +#endif } checkmclim(as); } @@ -1660,7 +2324,7 @@ static void asm_gc_check(ASMState *as) args[0] = ASMREF_TMP1; /* global_State *g */ args[1] = ASMREF_TMP2; /* MSize steps */ asm_gencall(as, ci, args); - emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); + emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); tmp = ra_releasetmp(as, ASMREF_TMP2); emit_loadi(as, tmp, as->gcsteps); /* Jump around GC step if GC total < GC threshold. */ @@ -1735,7 +2399,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; int32_t spadj = as->T->spadjust; MCode *p = as->mctop-1; - *p = spadj ? (MIPSI_ADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; + *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); } @@ -1753,14 +2417,19 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) { IRRef args[CCI_NARGS_MAX*2]; uint32_t i, nargs = CCI_XNARGS(ci); +#if LJ_32 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; +#else + int nslots = 0, ngpr = REGARG_NUMGPR; +#endif asm_collectargs(as, ir, ci, args); for (i = 0; i < nargs; i++) { - if (args[i] && irt_isfp(IR(args[i])->t) && +#if LJ_32 + if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) && nfpr > 0 && !(ci->flags & CCI_VARARG)) { nfpr--; ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1; - } else if (args[i] && irt_isnum(IR(args[i])->t)) { + } else if (!LJ_SOFTFP && args[i] && irt_isnum(IR(args[i])->t)) { nfpr = 0; ngpr = ngpr & ~1; if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1; @@ -1768,6 +2437,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) nfpr = 0; if (ngpr > 0) ngpr--; else nslots++; } +#else + if (ngpr > 0) ngpr--; else nslots += 2; +#endif } if (nslots > as->evenspill) /* Leave room for args in stack slots. */ as->evenspill = nslots; diff --git a/luajit-2.1/src/lj_asm_ppc.h b/luajit-2.1/src/lj_asm_ppc.h index 7deeb66..6daa861 100644 --- a/luajit-2.1/src/lj_asm_ppc.h +++ b/luajit-2.1/src/lj_asm_ppc.h @@ -1,6 +1,6 @@ /* ** PPC IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* -- Register allocator extensions --------------------------------------- */ @@ -298,7 +298,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) { RegSet drop = RSET_SCRATCH; - int hiop = ((ir+1)->o == IR_HIOP); + int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); if ((ci->flags & CCI_NOFPRCLOBBER)) drop &= ~RSET_FPR; if (ra_hasreg(ir->r)) @@ -393,8 +393,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); emit_lsptr(as, PPCI_LFS, (fbias & 31), - (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), - RSET_GPR); + (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR); emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); emit_fb(as, PPCI_FCTIWZ, tmp, left); } @@ -433,13 +432,11 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg left = ra_alloc1(as, lref, allow); Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - const float *kbias; if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); emit_fab(as, PPCI_FSUB, dest, dest, fbias); emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); - kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000)); - if (st == IRT_U32) kbias++; - emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias, + emit_lsptr(as, PPCI_LFS, (fbias & 31), + &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31], rset_clear(allow, hibias)); emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, RID_SP, SPOFS_TMPLO); @@ -472,8 +469,7 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_fb(as, PPCI_FCTIWZ, tmp, tmp); emit_fab(as, PPCI_FSUB, tmp, left, tmp); emit_lsptr(as, PPCI_LFS, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)), - RSET_GPR); + (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); } else { emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); @@ -717,7 +713,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir) { - /* NYI: Check that UREFO is still open and not aliasing a slot. */ Reg dest = ra_dest(as, ir, RSET_GPR); if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); @@ -809,17 +804,23 @@ static PPCIns asm_fxstoreins(IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); PPCIns pi = asm_fxloadins(ir); + Reg idx; int32_t ofs; - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_tai(as, PPCI_ADDI, dest, idx, ofs); - return; + if (ir->op1 == REF_NIL) { + idx = RID_JGL; + ofs = (ir->op2 << 2) - 32768; + } else { + idx = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ + emit_tai(as, PPCI_ADDI, dest, idx, ofs); + return; + } } + ofs = field_ofs[ir->op2]; } - ofs = field_ofs[ir->op2]; lua_assert(!irt_isi8(ir->t)); emit_tai(as, pi, dest, idx, ofs); } @@ -975,7 +976,7 @@ static void asm_sload(ASMState *as, IRIns *ir) emit_fab(as, PPCI_FSUB, dest, dest, fbias); emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); emit_lsptr(as, PPCI_LFS, (fbias & 31), - (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), + (void *)&as->J->k32[LJ_K32_2P52_2P31], rset_clear(allow, hibias)); emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); diff --git a/luajit-2.1/src/lj_asm_x86.h b/luajit-2.1/src/lj_asm_x86.h index 941d091..3e189b1 100644 --- a/luajit-2.1/src/lj_asm_x86.h +++ b/luajit-2.1/src/lj_asm_x86.h @@ -1,6 +1,6 @@ /* ** x86/x64 IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* -- Guard handling ------------------------------------------------------ */ @@ -21,12 +21,14 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) } /* Push the high byte of the exitno for each exit stub group. */ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8); +#if !LJ_GC64 /* Store DISPATCH at original stack slot 0. Account for the two push ops. */ *mxp++ = XI_MOVmi; *mxp++ = MODRM(XM_OFS8, 0, RID_ESP); *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP); *mxp++ = 2*sizeof(void *); *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; +#endif /* Jump to exit handler which fills in the ExitState. */ *mxp++ = XI_JMP; mxp += 4; *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); @@ -62,10 +64,14 @@ static void asm_guardcc(ASMState *as, int cc) target = p; cc ^= 1; if (as->realign) { + if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP)) + as->mrm.ofs += 2; /* Fixup RIP offset for pending fused load. */ emit_sjcc(as, cc, target); return; } } + if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP)) + as->mrm.ofs += 6; /* Fixup RIP offset for pending fused load. */ emit_jcc(as, cc, target); } @@ -79,6 +85,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) { if (irref_isk(ref)) { IRIns *ir = IR(ref); +#if LJ_GC64 + if (ir->o == IR_KNULL || !irt_is64(ir->t)) { + *k = ir->i; + return 1; + } else if (checki32((int64_t)ir_k64(ir)->u64)) { + *k = (int32_t)ir_k64(ir)->u64; + return 1; + } +#else if (ir->o != IR_KINT64) { *k = ir->i; return 1; @@ -86,6 +101,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) *k = (int32_t)ir_kint64(ir)->u64; return 1; } +#endif } return 0; } @@ -185,9 +201,19 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; +#if LJ_GC64 + int64_t ofs = dispofs(as, &uv->tv); + if (checki32(ofs) && checki32(ofs+4)) { + as->mrm.ofs = (int32_t)ofs; + as->mrm.base = RID_DISPATCH; + as->mrm.idx = RID_NONE; + return; + } +#else as->mrm.ofs = ptr2addr(&uv->tv); as->mrm.base = as->mrm.idx = RID_NONE; return; +#endif } break; default: @@ -205,14 +231,40 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) { lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); - as->mrm.ofs = field_ofs[ir->op2]; as->mrm.idx = RID_NONE; + if (ir->op1 == REF_NIL) { +#if LJ_GC64 + as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch); + as->mrm.base = RID_DISPATCH; +#else + as->mrm.ofs = (int32_t)(ir->op2 << 2) + ptr2addr(J2GG(as->J)); + as->mrm.base = RID_NONE; +#endif + return; + } + as->mrm.ofs = field_ofs[ir->op2]; if (irref_isk(ir->op1)) { - as->mrm.ofs += IR(ir->op1)->i; + IRIns *op1 = IR(ir->op1); +#if LJ_GC64 + if (ir->op1 == REF_NIL) { + as->mrm.ofs -= GG_OFS(dispatch); + as->mrm.base = RID_DISPATCH; + return; + } else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) { + intptr_t ofs = dispofs(as, ir_kptr(op1)); + if (checki32(as->mrm.ofs + ofs)) { + as->mrm.ofs += (int32_t)ofs; + as->mrm.base = RID_DISPATCH; + return; + } + } +#else + as->mrm.ofs += op1->i; as->mrm.base = RID_NONE; - } else { - as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); + return; +#endif } + as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); } /* Fuse string reference into memory operand. */ @@ -223,7 +275,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) as->mrm.base = as->mrm.idx = RID_NONE; as->mrm.scale = XM_SCALE1; as->mrm.ofs = sizeof(GCstr); - if (irref_isk(ir->op1)) { + if (!LJ_GC64 && irref_isk(ir->op1)) { as->mrm.ofs += IR(ir->op1)->i; } else { Reg r = ra_alloc1(as, ir->op1, allow); @@ -255,10 +307,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) IRIns *ir = IR(ref); as->mrm.idx = RID_NONE; if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { +#if LJ_GC64 + intptr_t ofs = dispofs(as, ir_kptr(ir)); + if (checki32(ofs)) { + as->mrm.ofs = (int32_t)ofs; + as->mrm.base = RID_DISPATCH; + return; + } + } if (0) { +#else as->mrm.ofs = ir->i; as->mrm.base = RID_NONE; } else if (ir->o == IR_STRREF) { asm_fusestrref(as, ir, allow); +#endif } else { as->mrm.ofs = 0; if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) { @@ -301,7 +363,45 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) } } -/* Fuse load into memory operand. */ +/* Fuse load of 64 bit IR constant into memory operand. */ +static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) +{ + const uint64_t *k = &ir_k64(ir)->u64; + if (!LJ_GC64 || checki32((intptr_t)k)) { + as->mrm.ofs = ptr2addr(k); + as->mrm.base = RID_NONE; +#if LJ_GC64 + } else if (checki32(dispofs(as, k))) { + as->mrm.ofs = (int32_t)dispofs(as, k); + as->mrm.base = RID_DISPATCH; + } else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1)) && + checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) { + as->mrm.ofs = (int32_t)mcpofs(as, k); + as->mrm.base = RID_RIP; + } else { + if (ir->i) { + lua_assert(*k == *(uint64_t*)(as->mctop - ir->i)); + } else { + while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; + *(uint64_t*)as->mcbot = *k; + ir->i = (int32_t)(as->mctop - as->mcbot); + as->mcbot += 8; + as->mclim = as->mcbot + MCLIM_REDZONE; + } + as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i); + as->mrm.base = RID_RIP; +#endif + } + as->mrm.idx = RID_NONE; + return RID_MRM; +} + +/* Fuse load into memory operand. +** +** Important caveat: this may emit RIP-relative loads! So don't place any +** code emitters between this function and the use of its result. +** The only permitted exception is asm_guardcc(). +*/ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) { IRIns *ir = IR(ref); @@ -320,26 +420,35 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) if (ir->o == IR_KNUM) { RegSet avail = as->freeset & ~as->modset & RSET_FPR; lua_assert(allow != RSET_EMPTY); - if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ - as->mrm.ofs = ptr2addr(ir_knum(ir)); - as->mrm.base = as->mrm.idx = RID_NONE; - return RID_MRM; - } - } else if (ir->o == IR_KINT64) { + if (!(avail & (avail-1))) /* Fuse if less than two regs available. */ + return asm_fuseloadk64(as, ir); + } else if (ref == REF_BASE || ir->o == IR_KINT64) { RegSet avail = as->freeset & ~as->modset & RSET_GPR; lua_assert(allow != RSET_EMPTY); if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ - as->mrm.ofs = ptr2addr(ir_kint64(ir)); - as->mrm.base = as->mrm.idx = RID_NONE; - return RID_MRM; + if (ref == REF_BASE) { +#if LJ_GC64 + as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->jit_base); + as->mrm.base = RID_DISPATCH; +#else + as->mrm.ofs = ptr2addr(&J2G(as->J)->jit_base); + as->mrm.base = RID_NONE; +#endif + as->mrm.idx = RID_NONE; + return RID_MRM; + } else { + return asm_fuseloadk64(as, ir); + } } } else if (mayfuse(as, ref)) { RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; if (ir->o == IR_SLOAD) { if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && - noconflict(as, ref, IR_RETF, 0)) { + noconflict(as, ref, IR_RETF, 0) && + !(LJ_GC64 && irt_isaddr(ir->t))) { as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); - as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); + as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + + (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); as->mrm.idx = RID_NONE; return RID_MRM; } @@ -351,7 +460,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) return RID_MRM; } } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { - if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) { + if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) && + !(LJ_GC64 && irt_isaddr(ir->t))) { asm_fuseahuref(as, ir->op1, xallow); return RID_MRM; } @@ -364,12 +474,16 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) asm_fusexref(as, ir->op1, xallow); return RID_MRM; } - } else if (ir->o == IR_VLOAD) { + } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) { asm_fuseahuref(as, ir->op1, xallow); return RID_MRM; } } - if (!(as->freeset & allow) && !irref_isk(ref) && + if (ir->o == IR_FLOAD && ir->op1 == REF_NIL) { + asm_fusefref(as, ir, RSET_EMPTY); + return RID_MRM; + } + if (!(as->freeset & allow) && !emit_canremat(ref) && (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref))) goto fusespill; return ra_allocref(as, ref, allow); @@ -485,8 +599,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) if (r) { /* Argument is in a register. */ if (r < RID_MAX_GPR && ref < ASMREF_TMP1) { #if LJ_64 - if (ir->o == IR_KINT64) - emit_loadu64(as, r, ir_kint64(ir)->u64); + if (LJ_GC64 ? !(ir->o == IR_KINT || ir->o == IR_KNULL) : ir->o == IR_KINT64) + emit_loadu64(as, r, ir_k64(ir)->u64); else #endif emit_loadi(as, r, ir->i); @@ -531,7 +645,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) { RegSet drop = RSET_SCRATCH; - int hiop = (LJ_32 && (ir+1)->o == IR_HIOP); + int hiop = (LJ_32 && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); if ((ci->flags & CCI_NOFPRCLOBBER)) drop &= ~RSET_FPR; if (ra_hasreg(ir->r)) @@ -642,6 +756,9 @@ static void asm_callx(ASMState *as, IRIns *ir) static void asm_retf(ASMState *as, IRIns *ir) { Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); +#if LJ_FR2 + Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base)); +#endif void *pc = ir_kptr(IR(ir->op2)); int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); as->topslot -= (BCReg)delta; @@ -650,7 +767,12 @@ static void asm_retf(ASMState *as, IRIns *ir) emit_setgl(as, base, jit_base); emit_addptr(as, base, -8*delta); asm_guardcc(as, CC_NE); +#if LJ_FR2 + emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8); + emit_loadu64(as, rpc, u64ptr(pc)); +#else emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); +#endif } /* -- Type conversions ---------------------------------------------------- */ @@ -674,8 +796,9 @@ static void asm_tobit(ASMState *as, IRIns *ir) Reg tmp = ra_noreg(IR(ir->op1)->r) ? ra_alloc1(as, ir->op1, RSET_FPR) : ra_scratch(as, RSET_FPR); - Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); + Reg right; emit_rr(as, XO_MOVDto, tmp, dest); + right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); emit_mrm(as, XO_ADDSD, tmp, right); ra_left(as, tmp, ir->op1); } @@ -696,13 +819,13 @@ static void asm_conv(ASMState *as, IRIns *ir) if (left == dest) return; /* Avoid the XO_XORPS. */ } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */ /* number = (2^52+2^51 .. u32) - (2^52+2^51) */ - cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000)); + cTValue *k = &as->J->k64[LJ_K64_TOBIT]; Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); if (irt_isfloat(ir->t)) emit_rr(as, XO_CVTSD2SS, dest, dest); emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ - emit_loadn(as, bias, k); + emit_rma(as, XO_MOVSD, bias, k); emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); return; } else { /* Integer to FP conversion. */ @@ -711,7 +834,7 @@ static void asm_conv(ASMState *as, IRIns *ir) asm_fuseloadm(as, lref, RSET_GPR, st64); if (LJ_64 && st == IRT_U64) { MCLabel l_end = emit_label(as); - const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000)); + cTValue *k = &as->J->k64[LJ_K64_2P64]; emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */ emit_sjcc(as, CC_NS, l_end); emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */ @@ -738,23 +861,20 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); emit_rr(as, op, dest|REX_64, tmp); if (st == IRT_NUM) - emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J, - LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000))); + emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]); else - emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J, - LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000))); + emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]); emit_sjcc(as, CC_NS, l_end); emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ emit_rr(as, op, dest|REX_64, tmp); ra_left(as, tmp, lref); } else { - Reg left = asm_fuseload(as, lref, RSET_FPR); if (LJ_64 && irt_isu32(ir->t)) emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ emit_mrm(as, op, dest|((LJ_64 && (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), - left); + asm_fuseload(as, lref, RSET_FPR)); } } } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ @@ -828,8 +948,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir) if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */ MCLabel l_end = emit_label(as); - emit_rma(as, XO_FADDq, XOg_FADDq, - lj_ir_k64_find(as->J, U64x(43f00000,00000000))); + emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]); emit_sjcc(as, CC_NS, l_end); emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ } else { @@ -869,8 +988,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); else emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); - emit_rma(as, XO_FADDq, XOg_FADDq, - lj_ir_k64_find(as->J, U64x(c3f00000,00000000))); + emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]); emit_sjcc(as, CC_NS, l_pop); emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ } @@ -934,6 +1052,25 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir)); } else { /* Otherwise use g->tmptv to hold the TValue. */ +#if LJ_GC64 + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + emit_movmroi(as, dest, 4, k.u32.hi); + emit_movmroi(as, dest, 0, k.u32.lo); + } else { + /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); + if (irt_is64(ir->t)) { + emit_u32(as, irt_toitype(ir->t) << 15); + emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4); + } else { + /* Currently, no caller passes integers that might end up here. */ + emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15)); + } + emit_movtomro(as, REX_64IR(ir, src), dest, 0); + } +#else if (!irref_isk(ref)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); emit_movtomro(as, REX_64IR(ir, src), dest, 0); @@ -942,6 +1079,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) } if (!(LJ_64 && irt_islightud(ir->t))) emit_movmroi(as, dest, 4, irt_toitype(ir->t)); +#endif emit_loada(as, dest, &J2G(as->J)->tmptv); } } @@ -951,9 +1089,9 @@ static void asm_aref(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); asm_fusearef(as, ir, RSET_GPR); if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0)) - emit_mrm(as, XO_LEA, dest, RID_MRM); + emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM); else if (as->mrm.base != dest) - emit_rr(as, XO_MOV, dest, as->mrm.base); + emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base); } /* Inlined hash lookup. Specialized for key type and for const keys. @@ -980,7 +1118,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) if (!isk) { rset_clear(allow, tab); key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); - if (!irt_isstr(kt)) + if (LJ_GC64 || !irt_isstr(kt)) tmp = ra_scratch(as, rset_exclude(allow, key)); } @@ -993,8 +1131,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) /* Follow hash chain until the end. */ l_loop = emit_sjcc_label(as, CC_NZ); - emit_rr(as, XO_TEST, dest, dest); - emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next)); + emit_rr(as, XO_TEST, dest|REX_GC64, dest); + emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next)); l_next = emit_label(as); /* Type and value comparison. */ @@ -1015,7 +1153,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); emit_sjcc(as, CC_AE, l_next); /* The type check avoids NaN penalties and complaints from Valgrind. */ -#if LJ_64 +#if LJ_64 && !LJ_GC64 emit_u32(as, LJ_TISNUM); emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); #else @@ -1023,10 +1161,28 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); #endif } -#if LJ_64 +#if LJ_64 && !LJ_GC64 } else if (irt_islightud(kt)) { emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64)); #endif +#if LJ_GC64 + } else if (irt_isaddr(kt)) { + if (isk) { + TValue k; + k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; + emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo), + k.u32.lo); + emit_sjcc(as, CC_NE, l_next); + emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi), + k.u32.hi); + } else { + emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64)); + } + } else { + lua_assert(irt_ispri(kt) && !irt_isnil(kt)); + emit_u32(as, (irt_toitype(kt)<<15)|0x7fff); + emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); +#else } else { if (!irt_ispri(kt)) { lua_assert(irt_isaddr(kt)); @@ -1040,16 +1196,23 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) lua_assert(!irt_isnil(kt)); emit_i8(as, irt_toitype(kt)); emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); +#endif } emit_sfixup(as, l_loop); checkmclim(as); +#if LJ_GC64 + if (!isk && irt_isaddr(kt)) { + emit_rr(as, XO_OR, tmp|REX_64, key); + emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47); + } +#endif /* Load main position relative to tab->node into dest. */ khash = isk ? ir_khash(irkey) : 1; if (khash == 0) { - emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node)); + emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); } else { - emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node)); + emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); if ((as->flags & JIT_F_PREFER_IMUL)) { emit_i8(as, sizeof(Node)); emit_rr(as, XO_IMULi8, dest, dest); @@ -1084,7 +1247,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) #endif } else { emit_rr(as, XO_MOV, tmp, key); +#if LJ_GC64 + checkmclim(as); + emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15); + if ((as->flags & JIT_F_BMI2)) { + emit_i8(as, 32); + emit_mrm(as, XV_RORX|VEX_64, dest, key); + } else { + emit_shifti(as, XOg_SHR|REX_64, dest, 32); + emit_rr(as, XO_MOV, dest|REX_64, key|REX_64); + } +#else emit_rmro(as, XO_LEA, dest, key, HASH_BIAS); +#endif } } } @@ -1104,11 +1279,11 @@ static void asm_hrefk(ASMState *as, IRIns *ir) if (ra_hasreg(dest)) { if (ofs != 0) { if (dest == node && !(as->flags & JIT_F_LEA_AGU)) - emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs); + emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); else - emit_rmro(as, XO_LEA, dest, node, ofs); + emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); } else if (dest != node) { - emit_rr(as, XO_MOV, dest, node); + emit_rr(as, XO_MOV, dest|REX_GC64, node); } } asm_guardcc(as, CC_NE); @@ -1120,13 +1295,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir) lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t)); /* Assumes -0.0 is already canonicalized to +0.0. */ emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 : +#if LJ_GC64 + ((uint64_t)irt_toitype(irkey->t) << 47) | + (uint64_t)ir_kgc(irkey)); +#else ((uint64_t)irt_toitype(irkey->t) << 32) | (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); +#endif } else { lua_assert(!irt_isnil(irkey->t)); +#if LJ_GC64 + emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff); + emit_rmro(as, XO_ARITHi, XOg_CMP, node, + ofs + (int32_t)offsetof(Node, key.it)); +#else emit_i8(as, irt_toitype(irkey->t)); emit_rmro(as, XO_ARITHi8, XOg_CMP, node, ofs + (int32_t)offsetof(Node, key.it)); +#endif } #else l_exit = emit_label(as); @@ -1157,25 +1343,25 @@ static void asm_hrefk(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir) { - /* NYI: Check that UREFO is still open and not aliasing a slot. */ Reg dest = ra_dest(as, ir, RSET_GPR); if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_rma(as, XO_MOV, dest, v); + emit_rma(as, XO_MOV, dest|REX_GC64, v); } else { Reg uv = ra_scratch(as, RSET_GPR); Reg func = ra_alloc1(as, ir->op1, RSET_GPR); if (ir->o == IR_UREFC) { - emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv)); + emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv)); asm_guardcc(as, CC_NE); emit_i8(as, 1); emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); } else { - emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v)); + emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v)); } - emit_rmro(as, XO_MOV, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); + emit_rmro(as, XO_MOV, uv|REX_GC64, func, + (int32_t)offsetof(GCfuncL, uvptr) + + (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); } } @@ -1193,9 +1379,9 @@ static void asm_strref(ASMState *as, IRIns *ir) if (as->mrm.base == RID_NONE) emit_loadi(as, dest, as->mrm.ofs); else if (as->mrm.base == dest && as->mrm.idx == RID_NONE) - emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs); + emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs); else - emit_mrm(as, XO_LEA, dest, RID_MRM); + emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM); } /* -- Loads and stores ---------------------------------------------------- */ @@ -1264,7 +1450,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir) case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; case IRT_NUM: xo = XO_MOVSDto; break; case IRT_FLOAT: xo = XO_MOVSSto; break; -#if LJ_64 +#if LJ_64 && !LJ_GC64 case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ #endif default: @@ -1296,7 +1482,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir) #define asm_fstore(as, ir) asm_fxstore(as, ir) #define asm_xstore(as, ir) asm_fxstore(as, ir) -#if LJ_64 +#if LJ_64 && !LJ_GC64 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) { if (ra_used(ir) || typecheck) { @@ -1318,9 +1504,12 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) static void asm_ahuvload(ASMState *as, IRIns *ir) { +#if LJ_GC64 + Reg tmp = RID_NONE; +#endif lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isint(ir->t))); -#if LJ_64 +#if LJ_64 && !LJ_GC64 if (irt_islightud(ir->t)) { Reg dest = asm_load_lightud64(as, ir, 1); if (ra_hasreg(dest)) { @@ -1334,20 +1523,64 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; Reg dest = ra_dest(as, ir, allow); asm_fuseahuref(as, ir->op1, RSET_GPR); +#if LJ_GC64 + if (irt_isaddr(ir->t)) { + emit_shifti(as, XOg_SHR|REX_64, dest, 17); + asm_guardcc(as, CC_NE); + emit_i8(as, irt_toitype(ir->t)); + emit_rr(as, XO_ARITHi8, XOg_CMP, dest); + emit_i8(as, XI_O16); + if ((as->flags & JIT_F_BMI2)) { + emit_i8(as, 47); + emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM); + } else { + emit_shifti(as, XOg_ROR|REX_64, dest, 47); + emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); + } + return; + } else +#endif emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM); } else { - asm_fuseahuref(as, ir->op1, RSET_GPR); + RegSet gpr = RSET_GPR; +#if LJ_GC64 + if (irt_isaddr(ir->t)) { + tmp = ra_scratch(as, RSET_GPR); + gpr = rset_exclude(gpr, tmp); + } +#endif + asm_fuseahuref(as, ir->op1, gpr); } /* Always do the type check, even if the load result is unused. */ as->mrm.ofs += 4; asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); +#if LJ_GC64 + emit_u32(as, LJ_TISNUM << 15); +#else emit_u32(as, LJ_TISNUM); +#endif + emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); +#if LJ_GC64 + } else if (irt_isaddr(ir->t)) { + as->mrm.ofs -= 4; + emit_i8(as, irt_toitype(ir->t)); + emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp); + emit_shifti(as, XOg_SAR|REX_64, tmp, 47); + emit_mrm(as, XO_MOV, tmp|REX_64, RID_MRM); + } else if (irt_isnil(ir->t)) { + as->mrm.ofs -= 4; + emit_i8(as, -1); + emit_mrm(as, XO_ARITHi8, XOg_CMP|REX_64, RID_MRM); + } else { + emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff); emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); +#else } else { emit_i8(as, irt_toitype(ir->t)); emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM); +#endif } } @@ -1359,12 +1592,28 @@ static void asm_ahustore(ASMState *as, IRIns *ir) Reg src = ra_alloc1(as, ir->op2, RSET_FPR); asm_fuseahuref(as, ir->op1, RSET_GPR); emit_mrm(as, XO_MOVSDto, src, RID_MRM); -#if LJ_64 +#if LJ_64 && !LJ_GC64 } else if (irt_islightud(ir->t)) { Reg src = ra_alloc1(as, ir->op2, RSET_GPR); asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src)); emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); #endif +#if LJ_GC64 + } else if (irref_isk(ir->op2)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, IR(ir->op2)); + asm_fuseahuref(as, ir->op1, RSET_GPR); + if (tvisnil(&k)) { + emit_i32(as, -1); + emit_mrm(as, XO_MOVmi, REX_64, RID_MRM); + } else { + emit_u32(as, k.u32.lo); + emit_mrm(as, XO_MOVmi, 0, RID_MRM); + as->mrm.ofs += 4; + emit_u32(as, k.u32.hi); + emit_mrm(as, XO_MOVmi, 0, RID_MRM); + } +#endif } else { IRIns *irr = IR(ir->op2); RegSet allow = RSET_GPR; @@ -1375,6 +1624,17 @@ static void asm_ahustore(ASMState *as, IRIns *ir) } asm_fuseahuref(as, ir->op1, allow); if (ra_hasreg(src)) { +#if LJ_GC64 + if (!(LJ_DUALNUM && irt_isinteger(ir->t))) { + /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ + as->mrm.ofs += 4; + emit_u32(as, irt_toitype(ir->t) << 15); + emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM); + as->mrm.ofs -= 4; + emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); + return; + } +#endif emit_mrm(as, XO_MOVto, src, RID_MRM); } else if (!irt_ispri(irr->t)) { lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); @@ -1382,14 +1642,20 @@ static void asm_ahustore(ASMState *as, IRIns *ir) emit_mrm(as, XO_MOVmi, 0, RID_MRM); } as->mrm.ofs += 4; +#if LJ_GC64 + lua_assert(LJ_DUALNUM && irt_isinteger(ir->t)); + emit_i32(as, LJ_TNUMX << 15); +#else emit_i32(as, (int32_t)irt_toitype(ir->t)); +#endif emit_mrm(as, XO_MOVmi, 0, RID_MRM); } } static void asm_sload(ASMState *as, IRIns *ir) { - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); + int32_t ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + + (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); IRType1 t = ir->t; Reg base; lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ @@ -1402,7 +1668,7 @@ static void asm_sload(ASMState *as, IRIns *ir) base = ra_alloc1(as, REF_BASE, RSET_GPR); emit_rmro(as, XO_MOVSD, left, base, ofs); t.irt = IRT_NUM; /* Continue with a regular number type check. */ -#if LJ_64 +#if LJ_64 && !LJ_GC64 } else if (irt_islightud(t)) { Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK)); if (ra_hasreg(dest)) { @@ -1420,6 +1686,36 @@ static void asm_sload(ASMState *as, IRIns *ir) t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs); } else { +#if LJ_GC64 + if (irt_isaddr(t)) { + /* LJ_GC64 type check + tag removal without BMI2 and with BMI2: + ** + ** mov r64, [addr] rorx r64, [addr], 47 + ** ror r64, 47 + ** cmp r16, itype cmp r16, itype + ** jne ->exit jne ->exit + ** shr r64, 16 shr r64, 16 + */ + emit_shifti(as, XOg_SHR|REX_64, dest, 17); + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + asm_guardcc(as, CC_NE); + emit_i8(as, irt_toitype(t)); + emit_rr(as, XO_ARITHi8, XOg_CMP, dest); + emit_i8(as, XI_O16); + } + if ((as->flags & JIT_F_BMI2)) { + emit_i8(as, 47); + emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs); + } else { + if ((ir->op2 & IRSLOAD_TYPECHECK)) + emit_shifti(as, XOg_ROR|REX_64, dest, 47); + else + emit_shifti(as, XOg_SHL|REX_64, dest, 17); + emit_rmro(as, XO_MOV, dest|REX_64, base, ofs); + } + return; + } else +#endif emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs); } } else { @@ -1432,11 +1728,42 @@ static void asm_sload(ASMState *as, IRIns *ir) asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); if (LJ_64 && irt_type(t) >= IRT_NUM) { lua_assert(irt_isinteger(t) || irt_isnum(t)); +#if LJ_GC64 + emit_u32(as, LJ_TISNUM << 15); +#else emit_u32(as, LJ_TISNUM); +#endif + emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); +#if LJ_GC64 + } else if (irt_isnil(t)) { + /* LJ_GC64 type check for nil: + ** + ** cmp qword [addr], -1 + ** jne ->exit + */ + emit_i8(as, -1); + emit_rmro(as, XO_ARITHi8, XOg_CMP|REX_64, base, ofs); + } else if (irt_ispri(t)) { + emit_u32(as, (irt_toitype(t) << 15) | 0x7fff); emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); } else { + /* LJ_GC64 type check only: + ** + ** mov r64, [addr] + ** sar r64, 47 + ** cmp r32, itype + ** jne ->exit + */ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base)); + emit_i8(as, irt_toitype(t)); + emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); + emit_shifti(as, XOg_SAR|REX_64, tmp, 47); + emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4); +#else + } else { emit_i8(as, irt_toitype(t)); emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4); +#endif } } } @@ -1464,8 +1791,9 @@ static void asm_cnew(ASMState *as, IRIns *ir) Reg r64 = sz == 8 ? REX_64 : 0; if (irref_isk(ir->op2)) { IRIns *irk = IR(ir->op2); - uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 : - (uint64_t)(uint32_t)irk->i; + uint64_t k = (irk->o == IR_KINT64 || + (LJ_GC64 && (irk->o == IR_KPTR || irk->o == IR_KKPTR))) ? + ir_k64(irk)->u64 : (uint64_t)(uint32_t)irk->i; if (sz == 4 || checki32((int64_t)k)) { emit_i32(as, (int32_t)k); emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata)); @@ -1530,7 +1858,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab)); MCLabel l_end = emit_label(as); - emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist)); + emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist)); emit_setgl(as, tab, gc.grayagain); emit_getgl(as, tmp, gc.grayagain); emit_i8(as, ~LJ_GC_BLACK); @@ -1956,7 +2284,7 @@ static void asm_bswap(ASMState *as, IRIns *ir) #define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR) #define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR) -static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) +static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv) { IRRef rref = ir->op2; IRIns *irr = IR(rref); @@ -1965,11 +2293,27 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) int shift; dest = ra_dest(as, ir, RSET_GPR); shift = irr->i & (irt_is64(ir->t) ? 63 : 31); + if (!xv && shift && (as->flags & JIT_F_BMI2)) { + Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t)); + if (left != dest) { /* BMI2 rotate right by constant. */ + emit_i8(as, xs == XOg_ROL ? -shift : shift); + emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left); + return; + } + } switch (shift) { case 0: break; case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break; default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break; } + } else if ((as->flags & JIT_F_BMI2) && xv) { /* BMI2 variable shifts. */ + Reg left, right; + dest = ra_dest(as, ir, RSET_GPR); + right = ra_alloc1(as, rref, RSET_GPR); + left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right), + irt_is64(ir->t)); + emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left); + return; } else { /* Variable shifts implicitly use register cl (i.e. ecx). */ Reg right; dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX)); @@ -1995,11 +2339,11 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) */ } -#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL) -#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR) -#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR) -#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL) -#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR) +#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL, XV_SHLX) +#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR, XV_SHRX) +#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR, XV_SARX) +#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL, 0) +#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR, 0) /* -- Comparisons --------------------------------------------------------- */ @@ -2050,7 +2394,6 @@ static void asm_comp(ASMState *as, IRIns *ir) cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */ } left = ra_alloc1(as, lref, RSET_FPR); - right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left)); l_around = emit_label(as); asm_guardcc(as, cc >> 4); if (cc & VCC_P) { /* Extra CC_P branch required? */ @@ -2067,6 +2410,7 @@ static void asm_comp(ASMState *as, IRIns *ir) emit_jcc(as, CC_P, as->mcp); } } + right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left)); emit_mrm(as, XO_UCOMISD, left, right); } else { IRRef lref = ir->op1, rref = ir->op2; @@ -2343,13 +2687,18 @@ static void asm_stack_check(ASMState *as, BCReg topslot, emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0); else ra_modified(as, r); - emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot)); + emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot)); if (ra_hasreg(pbase) && pbase != r) - emit_rr(as, XO_ARITH(XOg_SUB), r, pbase); + emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase); else +#if LJ_GC64 + emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH, + (int32_t)dispofs(as, &J2G(as->J)->jit_base)); +#else emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, ptr2addr(&J2G(as->J)->jit_base)); - emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); +#endif + emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack)); emit_getgl(as, r, cur_L); if (allow == RSET_EMPTY) /* Spill temp. register. */ emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0); @@ -2359,13 +2708,15 @@ static void asm_stack_check(ASMState *as, BCReg topslot, static void asm_stack_restore(ASMState *as, SnapShot *snap) { SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; +#if !LJ_FR2 || defined(LUA_USE_ASSERT) + SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; +#endif MSize n, nent = snap->nent; /* Store the value of all modified slots to the Lua stack. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); + int32_t ofs = 8*((int32_t)s-1-LJ_FR2); IRRef ref = snap_ref(sn); IRIns *ir = IR(ref); if ((sn & SNAP_NORESTORE)) @@ -2378,16 +2729,44 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) (LJ_DUALNUM && irt_isinteger(ir->t))); if (!irref_isk(ref)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); +#if LJ_GC64 + if (irt_is64(ir->t)) { + /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ + emit_u32(as, irt_toitype(ir->t) << 15); + emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4); + } else if (LJ_DUALNUM && irt_isinteger(ir->t)) { + emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15); + } else { + emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff); + } +#endif emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); +#if LJ_GC64 + } else { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + if (tvisnil(&k)) { + emit_i32(as, -1); + emit_rmro(as, XO_MOVmi, REX_64, RID_BASE, ofs); + } else { + emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi); + emit_movmroi(as, RID_BASE, ofs, k.u32.lo); + } +#else } else if (!irt_ispri(ir->t)) { emit_movmroi(as, RID_BASE, ofs, ir->i); +#endif } if ((sn & (SNAP_CONT|SNAP_FRAME))) { +#if !LJ_FR2 if (s != 0) /* Do not overwrite link to previous frame. */ emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); +#endif +#if !LJ_GC64 } else { if (!(LJ_64 && irt_islightud(ir->t))) emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); +#endif } } checkmclim(as); @@ -2413,11 +2792,15 @@ static void asm_gc_check(ASMState *as) args[1] = ASMREF_TMP2; /* MSize steps */ asm_gencall(as, ci, args); tmp = ra_releasetmp(as, ASMREF_TMP1); +#if LJ_GC64 + emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G); +#else emit_loada(as, tmp, J2G(as->J)); +#endif emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps); /* Jump around GC step if GC total < GC threshold. */ emit_sjcc(as, CC_B, l_end); - emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold); + emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold); emit_getgl(as, tmp, gc.total); as->gcsteps = 0; checkmclim(as); @@ -2482,7 +2865,7 @@ static void asm_head_root_base(ASMState *as) if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ir->r = RID_INIT; /* No inheritance for modified BASE register. */ if (r != RID_BASE) - emit_rr(as, XO_MOV, r, RID_BASE); + emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE); } } @@ -2498,8 +2881,9 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) if (irp->r == r) { rset_clear(allow, r); /* Mark same BASE register as coalesced. */ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { + /* Move from coalesced parent reg. */ rset_clear(allow, irp->r); - emit_rr(as, XO_MOV, r, irp->r); /* Move from coalesced parent reg. */ + emit_rr(as, XO_MOV, r|REX_GC64, irp->r); } else { emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ } @@ -2600,10 +2984,111 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) static void asm_setup_target(ASMState *as) { asm_exitstub_setup(as, as->T->nsnap); + as->mrm.base = 0; } /* -- Trace patching ------------------------------------------------------ */ +static const uint8_t map_op1[256] = { +0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x20, +0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51, +0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, +0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, +#if LJ_64 +0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14, +#else +0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, +#endif +0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, +0x51,0x51,0x92,0x92,0x10,0x10,0x12,0x11,0x45,0x86,0x52,0x93,0x51,0x51,0x51,0x51, +0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, +0x93,0x86,0x93,0x93,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, +0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x47,0x51,0x51,0x51,0x51,0x51, +#if LJ_64 +0x59,0x59,0x59,0x59,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, +#else +0x55,0x55,0x55,0x55,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, +#endif +0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05, +0x93,0x93,0x53,0x51,0x70,0x71,0x93,0x86,0x54,0x51,0x53,0x51,0x51,0x52,0x51,0x51, +0x92,0x92,0x92,0x92,0x52,0x52,0x51,0x51,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, +0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x45,0x45,0x47,0x52,0x51,0x51,0x51,0x51, +0x10,0x51,0x10,0x10,0x51,0x51,0x63,0x66,0x51,0x51,0x51,0x51,0x51,0x51,0x92,0x92 +}; + +static const uint8_t map_op2[256] = { +0x93,0x93,0x93,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x51,0x52,0x51,0x93,0x52,0x94, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x34,0x51,0x35,0x51,0x51,0x51,0x51,0x51, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x94,0x54,0x54,0x54,0x93,0x93,0x93,0x52,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x52,0x52,0x52,0x93,0x94,0x93,0x51,0x51,0x52,0x52,0x52,0x93,0x94,0x93,0x93,0x93, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x94,0x93,0x93,0x93,0x93,0x93, +0x93,0x93,0x94,0x93,0x94,0x94,0x94,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x52 +}; + +static uint32_t asm_x86_inslen(const uint8_t* p) +{ + uint32_t result = 0; + uint32_t prefixes = 0; + uint32_t x = map_op1[*p]; + for (;;) { + switch (x >> 4) { + case 0: return result + x + (prefixes & 4); + case 1: prefixes |= x; x = map_op1[*++p]; result++; break; + case 2: x = map_op2[*++p]; break; + case 3: p++; goto mrm; + case 4: result -= (prefixes & 2); /* fallthrough */ + case 5: return result + (x & 15); + case 6: /* Group 3. */ + if (p[1] & 0x38) x = 2; + else if ((prefixes & 2) && (x == 0x66)) x = 4; + goto mrm; + case 7: /* VEX c4/c5. */ + if (LJ_32 && p[1] < 0xc0) { + x = 2; + goto mrm; + } + if (x == 0x70) { + x = *++p & 0x1f; + result++; + if (x >= 2) { + p += 2; + result += 2; + goto mrm; + } + } + p++; + result++; + x = map_op2[*++p]; + break; + case 8: result -= (prefixes & 2); /* fallthrough */ + case 9: mrm: /* ModR/M and possibly SIB. */ + result += (x & 15); + x = *++p; + switch (x >> 6) { + case 0: if ((x & 7) == 5) return result + 4; break; + case 1: result++; break; + case 2: result += 4; break; + case 3: return result; + } + if ((x & 7) == 4) { + result++; + if (x < 0x40 && (p[1] & 7) == 5) result += 4; + } + return result; + } + } +} + /* Patch exit jumps of existing machine code to a new target. */ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) { @@ -2612,22 +3097,23 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) MSize len = T->szmcode; MCode *px = exitstub_addr(J, exitno) - 6; MCode *pe = p+len-6; - uint32_t stateaddr = u32ptr(&J2G(J)->vmstate); +#if LJ_GC64 + uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch)); +#else + uint32_t statei = u32ptr(&J2G(J)->vmstate); +#endif if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) *(int32_t *)(p+len-4) = jmprel(p+len, target); /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ - for (; p < pe; p++) - if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) { - p += LJ_64 ? 11 : 10; + for (; p < pe; p += asm_x86_inslen(p)) { + intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64; + if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi) break; - } + } lua_assert(p < pe); - for (; p < pe; p++) { - if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) { + for (; p < pe; p += asm_x86_inslen(p)) + if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) *(int32_t *)(p+2) = jmprel(p+6, target); - p += 5; - } - } lj_mcode_sync(T->mcode, T->mcode + T->szmcode); lj_mcode_patch(J, mcarea, 1); } diff --git a/luajit-2.1/src/lj_bc.c b/luajit-2.1/src/lj_bc.c index a8f444c..a597692 100644 --- a/luajit-2.1/src/lj_bc.c +++ b/luajit-2.1/src/lj_bc.c @@ -1,6 +1,6 @@ /* ** Bytecode instruction modes. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_bc_c diff --git a/luajit-2.1/src/lj_bc.h b/luajit-2.1/src/lj_bc.h index 64c1bcd..69a45f2 100644 --- a/luajit-2.1/src/lj_bc.h +++ b/luajit-2.1/src/lj_bc.h @@ -1,6 +1,6 @@ /* ** Bytecode instruction format. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_BC_H diff --git a/luajit-2.1/src/lj_bcdump.h b/luajit-2.1/src/lj_bcdump.h index c389831..fdfc6ec 100644 --- a/luajit-2.1/src/lj_bcdump.h +++ b/luajit-2.1/src/lj_bcdump.h @@ -1,6 +1,6 @@ /* ** Bytecode dump definitions. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_BCDUMP_H diff --git a/luajit-2.1/src/lj_bcread.c b/luajit-2.1/src/lj_bcread.c index 5e50217..48c5e7c 100644 --- a/luajit-2.1/src/lj_bcread.c +++ b/luajit-2.1/src/lj_bcread.c @@ -1,6 +1,6 @@ /* ** Bytecode reader. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_bcread_c diff --git a/luajit-2.1/src/lj_bcwrite.c b/luajit-2.1/src/lj_bcwrite.c index b2c0973..5e05cae 100644 --- a/luajit-2.1/src/lj_bcwrite.c +++ b/luajit-2.1/src/lj_bcwrite.c @@ -1,6 +1,6 @@ /* ** Bytecode writer. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_bcwrite_c diff --git a/luajit-2.1/src/lj_buf.c b/luajit-2.1/src/lj_buf.c index 023bb9a..0dfe7f9 100644 --- a/luajit-2.1/src/lj_buf.c +++ b/luajit-2.1/src/lj_buf.c @@ -1,6 +1,6 @@ /* ** Buffer handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_buf_c @@ -77,7 +77,6 @@ SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len) return sb; } -#if LJ_HASJIT SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c) { char *p = lj_buf_more(sb, 1); @@ -85,7 +84,6 @@ SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c) setsbufP(sb, p); return sb; } -#endif SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s) { @@ -186,7 +184,7 @@ SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e) } else if (tvisint(o)) { p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o)); } else if (tvisnum(o)) { - p = lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM+seplen), o); + p = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen); } else { goto badtype; } diff --git a/luajit-2.1/src/lj_buf.h b/luajit-2.1/src/lj_buf.h index 1cf1780..a405169 100644 --- a/luajit-2.1/src/lj_buf.h +++ b/luajit-2.1/src/lj_buf.h @@ -1,6 +1,6 @@ /* ** Buffer handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_BUF_H @@ -67,9 +67,7 @@ static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz) /* Low-level buffer put operations */ LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len); -#if LJ_HASJIT LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c); -#endif LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s); static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len) diff --git a/luajit-2.1/src/lj_carith.c b/luajit-2.1/src/lj_carith.c index 9032ea3..218abd2 100644 --- a/luajit-2.1/src/lj_carith.c +++ b/luajit-2.1/src/lj_carith.c @@ -1,6 +1,6 @@ /* ** C data arithmetic. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" diff --git a/luajit-2.1/src/lj_carith.h b/luajit-2.1/src/lj_carith.h index da8320f..67d976b 100644 --- a/luajit-2.1/src/lj_carith.h +++ b/luajit-2.1/src/lj_carith.h @@ -1,6 +1,6 @@ /* ** C data arithmetic. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CARITH_H diff --git a/luajit-2.1/src/lj_ccall.c b/luajit-2.1/src/lj_ccall.c index 5ab5b60..5c252e5 100644 --- a/luajit-2.1/src/lj_ccall.c +++ b/luajit-2.1/src/lj_ccall.c @@ -1,6 +1,6 @@ /* ** FFI C call handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -301,7 +301,7 @@ unsigned int cl = ccall_classify_struct(cts, ctr); \ if ((cl & 4)) { /* Combine float HFA from separate registers. */ \ CTSize i = (cl >> 8) - 1; \ - do { ((uint32_t *)dp)[i] = cc->fpr[i].u32; } while (i--); \ + do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \ } else { \ if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \ memcpy(dp, sp, ctr->size); \ @@ -331,7 +331,7 @@ #define CCALL_HANDLE_COMPLEXARG \ /* Pass complex by value in separate (!) FPRs or on stack. */ \ - isfp = ctr->size == 2*sizeof(float) ? 2 : 1; + isfp = sz == 2*sizeof(float) ? 2 : 1; #define CCALL_HANDLE_REGARG \ if (LJ_TARGET_IOS && isva) { \ @@ -359,6 +359,13 @@ } \ } +#if LJ_BE +#define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + sp = (uint8_t *)&cc->fpr[0].f; +#endif + + #elif LJ_TARGET_PPC /* -- PPC calling conventions --------------------------------------------- */ @@ -407,8 +414,8 @@ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ -#elif LJ_TARGET_MIPS -/* -- MIPS calling conventions -------------------------------------------- */ +#elif LJ_TARGET_MIPS32 +/* -- MIPS o32 calling conventions ---------------------------------------- */ #define CCALL_HANDLE_STRUCTRET \ cc->retref = 1; /* Return all structs by reference. */ \ @@ -418,6 +425,18 @@ /* Complex values are returned in 1 or 2 FPRs. */ \ cc->retref = 0; +#if LJ_ABI_SOFTFP +#define CCALL_HANDLE_COMPLEXRET2 \ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \ + ((intptr_t *)dp)[0] = cc->gpr[0]; \ + ((intptr_t *)dp)[1] = cc->gpr[1]; \ + } else { /* Copy complex double from GPRs. */ \ + ((intptr_t *)dp)[0] = cc->gpr[0]; \ + ((intptr_t *)dp)[1] = cc->gpr[1]; \ + ((intptr_t *)dp)[2] = cc->gpr[2]; \ + ((intptr_t *)dp)[3] = cc->gpr[3]; \ + } +#else #define CCALL_HANDLE_COMPLEXRET2 \ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ ((float *)dp)[0] = cc->fpr[0].f; \ @@ -426,6 +445,7 @@ ((double *)dp)[0] = cc->fpr[0].d; \ ((double *)dp)[1] = cc->fpr[1].d; \ } +#endif #define CCALL_HANDLE_STRUCTARG \ /* Pass all structs by value in registers and/or on the stack. */ @@ -433,6 +453,22 @@ #define CCALL_HANDLE_COMPLEXARG \ /* Pass complex by value in 2 or 4 GPRs. */ +#define CCALL_HANDLE_GPR \ + if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \ + ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ + if (ngpr < maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + if (ngpr + n > maxgpr) { \ + nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ + if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ + ngpr = maxgpr; \ + } else { \ + ngpr += n; \ + } \ + goto done; \ + } + +#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */ #define CCALL_HANDLE_REGARG \ if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \ /* Try to pass argument in FPRs. */ \ @@ -441,24 +477,90 @@ goto done; \ } else { /* Try to pass argument in GPRs. */ \ nfpr = CCALL_NARG_FPR; \ - if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \ - ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ - if (ngpr < maxgpr) { \ - dp = &cc->gpr[ngpr]; \ - if (ngpr + n > maxgpr) { \ - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ - ngpr = maxgpr; \ - } else { \ - ngpr += n; \ - } \ - goto done; \ - } \ + CCALL_HANDLE_GPR \ } +#else /* MIPS32 soft-float */ +#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR +#endif +#if !LJ_ABI_SOFTFP +/* On MIPS64 soft-float, position of float return values is endian-dependant. */ #define CCALL_HANDLE_RET \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ sp = (uint8_t *)&cc->fpr[0].f; +#endif + +#elif LJ_TARGET_MIPS64 +/* -- MIPS n64 calling conventions ---------------------------------------- */ + +#define CCALL_HANDLE_STRUCTRET \ + cc->retref = !(sz <= 16); \ + if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; + +#define CCALL_HANDLE_STRUCTRET2 \ + ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct)); + +#define CCALL_HANDLE_COMPLEXRET \ + /* Complex values are returned in 1 or 2 FPRs. */ \ + cc->retref = 0; + +#if LJ_ABI_SOFTFP /* MIPS64 soft-float */ + +#define CCALL_HANDLE_COMPLEXRET2 \ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \ + ((intptr_t *)dp)[0] = cc->gpr[0]; \ + } else { /* Copy complex double from GPRs. */ \ + ((intptr_t *)dp)[0] = cc->gpr[0]; \ + ((intptr_t *)dp)[1] = cc->gpr[1]; \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + /* Pass complex by value in 2 or 4 GPRs. */ + +/* Position of soft-float 'float' return value depends on endianess. */ +#define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4); + +#else /* MIPS64 hard-float */ + +#define CCALL_HANDLE_COMPLEXRET2 \ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ + ((float *)dp)[0] = cc->fpr[0].f; \ + ((float *)dp)[1] = cc->fpr[1].f; \ + } else { /* Copy complex double from FPRs. */ \ + ((double *)dp)[0] = cc->fpr[0].d; \ + ((double *)dp)[1] = cc->fpr[1].d; \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + if (sz == 2*sizeof(float)) { \ + isfp = 2; \ + if (ngpr < maxgpr) \ + sz *= 2; \ + } + +#define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + sp = (uint8_t *)&cc->fpr[0].f; + +#endif + +#define CCALL_HANDLE_STRUCTARG \ + /* Pass all structs by value in registers and/or on the stack. */ + +#define CCALL_HANDLE_REGARG \ + if (ngpr < maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + if (ngpr + n > maxgpr) { \ + nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ + if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ + ngpr = maxgpr; \ + } else { \ + ngpr += n; \ + } \ + goto done; \ + } #else #error "Missing calling convention definitions for this architecture" @@ -699,6 +801,78 @@ noth: /* Not a homogeneous float/double aggregate. */ #endif +/* -- MIPS64 ABI struct classification ---------------------------- */ + +#if LJ_TARGET_MIPS64 + +#define FTYPE_FLOAT 1 +#define FTYPE_DOUBLE 2 + +/* Classify FP fields (max. 2) and their types. */ +static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) +{ + int n = 0, ft = 0; + if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION)) + goto noth; + while (ct->sib) { + CType *sct; + ct = ctype_get(cts, ct->sib); + if (n == 2) { + goto noth; + } else if (ctype_isfield(ct->info)) { + sct = ctype_rawchild(cts, ct); + if (ctype_isfp(sct->info)) { + ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n; + n++; + } else { + goto noth; + } + } else if (ctype_isbitfield(ct->info) || + ctype_isxattrib(ct->info, CTA_SUBTYPE)) { + goto noth; + } + } + if (n <= 2) + return ft; +noth: /* Not a homogeneous float/double aggregate. */ + return 0; /* Struct is in GPRs. */ +} + +void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft) +{ + if (LJ_ABI_SOFTFP ? ft : + ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) { + int i, ofs = 0; + for (i = 0; ft != 0; i++, ft >>= 2) { + if ((ft & 3) == FTYPE_FLOAT) { +#if LJ_ABI_SOFTFP + /* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */ + memcpy((uint8_t *)dp + ofs, + (uint8_t *)&cc->gpr[2*i] + LJ_ENDIAN_SELECT(0, 4), 4); +#else + *(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f; +#endif + ofs += 4; + } else { + ofs = (ofs + 7) & ~7; /* 64 bit alignment. */ +#if LJ_ABI_SOFTFP + *(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i]; +#else + *(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d; +#endif + ofs += 8; + } + } + } else { +#if !LJ_ABI_SOFTFP + if (ft) sp = (uint8_t *)&cc->fpr[0]; +#endif + memcpy(dp, sp, ctr->size); + } +} + +#endif + /* -- Common C call handling ---------------------------------------------- */ /* Infer the destination CTypeID for a vararg argument. */ @@ -866,6 +1040,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } +#if LJ_TARGET_ARM64 && LJ_BE + if (isfp && d->size == sizeof(float)) + ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ +#endif +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) +#if LJ_TARGET_MIPS64 + || (isfp && nsp == 0) +#endif + ) && d->size <= 4) { + *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ + } +#endif #if LJ_TARGET_X64 && LJ_ABI_WIN if (isva) { /* Windows/x64 mirrors varargs in both register sets. */ if (nfpr == ngpr) @@ -881,7 +1068,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ cc->fpr[nfpr-2].d[1] = 0; } -#elif LJ_TARGET_ARM64 +#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP) if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) { /* Split float HFA or complex float into separate registers. */ CTSize i = (sz >> 2) - 1; @@ -928,7 +1115,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct, CCALL_HANDLE_COMPLEXRET2 return 1; /* One GC step. */ } - if (LJ_BE && ctype_isinteger_or_bool(ctr->info) && ctr->size < CTSIZE_PTR) + if (LJ_BE && ctr->size < CTSIZE_PTR && + (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) sp += (CTSIZE_PTR - ctr->size); #if CCALL_NUM_FPR if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info)) diff --git a/luajit-2.1/src/lj_ccall.h b/luajit-2.1/src/lj_ccall.h index 91983fe..59f6648 100644 --- a/luajit-2.1/src/lj_ccall.h +++ b/luajit-2.1/src/lj_ccall.h @@ -1,6 +1,6 @@ /* ** FFI C call handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CCALL_H @@ -79,8 +79,8 @@ typedef union FPRArg { typedef intptr_t GPRArg; typedef union FPRArg { double d; - float f; - uint32_t u32; + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; + struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) }; } FPRArg; #elif LJ_TARGET_PPC @@ -95,12 +95,12 @@ typedef union FPRArg { typedef intptr_t GPRArg; typedef double FPRArg; -#elif LJ_TARGET_MIPS +#elif LJ_TARGET_MIPS32 #define CCALL_NARG_GPR 4 -#define CCALL_NARG_FPR 2 -#define CCALL_NRET_GPR 2 -#define CCALL_NRET_FPR 2 +#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2) +#define CCALL_NRET_GPR (LJ_ABI_SOFTFP ? 4 : 2) +#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) #define CCALL_SPS_EXTRA 7 #define CCALL_SPS_FREE 1 @@ -110,6 +110,22 @@ typedef union FPRArg { struct { LJ_ENDIAN_LOHI(float f; , float g;) }; } FPRArg; +#elif LJ_TARGET_MIPS64 + +/* FP args are positional and overlay the GPR array. */ +#define CCALL_NARG_GPR 8 +#define CCALL_NARG_FPR 0 +#define CCALL_NRET_GPR 2 +#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) +#define CCALL_SPS_EXTRA 3 +#define CCALL_SPS_FREE 1 + +typedef intptr_t GPRArg; +typedef union FPRArg { + double d; + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; +} FPRArg; + #else #error "Missing calling convention definitions for this architecture" #endif diff --git a/luajit-2.1/src/lj_ccallback.c b/luajit-2.1/src/lj_ccallback.c index 065c329..846827b 100644 --- a/luajit-2.1/src/lj_ccallback.c +++ b/luajit-2.1/src/lj_ccallback.c @@ -1,6 +1,6 @@ /* ** FFI C callback handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -63,9 +63,13 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #define CALLBACK_MCODE_HEAD 24 -#elif LJ_TARGET_MIPS +#elif LJ_TARGET_MIPS32 -#define CALLBACK_MCODE_HEAD 24 +#define CALLBACK_MCODE_HEAD 20 + +#elif LJ_TARGET_MIPS64 + +#define CALLBACK_MCODE_HEAD 52 #else @@ -169,16 +173,16 @@ static void callback_mcode_init(global_State *g, uint32_t *page) uint32_t *p = page; void *target = (void *)lj_vm_ffi_callback; MSize slot; - *p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4); - *p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5); - *p++ = A64I_BR | A64F_N(RID_X11); - *p++ = A64I_NOP; + *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4)); + *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5)); + *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11)); + *p++ = A64I_LE(A64I_NOP); ((void **)p)[0] = target; ((void **)p)[1] = g; p += 4; for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { - *p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot); - *p = A64I_B | A64F_S26((page-p) & 0x03ffffffu); + *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot)); + *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu)); p++; } lua_assert(p - page <= CALLBACK_MCODE_SIZE); @@ -206,14 +210,27 @@ static void callback_mcode_init(global_State *g, uint32_t *page) static void callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; - void *target = (void *)lj_vm_ffi_callback; + uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; + uintptr_t ug = (uintptr_t)(void *)g; MSize slot; - *p++ = MIPSI_SW | MIPSF_T(RID_R1)|MIPSF_S(RID_SP) | 0; - *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (u32ptr(target) >> 16); - *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (u32ptr(g) >> 16); - *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) |(u32ptr(target)&0xffff); +#if LJ_TARGET_MIPS32 + *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16); + *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16); +#else + *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 48); + *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 48); + *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff); + *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff); + *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16); + *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16); + *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff); + *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff); + *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16); + *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16); +#endif + *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff); *p++ = MIPSI_JR | MIPSF_S(RID_R3); - *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (u32ptr(g)&0xffff); + *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff); for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { *p = MIPSI_B | ((page-p-1) & 0x0000ffffu); p++; @@ -425,8 +442,17 @@ void lj_ccallback_mcode_free(CTState *cts) if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ -#elif LJ_TARGET_MIPS +#elif LJ_TARGET_MIPS32 +#define CALLBACK_HANDLE_GPR \ + if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ + if (ngpr + n <= maxgpr) { \ + sp = &cts->cb.gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } + +#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */ #define CALLBACK_HANDLE_REGARG \ if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \ sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \ @@ -434,13 +460,36 @@ void lj_ccallback_mcode_free(CTState *cts) goto done; \ } else { /* Try to pass argument in GPRs. */ \ nfpr = CCALL_NARG_FPR; \ - if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ - if (ngpr + n <= maxgpr) { \ - sp = &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } \ + CALLBACK_HANDLE_GPR \ + } +#else /* MIPS32 soft-float */ +#define CALLBACK_HANDLE_REGARG \ + CALLBACK_HANDLE_GPR \ + UNUSED(isfp); +#endif + +#define CALLBACK_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ((float *)dp)[1] = *(float *)dp; + +#elif LJ_TARGET_MIPS64 + +#if !LJ_ABI_SOFTFP /* MIPS64 hard-float */ +#define CALLBACK_HANDLE_REGARG \ + if (ngpr + n <= maxgpr) { \ + sp = isfp ? (void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } +#else /* MIPS64 soft-float */ +#define CALLBACK_HANDLE_REGARG \ + if (ngpr + n <= maxgpr) { \ + UNUSED(isfp); \ + sp = (void*) &cts->cb.gpr[ngpr]; \ + ngpr += n; \ + goto done; \ } +#endif #define CALLBACK_HANDLE_RET \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ @@ -533,7 +582,11 @@ static void callback_conv_args(CTState *cts, lua_State *L) nsp += n; done: - if (LJ_BE && cta->size < CTSIZE_PTR) + if (LJ_BE && cta->size < CTSIZE_PTR +#if LJ_TARGET_MIPS64 + && !(isfp && nsp) +#endif + ) sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size); gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp); } @@ -571,6 +624,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) if (ctype_isfp(ctr->info)) dp = (uint8_t *)&cts->cb.fpr[0]; #endif +#if LJ_TARGET_ARM64 && LJ_BE + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) + dp = (uint8_t *)&cts->cb.fpr[0].f[1]; +#endif lj_cconv_ct_tv(cts, ctr, dp, o, 0); #ifdef CALLBACK_HANDLE_RET CALLBACK_HANDLE_RET @@ -584,6 +641,12 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) + /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ + if (ctr->size <= 4 && + (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) + *(int64_t *)dp = (int64_t)*(int32_t *)dp; +#endif #if LJ_TARGET_X86 if (ctype_isfp(ctr->info)) cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2; diff --git a/luajit-2.1/src/lj_ccallback.h b/luajit-2.1/src/lj_ccallback.h index 83dbe04..a8cdad3 100644 --- a/luajit-2.1/src/lj_ccallback.h +++ b/luajit-2.1/src/lj_ccallback.h @@ -1,6 +1,6 @@ /* ** FFI C callback handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CCALLBACK_H diff --git a/luajit-2.1/src/lj_cconv.c b/luajit-2.1/src/lj_cconv.c index 8a27076..ab398ad 100644 --- a/luajit-2.1/src/lj_cconv.c +++ b/luajit-2.1/src/lj_cconv.c @@ -1,6 +1,6 @@ /* ** C type conversions. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" diff --git a/luajit-2.1/src/lj_cconv.h b/luajit-2.1/src/lj_cconv.h index 2bd50ff..0a0b66c 100644 --- a/luajit-2.1/src/lj_cconv.h +++ b/luajit-2.1/src/lj_cconv.h @@ -1,6 +1,6 @@ /* ** C type conversions. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CCONV_H diff --git a/luajit-2.1/src/lj_cdata.c b/luajit-2.1/src/lj_cdata.c index 5cd2c11..68e16d7 100644 --- a/luajit-2.1/src/lj_cdata.c +++ b/luajit-2.1/src/lj_cdata.c @@ -1,6 +1,6 @@ /* ** C data management. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -49,6 +49,15 @@ GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align) return cd; } +/* Allocate arbitrary C data object. */ +GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, CTInfo info) +{ + if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) + return lj_cdata_new(cts, id, sz); + else + return lj_cdata_newv(cts->L, id, sz, ctype_align(info)); +} + /* Free a C data object. */ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) { @@ -84,11 +93,13 @@ void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it) setcdataV(L, &tmp, cd); lj_gc_anybarriert(L, t); tv = lj_tab_set(L, t, &tmp); - setgcV(L, tv, obj, it); - if (!tvisnil(tv)) - cd->marked |= LJ_GC_CDATA_FIN; - else + if (it == LJ_TNIL) { + setnilV(tv); cd->marked &= ~LJ_GC_CDATA_FIN; + } else { + setgcV(L, tv, obj, it); + cd->marked |= LJ_GC_CDATA_FIN; + } } } diff --git a/luajit-2.1/src/lj_cdata.h b/luajit-2.1/src/lj_cdata.h index c8975be..5bb0f5d 100644 --- a/luajit-2.1/src/lj_cdata.h +++ b/luajit-2.1/src/lj_cdata.h @@ -1,6 +1,6 @@ /* ** C data management. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CDATA_H @@ -60,6 +60,8 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz) LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id); LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align); +LJ_FUNC GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, + CTInfo info); LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd); LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, diff --git a/luajit-2.1/src/lj_clib.c b/luajit-2.1/src/lj_clib.c index 1e927eb..6142659 100644 --- a/luajit-2.1/src/lj_clib.c +++ b/luajit-2.1/src/lj_clib.c @@ -1,6 +1,6 @@ /* ** FFI C library loader. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -40,7 +40,7 @@ LJ_NORET LJ_NOINLINE static void clib_error_(lua_State *L) #define clib_error(L, fmt, name) clib_error_(L) -#if defined(__CYGWIN__) +#if LJ_TARGET_CYGWIN #define CLIB_SOPREFIX "cyg" #else #define CLIB_SOPREFIX "lib" @@ -48,7 +48,7 @@ LJ_NORET LJ_NOINLINE static void clib_error_(lua_State *L) #if LJ_TARGET_OSX #define CLIB_SOEXT "%s.dylib" -#elif defined(__CYGWIN__) +#elif LJ_TARGET_CYGWIN #define CLIB_SOEXT "%s.dll" #else #define CLIB_SOEXT "%s.so" @@ -57,14 +57,14 @@ LJ_NORET LJ_NOINLINE static void clib_error_(lua_State *L) static const char *clib_extname(lua_State *L, const char *name) { if (!strchr(name, '/') -#ifdef __CYGWIN__ +#if LJ_TARGET_CYGWIN && !strchr(name, '\\') #endif ) { if (!strchr(name, '.')) { name = lj_strfmt_pushf(L, CLIB_SOEXT, name); L->top--; -#ifdef __CYGWIN__ +#if LJ_TARGET_CYGWIN } else { return name; #endif diff --git a/luajit-2.1/src/lj_clib.h b/luajit-2.1/src/lj_clib.h index e5dc98e..fcc9dac 100644 --- a/luajit-2.1/src/lj_clib.h +++ b/luajit-2.1/src/lj_clib.h @@ -1,6 +1,6 @@ /* ** FFI C library loader. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CLIB_H diff --git a/luajit-2.1/src/lj_cparse.c b/luajit-2.1/src/lj_cparse.c index 5d3874a..ee22326 100644 --- a/luajit-2.1/src/lj_cparse.c +++ b/luajit-2.1/src/lj_cparse.c @@ -1,6 +1,6 @@ /* ** C declaration parser. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -297,13 +297,17 @@ static CPToken cp_next_(CPState *cp) else return '/'; break; case '|': - if (cp_get(cp) != '|') return '|'; cp_get(cp); return CTOK_OROR; + if (cp_get(cp) != '|') return '|'; + cp_get(cp); return CTOK_OROR; case '&': - if (cp_get(cp) != '&') return '&'; cp_get(cp); return CTOK_ANDAND; + if (cp_get(cp) != '&') return '&'; + cp_get(cp); return CTOK_ANDAND; case '=': - if (cp_get(cp) != '=') return '='; cp_get(cp); return CTOK_EQ; + if (cp_get(cp) != '=') return '='; + cp_get(cp); return CTOK_EQ; case '!': - if (cp_get(cp) != '=') return '!'; cp_get(cp); return CTOK_NE; + if (cp_get(cp) != '=') return '!'; + cp_get(cp); return CTOK_NE; case '<': if (cp_get(cp) == '=') { cp_get(cp); return CTOK_LE; } else if (cp->c == '<') { cp_get(cp); return CTOK_SHL; } @@ -313,7 +317,8 @@ static CPToken cp_next_(CPState *cp) else if (cp->c == '>') { cp_get(cp); return CTOK_SHR; } return '>'; case '-': - if (cp_get(cp) != '>') return '-'; cp_get(cp); return CTOK_DEREF; + if (cp_get(cp) != '>') return '-'; + cp_get(cp); return CTOK_DEREF; case '$': return cp_param(cp); case '\0': return CTOK_EOF; @@ -1744,6 +1749,16 @@ static void cp_pragma(CPState *cp, BCLine pragmaline) } } +/* Handle line number. */ +static void cp_line(CPState *cp, BCLine hashline) +{ + BCLine newline = cp->val.u32; + /* TODO: Handle file name and include it in error messages. */ + while (cp->tok != CTOK_EOF && cp->linenumber == hashline) + cp_next(cp); + cp->linenumber = newline; +} + /* Parse multiple C declarations of types or extern identifiers. */ static void cp_decl_multi(CPState *cp) { @@ -1756,12 +1771,23 @@ static void cp_decl_multi(CPState *cp) continue; } if (cp->tok == '#') { /* Workaround, since we have no preprocessor, yet. */ - BCLine pragmaline = cp->linenumber; - if (!(cp_next(cp) == CTOK_IDENT && - cp->str->hash == H_(3f192524,f711b0ec))) /* pragma */ + BCLine hashline = cp->linenumber; + CPToken tok = cp_next(cp); + if (tok == CTOK_INTEGER) { + cp_line(cp, hashline); + continue; + } else if (tok == CTOK_IDENT && + cp->str->hash == H_(187aab88,fcb60b42)) { /* line */ + if (cp_next(cp) != CTOK_INTEGER) cp_err_token(cp, tok); + cp_line(cp, hashline); + continue; + } else if (tok == CTOK_IDENT && + cp->str->hash == H_(f5e6b4f8,1d509107)) { /* pragma */ + cp_pragma(cp, hashline); + continue; + } else { cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); - cp_pragma(cp, pragmaline); - continue; + } } scl = cp_decl_spec(cp, &decl, CDF_TYPEDEF|CDF_EXTERN|CDF_STATIC); if ((cp->tok == ';' || cp->tok == CTOK_EOF) && diff --git a/luajit-2.1/src/lj_cparse.h b/luajit-2.1/src/lj_cparse.h index 441580d..bad1060 100644 --- a/luajit-2.1/src/lj_cparse.h +++ b/luajit-2.1/src/lj_cparse.h @@ -1,6 +1,6 @@ /* ** C declaration parser. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CPARSE_H diff --git a/luajit-2.1/src/lj_crecord.c b/luajit-2.1/src/lj_crecord.c index e200cc9..2db4c86 100644 --- a/luajit-2.1/src/lj_crecord.c +++ b/luajit-2.1/src/lj_crecord.c @@ -1,6 +1,6 @@ /* ** Trace recorder for C data operations. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_ffrecord_c @@ -712,6 +712,19 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) return tr; } +/* Tailcall to function. */ +static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv) +{ + TRef kfunc = lj_ir_kfunc(J, funcV(tv)); +#if LJ_FR2 + J->base[-2] = kfunc; + J->base[-1] = TREF_FRAME; +#else + J->base[-1] = kfunc | TREF_FRAME; +#endif + rd->nres = -1; /* Pending tailcall. */ +} + /* Record ctype __index/__newindex metamethods. */ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, RecordFFData *rd) @@ -721,8 +734,7 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, if (!tv) lj_trace_err(J, LJ_TRERR_BADTYPE); if (tvisfunc(tv)) { - J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; - rd->nres = -1; /* Pending tailcall. */ + crec_tailcall(J, rd, tv); } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { /* Specialize to result of __index lookup. */ cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]); @@ -1119,20 +1131,20 @@ static void crec_snap_caller(jit_State *J) lua_State *L = J->L; TValue *base = L->base, *top = L->top; const BCIns *pc = J->pc; - TRef ftr = J->base[-1]; + TRef ftr = J->base[-1-LJ_FR2]; ptrdiff_t delta; if (!frame_islua(base-1) || J->framedepth <= 0) lj_trace_err(J, LJ_TRERR_NYICALL); J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]); L->top = base; L->base = base - delta; - J->base[-1] = TREF_FALSE; + J->base[-1-LJ_FR2] = TREF_FALSE; J->base -= delta; J->baseslot -= (BCReg)delta; - J->maxslot = (BCReg)delta; J->framedepth--; + J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--; lj_snap_add(J); L->base = base; L->top = top; J->framedepth++; J->maxslot = 1; J->base += delta; J->baseslot += (BCReg)delta; - J->base[-1] = ftr; J->pc = pc; + J->base[-1-LJ_FR2] = ftr; J->pc = pc; } /* Record function call. */ @@ -1224,8 +1236,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd) tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm); if (tv) { if (tvisfunc(tv)) { - J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; - rd->nres = -1; /* Pending tailcall. */ + crec_tailcall(J, rd, tv); return; } } else if (mm == MM_new) { @@ -1238,7 +1249,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd) static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm) { - if (ctype_isnum(s[0]->info) && ctype_isnum(s[1]->info)) { + if (sp[0] && sp[1] && ctype_isnum(s[0]->info) && ctype_isnum(s[1]->info)) { IRType dt; CTypeID id; TRef tr; @@ -1296,6 +1307,7 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm) { CTState *cts = ctype_ctsG(J2G(J)); CType *ctp = s[0]; + if (!(sp[0] && sp[1])) return 0; if (ctype_isptr(ctp->info) || ctype_isrefarray(ctp->info)) { if ((mm == MM_sub || mm == MM_eq || mm == MM_lt || mm == MM_le) && (ctype_isptr(s[1]->info) || ctype_isrefarray(s[1]->info))) { @@ -1373,8 +1385,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts, } if (tv) { if (tvisfunc(tv)) { - J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; - rd->nres = -1; /* Pending tailcall. */ + crec_tailcall(J, rd, tv); return 0; } /* NYI: non-function metamethods. */ } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */ diff --git a/luajit-2.1/src/lj_crecord.h b/luajit-2.1/src/lj_crecord.h index 59f342a..c165def 100644 --- a/luajit-2.1/src/lj_crecord.h +++ b/luajit-2.1/src/lj_crecord.h @@ -1,6 +1,6 @@ /* ** Trace recorder for C data operations. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CRECORD_H diff --git a/luajit-2.1/src/lj_ctype.c b/luajit-2.1/src/lj_ctype.c index eda070c..0ea89c7 100644 --- a/luajit-2.1/src/lj_ctype.c +++ b/luajit-2.1/src/lj_ctype.c @@ -1,6 +1,6 @@ /* ** C type management. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -14,6 +14,7 @@ #include "lj_strfmt.h" #include "lj_ctype.h" #include "lj_ccallback.h" +#include "lj_buf.h" /* -- C type definitions -------------------------------------------------- */ @@ -571,19 +572,18 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned) /* Convert complex to string with 'i' or 'I' suffix. */ GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size) { - char buf[2*STRFMT_MAXBUF_NUM+2+1], *p = buf; + SBuf *sb = lj_buf_tmp_(L); TValue re, im; if (size == 2*sizeof(double)) { re.n = *(double *)sp; im.n = ((double *)sp)[1]; } else { re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1]; } - p = lj_strfmt_wnum(p, &re); - if (!(im.u32.hi & 0x80000000u) || im.n != im.n) *p++ = '+'; - p = lj_strfmt_wnum(p, &im); - *p = *(p-1) >= 'a' ? 'I' : 'i'; - p++; - return lj_str_new(L, buf, p-buf); + lj_strfmt_putfnum(sb, STRFMT_G14, re.n); + if (!(im.u32.hi & 0x80000000u) || im.n != im.n) lj_buf_putchar(sb, '+'); + lj_strfmt_putfnum(sb, STRFMT_G14, im.n); + lj_buf_putchar(sb, sbufP(sb)[-1] >= 'a' ? 'I' : 'i'); + return lj_buf_str(L, sb); } /* -- C type state -------------------------------------------------------- */ diff --git a/luajit-2.1/src/lj_ctype.h b/luajit-2.1/src/lj_ctype.h index 6639547..0c220a8 100644 --- a/luajit-2.1/src/lj_ctype.h +++ b/luajit-2.1/src/lj_ctype.h @@ -1,6 +1,6 @@ /* ** C type management. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CTYPE_H @@ -42,18 +42,18 @@ LJ_STATIC_ASSERT(((int)CT_STRUCT & (int)CT_ARRAY) == CT_STRUCT); ** ---------- info ------------ ** |type flags... A cid | size | sib | next | name | ** +----------------------------+--------+-------+-------+-------+-- -** |NUM BFvcUL.. A | size | | type | | -** |STRUCT ..vcU..V A | size | field | name? | name? | -** |PTR ..vcR... A cid | size | | type | | -** |ARRAY VCvc...V A cid | size | | type | | -** |VOID ..vc.... A | size | | type | | +** |NUM BFcvUL.. A | size | | type | | +** |STRUCT ..cvU..V A | size | field | name? | name? | +** |PTR ..cvR... A cid | size | | type | | +** |ARRAY VCcv...V A cid | size | | type | | +** |VOID ..cv.... A | size | | type | | ** |ENUM A cid | size | const | name? | name? | ** |FUNC ....VS.. cc cid | nargs | field | name? | name? | ** |TYPEDEF cid | | | name | name | ** |ATTRIB attrnum cid | attr | sib? | type? | | ** |FIELD cid | offset | field | | name? | -** |BITFIELD B.vcU csz bsz pos | offset | field | | name? | -** |CONSTVAL c cid | value | const | name | name | +** |BITFIELD B.cvU csz bsz pos | offset | field | | name? | +** |CONSTVAL c cid | value | const | name | name | ** |EXTERN cid | | sib? | name | name | ** |KW tok | size | | name | name | ** +----------------------------+--------+-------+-------+-------+-- diff --git a/luajit-2.1/src/lj_debug.c b/luajit-2.1/src/lj_debug.c index 3226d03..959dc28 100644 --- a/luajit-2.1/src/lj_debug.c +++ b/luajit-2.1/src/lj_debug.c @@ -1,6 +1,6 @@ /* ** Debugging and introspection. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_debug_c diff --git a/luajit-2.1/src/lj_debug.h b/luajit-2.1/src/lj_debug.h index 11d308a..5917c00 100644 --- a/luajit-2.1/src/lj_debug.h +++ b/luajit-2.1/src/lj_debug.h @@ -1,6 +1,6 @@ /* ** Debugging and introspection. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_DEBUG_H diff --git a/luajit-2.1/src/lj_def.h b/luajit-2.1/src/lj_def.h index c8fe4aa..2d8fff6 100644 --- a/luajit-2.1/src/lj_def.h +++ b/luajit-2.1/src/lj_def.h @@ -1,6 +1,6 @@ /* ** LuaJIT common internal definitions. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_DEF_H @@ -95,6 +95,9 @@ typedef unsigned int uintptr_t; #define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) #define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) #define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) +#define i64ptr(p) ((int64_t)(intptr_t)(void *)(p)) +#define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p)) +#define igcptr(p) (LJ_GC64 ? i64ptr(p) : i32ptr(p)) #define checki8(x) ((x) == (int32_t)(int8_t)(x)) #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) @@ -103,14 +106,8 @@ typedef unsigned int uintptr_t; #define checki32(x) ((x) == (int32_t)(x)) #define checku32(x) ((x) == (uint32_t)(x)) #define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) -#define checkptr47(x) (((uint64_t)(x) >> 47) == 0) -#if LJ_GC64 -#define checkptrGC(x) (checkptr47((x))) -#elif LJ_64 -#define checkptrGC(x) (checkptr32((x))) -#else -#define checkptrGC(x) 1 -#endif +#define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0) +#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr32((x)) :1) /* Every half-decent C compiler transforms this into a rotate instruction. */ #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) diff --git a/luajit-2.1/src/lj_dispatch.c b/luajit-2.1/src/lj_dispatch.c index 1a07371..5d6795f 100644 --- a/luajit-2.1/src/lj_dispatch.c +++ b/luajit-2.1/src/lj_dispatch.c @@ -1,6 +1,6 @@ /* ** Instruction dispatch handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_dispatch_c @@ -75,7 +75,7 @@ void lj_dispatch_init(GG_State *GG) for (i = 0; i < GG_NUM_ASMFF; i++) GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); #if LJ_TARGET_MIPS - memcpy(GG->got, dispatch_got, LJ_GOT__MAX*4); + memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *)); #endif } @@ -267,7 +267,7 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) case LUAJIT_MODE_FUNC: case LUAJIT_MODE_ALLFUNC: case LUAJIT_MODE_ALLSUBFUNC: { - cTValue *tv = idx == 0 ? frame_prev(L->base-1) : + cTValue *tv = idx == 0 ? frame_prev(L->base-1)-LJ_FR2 : idx > 0 ? L->base + (idx-1) : L->top + idx; GCproto *pt; if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn)) diff --git a/luajit-2.1/src/lj_dispatch.h b/luajit-2.1/src/lj_dispatch.h index 1e247e3..5bda51a 100644 --- a/luajit-2.1/src/lj_dispatch.h +++ b/luajit-2.1/src/lj_dispatch.h @@ -1,6 +1,6 @@ /* ** Instruction dispatch handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_DISPATCH_H @@ -14,6 +14,22 @@ #if LJ_TARGET_MIPS /* Need our own global offset table for the dreaded MIPS calling conventions. */ + +#ifndef _LJ_VM_H +LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b); +#endif + +#if LJ_SOFTFP +#ifndef _LJ_IRCALL_H +extern double __adddf3(double a, double b); +extern double __subdf3(double a, double b); +extern double __muldf3(double a, double b); +extern double __divdf3(double a, double b); +#endif +#define SFGOTDEF(_) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3) +#else +#define SFGOTDEF(_) +#endif #if LJ_HASJIT #define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot) #else @@ -28,18 +44,19 @@ #define GOTDEF(_) \ _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ - _(pow) _(fmod) _(ldexp) \ + _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \ _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \ _(lj_dispatch_profile) _(lj_err_throw) \ _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \ - _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_num) \ + _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_number) \ _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \ _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \ - _(lj_buf_putstr_upper) _(lj_buf_tostr) JITGOTDEF(_) FFIGOTDEF(_) + _(lj_buf_putstr_upper) _(lj_buf_tostr) \ + JITGOTDEF(_) FFIGOTDEF(_) SFGOTDEF(_) enum { #define GOTENUM(name) LJ_GOT_##name, @@ -90,6 +107,7 @@ typedef struct GG_State { #define J2G(J) (&J2GG(J)->g) #define G2J(gl) (&G2GG(gl)->J) #define L2J(L) (&L2GG(L)->J) +#define GG_G2J (GG_OFS(J) - GG_OFS(g)) #define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g)) #define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch)) #define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch)) diff --git a/luajit-2.1/src/lj_emit_arm.h b/luajit-2.1/src/lj_emit_arm.h index 45ce519..dee8bdc 100644 --- a/luajit-2.1/src/lj_emit_arm.h +++ b/luajit-2.1/src/lj_emit_arm.h @@ -1,6 +1,6 @@ /* ** ARM instruction emitter. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* -- Constant encoding --------------------------------------------------- */ @@ -207,7 +207,7 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) -static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); /* Get/set from constant pointer. */ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) @@ -219,8 +219,9 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) #if !LJ_SOFTFP /* Load a number constant into an FPR. */ -static void emit_loadn(ASMState *as, Reg r, cTValue *tv) +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) { + cTValue *tv = ir_knum(ir); int32_t i; if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { uint32_t hi = tv->u32.hi; @@ -273,7 +274,7 @@ static void emit_call(ASMState *as, void *target) ptrdiff_t delta = ((char *)target - (char *)p) - 8; if ((((delta>>2) + 0x00800000) >> 24) == 0) { if ((delta & 1)) - *p = ARMI_BLX | ((uint32_t)(delta>>2) & 0x00ffffffu) | ((delta&2) << 27); + *p = ARMI_BLX | ((uint32_t)(delta>>2) & 0x00ffffffu) | ((delta&2) << 23); else *p = ARMI_BL | ((uint32_t)(delta>>2) & 0x00ffffffu); } else { /* Target out of range: need indirect call. But don't use R0-R3. */ diff --git a/luajit-2.1/src/lj_emit_arm64.h b/luajit-2.1/src/lj_emit_arm64.h new file mode 100644 index 0000000..6da4c7d --- /dev/null +++ b/luajit-2.1/src/lj_emit_arm64.h @@ -0,0 +1,419 @@ +/* +** ARM64 instruction emitter. +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** +** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +** Sponsored by Cisco Systems, Inc. +*/ + +/* -- Constant encoding --------------------------------------------------- */ + +static uint64_t get_k64val(IRIns *ir) +{ + if (ir->o == IR_KINT64) { + return ir_kint64(ir)->u64; + } else if (ir->o == IR_KGC) { + return (uint64_t)ir_kgc(ir); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + return (uint64_t)ir_kptr(ir); + } else { + lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); + return ir->i; /* Sign-extended. */ + } +} + +/* Encode constant in K12 format for data processing instructions. */ +static uint32_t emit_isk12(int64_t n) +{ + uint64_t k = (n < 0) ? -n : n; + uint32_t m = (n < 0) ? 0x40000000 : 0; + if (k < 0x1000) { + return A64I_K12|m|A64F_U12(k); + } else if ((k & 0xfff000) == k) { + return A64I_K12|m|0x400000|A64F_U12(k>>12); + } + return 0; +} + +#define emit_clz64(n) __builtin_clzll(n) +#define emit_ctz64(n) __builtin_ctzll(n) + +/* Encode constant in K13 format for logical data processing instructions. */ +static uint32_t emit_isk13(uint64_t n, int is64) +{ + int inv = 0, w = 128, lz, tz; + if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */ + if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */ + do { /* Find the repeat width. */ + if (is64 && (uint32_t)(n^(n>>32))) break; + n = (uint32_t)n; + if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. */ + w = 32; if ((n^(n>>16)) & 0xffff) break; + n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break; + n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break; + n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break; + n = n & 0x3; w = 2; + } while (0); + lz = emit_clz64(n); + tz = emit_ctz64(n); + if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */ + if (inv) + return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10); + else + return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10); +} + +static uint32_t emit_isfpk64(uint64_t n) +{ + uint64_t etop9 = ((n >> 54) & 0x1ff); + if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) { + return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80)); + } + return ~0u; +} + +/* -- Emit basic instructions --------------------------------------------- */ + +static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra) +{ + *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra); +} + +static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) +{ + *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); +} + +static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm) +{ + *--as->mcp = ai | A64F_D(rd) | A64F_M(rm); +} + +static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn) +{ + *--as->mcp = ai | A64F_D(rd) | A64F_N(rn); +} + +static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm) +{ + *--as->mcp = ai | A64F_N(rn) | A64F_M(rm); +} + +static void emit_d(ASMState *as, A64Ins ai, Reg rd) +{ + *--as->mcp = ai | A64F_D(rd); +} + +static void emit_n(ASMState *as, A64Ins ai, Reg rn) +{ + *--as->mcp = ai | A64F_N(rn); +} + +static int emit_checkofs(A64Ins ai, int64_t ofs) +{ + int scale = (ai >> 30) & 3; + if (ofs < 0 || (ofs & ((1<<scale)-1))) { + return (ofs >= -256 && ofs <= 255) ? -1 : 0; + } else { + return (ofs < (4096<<scale)) ? 1 : 0; + } +} + +static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) +{ + int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3; + lua_assert(ot); + /* Combine LDR/STR pairs to LDP/STP. */ + if ((sc == 2 || sc == 3) && + (!(ai & 0x400000) || rd != rn) && + as->mcp != as->mcloop) { + uint32_t prev = *as->mcp & ~A64F_D(31); + int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc); + A64Ins aip; + if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) || + prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) { + aip = (A64F_A(rd) | A64F_D(*as->mcp & 31)); + } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) || + prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) { + aip = (A64F_D(rd) | A64F_A(*as->mcp & 31)); + ofsm = ofs; + } else { + goto nopair; + } + if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) { + *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | + (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); + return; + } + } +nopair: + if (ot == 1) + *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc); + else + *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff); +} + +/* -- Emit loads/stores --------------------------------------------------- */ + +/* Prefer rematerialization of BASE/L from global_State over spills. */ +#define emit_canremat(ref) ((ref) <= ASMREF_L) + +/* Try to find an N-step delta relative to other consts with N < lim. */ +static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) +{ + RegSet work = ~as->freeset & RSET_GPR; + if (lim <= 1) return 0; /* Can't beat that. */ + while (work) { + Reg r = rset_picktop(work); + IRRef ref = regcost_ref(as->cost[r]); + lua_assert(r != rd); + if (ref < REF_TRUE) { + uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : + get_k64val(IR(ref)); + int64_t delta = (int64_t)(k - kx); + if (delta == 0) { + emit_dm(as, A64I_MOVx, rd, r); + return 1; + } else { + uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta); + if (k12) { + emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r); + return 1; + } + /* Do other ops or multi-step deltas pay off? Probably not. + ** E.g. XOR rarely helps with pointer consts. + */ + } + } + rset_clear(work, r); + } + return 0; /* Failed. */ +} + +static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) +{ + uint32_t k13 = emit_isk13(u64, is64); + if (k13) { /* Can the constant be represented as a bitmask immediate? */ + emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); + } else { + int i, zeros = 0, ones = 0, neg; + if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ + /* Count homogeneous 16 bit fragments. */ + for (i = 0; i < 4; i++) { + uint64_t frag = (u64 >> i*16) & 0xffff; + zeros += (frag == 0); + ones += (frag == 0xffff); + } + neg = ones > zeros; /* Use MOVN if it pays off. */ + if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { + int shift = 0, lshift = 0; + uint64_t n64 = neg ? ~u64 : u64; + if (n64 != 0) { + /* Find first/last fragment to be filled. */ + shift = (63-emit_clz64(n64)) & ~15; + lshift = emit_ctz64(n64) & ~15; + } + /* MOVK requires the original value (u64). */ + while (shift > lshift) { + uint32_t u16 = (u64 >> shift) & 0xffff; + /* Skip fragments that are correctly filled by MOVN/MOVZ. */ + if (u16 != (neg ? 0xffff : 0)) + emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); + shift -= 16; + } + /* But MOVN needs an inverted value (n64). */ + emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | + A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); + } + } +} + +/* Load a 32 bit constant into a GPR. */ +#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0) + +/* Load a 64 bit constant into a GPR. */ +#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X) + +#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr)) + +#define glofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) +#define mcpofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1))) +#define checkmcpofs(as, k) \ + ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0) + +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); + +/* Get/set from constant pointer. */ +static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) +{ + /* First, check if ip + offset is in range. */ + if ((ai & 0x00400000) && checkmcpofs(as, p)) { + emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); + } else { + Reg base = RID_GL; /* Next, try GL + offset. */ + int64_t ofs = glofs(as, p); + if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */ + int64_t i64 = i64ptr(p); + base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); + ofs = i64 & 0x7fffull; + } + emit_lso(as, ai, r, base, ofs); + } +} + +/* Load 64 bit IR constant into register. */ +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) +{ + const uint64_t *k = &ir_k64(ir)->u64; + int64_t ofs; + if (r >= RID_MAX_GPR) { + uint32_t fpk = emit_isfpk64(*k); + if (fpk != ~0u) { + emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31)); + return; + } + } + ofs = glofs(as, k); + if (emit_checkofs(A64I_LDRx, ofs)) { + emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, + (r & 31), RID_GL, ofs); + } else { + if (r >= RID_MAX_GPR) { + emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); + r = RID_TMP; + } + if (checkmcpofs(as, k)) + emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r); + else + emit_loadu64(as, r, *k); + } +} + +/* Get/set global_State fields. */ +#define emit_getgl(as, r, field) \ + emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field) +#define emit_setgl(as, r, field) \ + emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field) + +/* Trace number is determined from pc of exit instruction. */ +#define emit_setvmstate(as, i) UNUSED(i) + +/* -- Emit control-flow instructions -------------------------------------- */ + +/* Label for internal jumps. */ +typedef MCode *MCLabel; + +/* Return label pointing to current PC. */ +#define emit_label(as) ((as)->mcp) + +static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; + lua_assert(((delta + 0x40000) >> 19) == 0); + *p = A64I_BCC | A64F_S19(delta) | cond; +} + +static void emit_branch(ASMState *as, A64Ins ai, MCode *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; + lua_assert(((delta + 0x02000000) >> 26) == 0); + *p = ai | ((uint32_t)delta & 0x03ffffffu); +} + +static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; + lua_assert(bit < 63 && ((delta + 0x2000) >> 14) == 0); + if (bit > 31) ai |= A64I_X; + *p = ai | A64F_BIT(bit & 31) | A64F_S14((uint32_t)delta & 0x3fffu) | r; +} + +static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; + lua_assert(((delta + 0x40000) >> 19) == 0); + *p = ai | A64F_S19(delta) | r; +} + +#define emit_jmp(as, target) emit_branch(as, A64I_B, (target)) + +static void emit_call(ASMState *as, void *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = (char *)target - (char *)p; + if ((((delta>>2) + 0x02000000) >> 26) == 0) { + *p = A64I_BL | ((uint32_t)(delta>>2) & 0x03ffffffu); + } else { /* Target out of range: need indirect call. But don't use R0-R7. */ + Reg r = ra_allock(as, i64ptr(target), + RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); + *p = A64I_BLR | A64F_N(r); + } +} + +/* -- Emit generic operations --------------------------------------------- */ + +/* Generic move between two regs. */ +static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) +{ + if (dst >= RID_MAX_GPR) { + emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S, + (dst & 31), (src & 31)); + return; + } + if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */ + MCode ins = *as->mcp, swp = (src^dst); + if ((ins & 0xbf800000) == 0xb9000000) { + if (!((ins ^ (dst << 5)) & 0x000003e0)) + *as->mcp = ins ^ (swp << 5); /* Swap N in load/store. */ + if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f)) + *as->mcp = ins ^ swp; /* Swap D in store. */ + } + } + emit_dm(as, A64I_MOVx, dst, src); +} + +/* Generic load of register with base and (small) offset address. */ +static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) +{ + if (r >= RID_MAX_GPR) + emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs); + else + emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs); +} + +/* Generic store of register with base and (small) offset address. */ +static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) +{ + if (r >= RID_MAX_GPR) + emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs); + else + emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs); +} + +/* Emit an arithmetic operation with a constant operand. */ +static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src, + int32_t i, RegSet allow) +{ + uint32_t k = emit_isk12(i); + if (k) + emit_dn(as, ai^k, dest, src); + else + emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); +} + +/* Add offset to pointer. */ +static void emit_addptr(ASMState *as, Reg r, int32_t ofs) +{ + if (ofs) + emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r, + ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r)); +} + +#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) + diff --git a/luajit-2.1/src/lj_emit_mips.h b/luajit-2.1/src/lj_emit_mips.h index 8e7ee66..8a9ee24 100644 --- a/luajit-2.1/src/lj_emit_mips.h +++ b/luajit-2.1/src/lj_emit_mips.h @@ -1,8 +1,30 @@ /* ** MIPS instruction emitter. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ +#if LJ_64 +static intptr_t get_k64val(IRIns *ir) +{ + if (ir->o == IR_KINT64) { + return (intptr_t)ir_kint64(ir)->u64; + } else if (ir->o == IR_KGC) { + return (intptr_t)ir_kgc(ir); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + return (intptr_t)ir_kptr(ir); + } else { + lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); + return ir->i; /* Sign-extended. */ + } +} +#endif + +#if LJ_64 +#define get_kval(ir) get_k64val(ir) +#else +#define get_kval(ir) ((ir)->i) +#endif + /* -- Emit basic instructions --------------------------------------------- */ static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt) @@ -35,7 +57,7 @@ static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh) static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) { - if ((as->flags & JIT_F_MIPS32R2)) { + if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { emit_dta(as, MIPSI_ROTR, dest, src, shift); } else { emit_dst(as, MIPSI_OR, dest, dest, tmp); @@ -44,13 +66,21 @@ static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) } } +#if LJ_64 +static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, + uint32_t lsb) +{ + *--as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | MIPSF_M(msb) | MIPSF_L(lsb); +} +#endif + /* -- Emit loads/stores --------------------------------------------------- */ /* Prefer rematerialization of BASE/L from global_State over spills. */ #define emit_canremat(ref) ((ref) <= REF_BASE) /* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg t, int32_t i) +static int emit_kdelta1(ASMState *as, Reg t, intptr_t i) { RegSet work = ~as->freeset & RSET_GPR; while (work) { @@ -58,9 +88,10 @@ static int emit_kdelta1(ASMState *as, Reg t, int32_t i) IRRef ref = regcost_ref(as->cost[r]); lua_assert(r != t); if (ref < ASMREF_L) { - int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); + intptr_t delta = (intptr_t)((uintptr_t)i - + (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(IR(ref)))); if (checki16(delta)) { - emit_tsi(as, MIPSI_ADDIU, t, r, delta); + emit_tsi(as, MIPSI_AADDIU, t, r, delta); return 1; } } @@ -76,8 +107,8 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) emit_ti(as, MIPSI_LI, r, i); } else { if ((i & 0xffff)) { - int32_t jgl = i32ptr(J2G(as->J)); - if ((uint32_t)(i-jgl) < 65536) { + intptr_t jgl = (intptr_t)(void *)J2G(as->J); + if ((uintptr_t)(i-jgl) < 65536) { emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); return; } else if (emit_kdelta1(as, r, i)) { @@ -92,16 +123,48 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) } } +#if LJ_64 +/* Load a 64 bit constant into a GPR. */ +static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) +{ + if (checki32((int64_t)u64)) { + emit_loadi(as, r, (int32_t)u64); + } else { + uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J); + if (delta < 65536) { + emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768)); + } else if (emit_kdelta1(as, r, (intptr_t)u64)) { + return; + } else { + if ((u64 & 0xffff)) { + emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff); + } + if (((u64 >> 16) & 0xffff)) { + emit_dta(as, MIPSI_DSLL, r, r, 16); + emit_tsi(as, MIPSI_ORI, r, r, (u64 >> 16) & 0xffff); + emit_dta(as, MIPSI_DSLL, r, r, 16); + } else { + emit_dta(as, MIPSI_DSLL32, r, r, 0); + } + emit_loadi(as, r, (int32_t)(u64 >> 32)); + } + /* TODO: There are probably more optimization opportunities. */ + } +} + +#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) +#else #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) +#endif -static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); -static void ra_allockreg(ASMState *as, int32_t k, Reg r); +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); +static void ra_allockreg(ASMState *as, intptr_t k, Reg r); /* Get/set from constant pointer. */ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) { - int32_t jgl = i32ptr(J2G(as->J)); - int32_t i = i32ptr(p); + intptr_t jgl = (intptr_t)(J2G(as->J)); + intptr_t i = (intptr_t)(p); Reg base; if ((uint32_t)(i-jgl) < 65536) { i = i-jgl-32768; @@ -112,8 +175,24 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) emit_tsi(as, mi, r, base, i); } -#define emit_loadn(as, r, tv) \ - emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)(tv), RSET_GPR) +#if LJ_64 +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) +{ + const uint64_t *k = &ir_k64(ir)->u64; + Reg r64 = r; + if (rset_test(RSET_FPR, r)) { + r64 = RID_TMP; + emit_tg(as, MIPSI_DMTC1, r64, r); + } + if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536) + emit_lsptr(as, MIPSI_LD, r64, (void *)k, 0); + else + emit_loadu64(as, r64, *k); +} +#else +#define emit_loadk64(as, r, ir) \ + emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) +#endif /* Get/set global_State fields. */ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) @@ -122,9 +201,9 @@ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) } #define emit_getgl(as, r, field) \ - emit_lsglptr(as, MIPSI_LW, (r), (int32_t)offsetof(global_State, field)) + emit_lsglptr(as, MIPSI_AL, (r), (int32_t)offsetof(global_State, field)) #define emit_setgl(as, r, field) \ - emit_lsglptr(as, MIPSI_SW, (r), (int32_t)offsetof(global_State, field)) + emit_lsglptr(as, MIPSI_AS, (r), (int32_t)offsetof(global_State, field)) /* Trace number is determined from per-trace exit stubs. */ #define emit_setvmstate(as, i) UNUSED(i) @@ -152,16 +231,19 @@ static void emit_jmp(ASMState *as, MCode *target) emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target)); } -static void emit_call(ASMState *as, void *target) +static void emit_call(ASMState *as, void *target, int needcfa) { MCode *p = as->mcp; *--p = MIPSI_NOP; - if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) - *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu); - else /* Target out of range: need indirect call. */ + if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) { + *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) | + (((uintptr_t)target >>2) & 0x03ffffffu); + } else { /* Target out of range: need indirect call. */ *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); + needcfa = 1; + } as->mcp = p; - ra_allockreg(as, i32ptr(target), RID_CFUNCADDR); + if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); } /* -- Emit generic operations --------------------------------------------- */ @@ -182,7 +264,7 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { if (r < RID_MAX_GPR) - emit_tsi(as, MIPSI_LW, r, base, ofs); + emit_tsi(as, irt_is64(ir->t) ? MIPSI_LD : MIPSI_LW, r, base, ofs); else emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, (r & 31), base, ofs); @@ -192,7 +274,7 @@ static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { if (r < RID_MAX_GPR) - emit_tsi(as, MIPSI_SW, r, base, ofs); + emit_tsi(as, irt_is64(ir->t) ? MIPSI_SD : MIPSI_SW, r, base, ofs); else emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, (r&31), base, ofs); @@ -203,7 +285,7 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs) { if (ofs) { lua_assert(checki16(ofs)); - emit_tsi(as, MIPSI_ADDIU, r, r, ofs); + emit_tsi(as, MIPSI_AADDIU, r, r, ofs); } } diff --git a/luajit-2.1/src/lj_emit_ppc.h b/luajit-2.1/src/lj_emit_ppc.h index 087860e..21c3c2a 100644 --- a/luajit-2.1/src/lj_emit_ppc.h +++ b/luajit-2.1/src/lj_emit_ppc.h @@ -1,6 +1,6 @@ /* ** PPC instruction emitter. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* -- Emit basic instructions --------------------------------------------- */ @@ -98,7 +98,7 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) -static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); /* Get/set from constant pointer. */ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) @@ -115,8 +115,8 @@ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) emit_tai(as, pi, r, base, i); } -#define emit_loadn(as, r, tv) \ - emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)(tv), RSET_GPR) +#define emit_loadk64(as, r, ir) \ + emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) /* Get/set global_State fields. */ static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs) diff --git a/luajit-2.1/src/lj_emit_x86.h b/luajit-2.1/src/lj_emit_x86.h index ac42db3..5207f9d 100644 --- a/luajit-2.1/src/lj_emit_x86.h +++ b/luajit-2.1/src/lj_emit_x86.h @@ -1,6 +1,6 @@ /* ** x86/x64 instruction emitter. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* -- Emit basic instructions --------------------------------------------- */ @@ -13,10 +13,17 @@ if (rex != 0x40) *--(p) = rex; } #define FORCE_REX 0x200 #define REX_64 (FORCE_REX|0x080000) +#define VEX_64 0x800000 #else #define REXRB(p, rr, rb) ((void)0) #define FORCE_REX 0 #define REX_64 0 +#define VEX_64 0 +#endif +#if LJ_GC64 +#define REX_GC64 REX_64 +#else +#define REX_GC64 0 #endif #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) @@ -31,6 +38,13 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, MCode *p, int delta) { int n = (int8_t)xo; + if (n == -60) { /* VEX-encoded instruction */ +#if LJ_64 + xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13; +#endif + *(uint32_t *)(p+delta-5) = (uint32_t)xo; + return p+delta-5; + } #if defined(__GNUC__) if (__builtin_constant_p(xo) && n == -2) p[delta-2] = (MCode)(xo >> 24); @@ -85,26 +99,17 @@ static int32_t ptr2addr(const void *p) #define ptr2addr(p) (i32ptr((p))) #endif -/* op r, [addr] */ -static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) -{ - MCode *p = as->mcp; - *(int32_t *)(p-4) = ptr2addr(addr); -#if LJ_64 - p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); - as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); -#else - as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); -#endif -} - /* op r, [base+ofs] */ static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) { MCode *p = as->mcp; x86Mode mode; if (ra_hasreg(rb)) { - if (ofs == 0 && (rb&7) != RID_EBP) { + if (LJ_GC64 && rb == RID_RIP) { + mode = XM_OFS0; + p -= 4; + *(int32_t *)p = ofs; + } else if (ofs == 0 && (rb&7) != RID_EBP) { mode = XM_OFS0; } else if (checki8(ofs)) { *--p = (MCode)ofs; @@ -202,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb) *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); rb = RID_ESP; #endif + } else if (LJ_GC64 && rb == RID_RIP) { + lua_assert(as->mrm.idx == RID_NONE); + mode = XM_OFS0; + p -= 4; + *(int32_t *)p = as->mrm.ofs; } else { if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { mode = XM_OFS0; @@ -255,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) /* Get/set global_State fields. */ #define emit_opgl(as, xo, r, field) \ emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) -#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field) -#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field) +#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field) +#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field) #define emit_setvmstate(as, i) \ (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate)) @@ -279,9 +289,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) } } +#if LJ_GC64 +#define dispofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch)) +#define mcpofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp)) +#define mctopofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop)) +/* mov r, addr */ +#define emit_loada(as, r, addr) \ + emit_loadu64(as, (r), (uintptr_t)(addr)) +#else /* mov r, addr */ #define emit_loada(as, r, addr) \ emit_loadi(as, (r), ptr2addr((addr))) +#endif #if LJ_64 /* mov r, imm64 or shorter 32 bit extended load. */ @@ -293,6 +315,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) MCode *p = as->mcp; *(int32_t *)(p-4) = (int32_t)u64; as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); +#if LJ_GC64 + } else if (checki32(dispofs(as, u64))) { + emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64)); + } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) { + /* Since as->realign assumes the code size doesn't change, check + ** RIP-relative addressing reachability for both as->mcp and as->mctop. + */ + emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64)); +#endif } else { /* Full-size 64 bit load. */ MCode *p = as->mcp; *(uint64_t *)(p-8) = u64; @@ -304,13 +335,71 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) } #endif -/* movsd r, [&tv->n] / xorps r, r */ -static void emit_loadn(ASMState *as, Reg r, cTValue *tv) +/* op r, [addr] */ +static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) { - if (tvispzero(tv)) /* Use xor only for +0. */ - emit_rr(as, XO_XORPS, r, r); - else - emit_rma(as, XO_MOVSD, r, &tv->n); +#if LJ_GC64 + if (checki32(dispofs(as, addr))) { + emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr)); + } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) { + emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr)); + } else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) { + emit_rmro(as, xo, rr, rr, 0); + emit_loadu64(as, rr, (uintptr_t)addr); + } else +#endif + { + MCode *p = as->mcp; + *(int32_t *)(p-4) = ptr2addr(addr); +#if LJ_64 + p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); + as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); +#else + as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); +#endif + } +} + +/* Load 64 bit IR constant into register. */ +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) +{ + Reg r64; + x86Op xo; + const uint64_t *k = &ir_k64(ir)->u64; + if (rset_test(RSET_FPR, r)) { + r64 = r; + xo = XO_MOVSD; + } else { + r64 = r | REX_64; + xo = XO_MOV; + } + if (*k == 0) { + emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r); +#if LJ_GC64 + } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) || + (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) { + emit_rma(as, xo, r64, k); + } else { + if (ir->i) { + lua_assert(*k == *(uint64_t*)(as->mctop - ir->i)); + } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) { + emit_loadu64(as, r, *k); + return; + } else { + /* If all else fails, add the FP constant at the MCode area bottom. */ + while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; + *(uint64_t *)as->mcbot = *k; + ir->i = (int32_t)(as->mctop - as->mcbot); + as->mcbot += 8; + as->mclim = as->mcbot + MCLIM_REDZONE; + lj_mcode_commitbot(as->J, as->mcbot); + } + emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i)); +#else + } else { + emit_rma(as, xo, r64, k); +#endif + } } /* -- Emit control-flow instructions -------------------------------------- */ @@ -412,8 +501,10 @@ static void emit_call_(ASMState *as, MCode *target) /* Use 64 bit operations to handle 64 bit IR types. */ #if LJ_64 #define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) +#define VEX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? VEX_64 : 0)) #else #define REX_64IR(ir, r) (r) +#define VEX_64IR(ir, r) (r) #endif /* Generic move between two regs. */ @@ -449,9 +540,9 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs) { if (ofs) { if ((as->flags & JIT_F_LEA_AGU)) - emit_rmro(as, XO_LEA, r, r, ofs); + emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs); else - emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs); + emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); } } diff --git a/luajit-2.1/src/lj_err.c b/luajit-2.1/src/lj_err.c index 9ac0c98..b6be357 100644 --- a/luajit-2.1/src/lj_err.c +++ b/luajit-2.1/src/lj_err.c @@ -1,6 +1,6 @@ /* ** Error handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_err_c @@ -46,7 +46,8 @@ ** the wrapper function feature. Lua errors thrown through C++ frames ** cannot be caught by C++ code and C++ destructors are not run. ** -** EXT is the default on x64 systems, INT is the default on all other systems. +** EXT is the default on x64 systems and on Windows, INT is the default on all +** other systems. ** ** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack ** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled @@ -55,7 +56,6 @@ ** and all C libraries that have callbacks which may be used to call back ** into Lua. C++ code must *not* be compiled with -fno-exceptions. ** -** EXT cannot be enabled on WIN32 since system exceptions use code-driven SEH. ** EXT is mandatory on WIN64 since the calling convention has an abundance ** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15). ** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4). @@ -63,7 +63,7 @@ #if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND #define LJ_UNWIND_EXT 1 -#elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS +#elif LJ_TARGET_WINDOWS #define LJ_UNWIND_EXT 1 #endif @@ -190,13 +190,6 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) ** since various OS, distros and compilers mess up the header installation. */ -typedef struct _Unwind_Exception -{ - uint64_t exclass; - void (*excleanup)(int, struct _Unwind_Exception *); - uintptr_t p1, p2; -} __attribute__((__aligned__)) _Unwind_Exception; - typedef struct _Unwind_Context _Unwind_Context; #define _URC_OK 0 @@ -206,8 +199,20 @@ typedef struct _Unwind_Context _Unwind_Context; #define _URC_CONTINUE_UNWIND 8 #define _URC_FAILURE 9 +#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */ +#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c)) +#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff) +#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff)) + #if !LJ_TARGET_ARM +typedef struct _Unwind_Exception +{ + uint64_t exclass; + void (*excleanup)(int, struct _Unwind_Exception *); + uintptr_t p1, p2; +} __attribute__((__aligned__)) _Unwind_Exception; + extern uintptr_t _Unwind_GetCFA(_Unwind_Context *); extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t); extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t); @@ -219,11 +224,6 @@ extern int _Unwind_RaiseException(_Unwind_Exception *); #define _UA_HANDLER_FRAME 4 #define _UA_FORCE_UNWIND 8 -#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */ -#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c)) -#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff) -#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff)) - /* DWARF2 personality handler referenced from interpreter .eh_frame. */ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx) @@ -302,10 +302,22 @@ static void err_raise_ext(int errcode) } #endif -#else +#else /* LJ_TARGET_ARM */ + +#define _US_VIRTUAL_UNWIND_FRAME 0 +#define _US_UNWIND_FRAME_STARTING 1 +#define _US_ACTION_MASK 3 +#define _US_FORCE_UNWIND 8 -extern void _Unwind_DeleteException(void *); -extern int __gnu_unwind_frame (void *, _Unwind_Context *); +typedef struct _Unwind_Control_Block _Unwind_Control_Block; + +struct _Unwind_Control_Block { + uint64_t exclass; + uint32_t misc[20]; +}; + +extern int _Unwind_RaiseException(_Unwind_Control_Block *); +extern int __gnu_unwind_frame(_Unwind_Control_Block *, _Unwind_Context *); extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *); extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *); @@ -321,35 +333,58 @@ static inline void _Unwind_SetGR(_Unwind_Context *ctx, int r, uint32_t v) _Unwind_VRS_Set(ctx, 0, r, 0, &v); } -#define _US_VIRTUAL_UNWIND_FRAME 0 -#define _US_UNWIND_FRAME_STARTING 1 -#define _US_ACTION_MASK 3 -#define _US_FORCE_UNWIND 8 +extern void lj_vm_unwind_ext(void); /* ARM unwinder personality handler referenced from interpreter .ARM.extab. */ -LJ_FUNCA int lj_err_unwind_arm(int state, void *ucb, _Unwind_Context *ctx) +LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb, + _Unwind_Context *ctx) { void *cf = (void *)_Unwind_GetGR(ctx, 13); lua_State *L = cframe_L(cf); - if ((state & _US_ACTION_MASK) == _US_VIRTUAL_UNWIND_FRAME) { - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); + int errcode; + + switch ((state & _US_ACTION_MASK)) { + case _US_VIRTUAL_UNWIND_FRAME: + if ((state & _US_FORCE_UNWIND)) break; return _URC_HANDLER_FOUND; - } - if ((state&(_US_ACTION_MASK|_US_FORCE_UNWIND)) == _US_UNWIND_FRAME_STARTING) { - _Unwind_DeleteException(ucb); - _Unwind_SetGR(ctx, 15, (uint32_t)(void *)lj_err_throw); - _Unwind_SetGR(ctx, 0, (uint32_t)L); - _Unwind_SetGR(ctx, 1, (uint32_t)LUA_ERRRUN); + case _US_UNWIND_FRAME_STARTING: + if (LJ_UEXCLASS_CHECK(ucb->exclass)) { + errcode = LJ_UEXCLASS_ERRCODE(ucb->exclass); + } else { + errcode = LUA_ERRRUN; + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); + } + cf = err_unwind(L, cf, errcode); + if ((state & _US_FORCE_UNWIND) || cf == NULL) break; + _Unwind_SetGR(ctx, 15, (uint32_t)lj_vm_unwind_ext); + _Unwind_SetGR(ctx, 0, (uint32_t)ucb); + _Unwind_SetGR(ctx, 1, (uint32_t)errcode); + _Unwind_SetGR(ctx, 2, cframe_unwind_ff(cf) ? + (uint32_t)lj_vm_unwind_ff_eh : + (uint32_t)lj_vm_unwind_c_eh); return _URC_INSTALL_CONTEXT; + default: + return _URC_FAILURE; } if (__gnu_unwind_frame(ucb, ctx) != _URC_OK) return _URC_FAILURE; return _URC_CONTINUE_UNWIND; } +#if LJ_UNWIND_EXT +static __thread _Unwind_Control_Block static_uex; + +static void err_raise_ext(int errcode) +{ + memset(&static_uex, 0, sizeof(static_uex)); + static_uex.exclass = LJ_UEXCLASS_MAKE(errcode); + _Unwind_RaiseException(&static_uex); +} #endif -#elif LJ_TARGET_X64 && LJ_ABI_WIN +#endif /* LJ_TARGET_ARM */ + +#elif LJ_ABI_WIN /* ** Someone in Redmond owes me several days of my life. A lot of this is @@ -367,6 +402,7 @@ LJ_FUNCA int lj_err_unwind_arm(int state, void *ucb, _Unwind_Context *ctx) #define WIN32_LEAN_AND_MEAN #include <windows.h> +#if LJ_TARGET_X64 /* Taken from: http://www.nynaeve.net/?p=99 */ typedef struct UndocumentedDispatcherContext { ULONG64 ControlPc; @@ -381,11 +417,14 @@ typedef struct UndocumentedDispatcherContext { ULONG ScopeIndex; ULONG Fill0; } UndocumentedDispatcherContext; +#else +typedef void *UndocumentedDispatcherContext; +#endif /* Another wild guess. */ extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); -#ifdef MINGW_SDK_INIT +#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT) /* Workaround for broken MinGW64 declaration. */ VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); #define RtlUnwindEx RtlUnwindEx_FIXED @@ -399,10 +438,15 @@ VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); #define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) #define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) -/* Win64 exception handler for interpreter frame. */ -LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, - void *cf, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) +/* Windows exception handler for interpreter frame. */ +LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec, + void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) { +#if LJ_TARGET_X64 + void *cf = f; +#else + void *cf = (char *)f - CFRAME_OFS_SEH; +#endif lua_State *L = cframe_L(cf); int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; @@ -420,8 +464,9 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { /* Don't catch access violations etc. */ - return ExceptionContinueSearch; + return 1; /* ExceptionContinueSearch */ } +#if LJ_TARGET_X64 /* Unwind the stack and call all handlers for all lower C frames ** (including ourselves) again with EH_UNWINDING set. Then set ** rsp = cf, rax = errcode and jump to the specified target. @@ -431,9 +476,21 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, lj_vm_unwind_c_eh), rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); /* RtlUnwindEx should never return. */ +#else + UNUSED(ctx); + UNUSED(dispatch); + /* Call all handlers for all lower C frames (including ourselves) again + ** with EH_UNWINDING set. Then call the specified function, passing cf + ** and errcode. + */ + lj_vm_rtlunwind(cf, (void *)rec, + (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? + (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode); + /* lj_vm_rtlunwind does not return. */ +#endif } } - return ExceptionContinueSearch; + return 1; /* ExceptionContinueSearch */ } /* Raise Windows exception. */ @@ -452,7 +509,7 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode) global_State *g = G(L); lj_trace_abort(g); setmref(g->jit_base, NULL); - L->status = 0; + L->status = LUA_OK; #if LJ_UNWIND_EXT err_raise_ext(errcode); /* diff --git a/luajit-2.1/src/lj_err.h b/luajit-2.1/src/lj_err.h index 03a56f0..cba5fb7 100644 --- a/luajit-2.1/src/lj_err.h +++ b/luajit-2.1/src/lj_err.h @@ -1,6 +1,6 @@ /* ** Error handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_ERR_H diff --git a/luajit-2.1/src/lj_errmsg.h b/luajit-2.1/src/lj_errmsg.h index 7717665..060a9f8 100644 --- a/luajit-2.1/src/lj_errmsg.h +++ b/luajit-2.1/src/lj_errmsg.h @@ -1,6 +1,6 @@ /* ** VM error messages. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* This file may be included multiple times with different ERRDEF macros. */ diff --git a/luajit-2.1/src/lj_ff.h b/luajit-2.1/src/lj_ff.h index 73dad96..31d65a0 100644 --- a/luajit-2.1/src/lj_ff.h +++ b/luajit-2.1/src/lj_ff.h @@ -1,6 +1,6 @@ /* ** Fast function IDs. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_FF_H diff --git a/luajit-2.1/src/lj_ffrecord.c b/luajit-2.1/src/lj_ffrecord.c index 281f017..dfdee2d 100644 --- a/luajit-2.1/src/lj_ffrecord.c +++ b/luajit-2.1/src/lj_ffrecord.c @@ -1,6 +1,6 @@ /* ** Fast function call recorder. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_ffrecord_c @@ -102,42 +102,41 @@ static void recff_stitch(jit_State *J) ASMFunction cont = lj_cont_stitch; lua_State *L = J->L; TValue *base = L->base; + BCReg nslot = J->maxslot + 1 + LJ_FR2; + TValue *nframe = base + 1 + LJ_FR2; const BCIns *pc = frame_pc(base-1); TValue *pframe = frame_prevl(base-1); - TRef trcont; - lua_assert(!LJ_FR2); /* TODO_FR2: handle frame shift. */ /* Move func + args up in Lua stack and insert continuation. */ - memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1)); - setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT); - setcont(base, cont); + memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot); + setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT); + setcont(base-LJ_FR2, cont); setframe_pc(base, pc); - setnilV(base-1); /* Incorrect, but rec_check_slots() won't run anymore. */ - L->base += 2; - L->top += 2; + setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run anymore. */ + L->base += 2 + LJ_FR2; + L->top += 2 + LJ_FR2; /* Ditto for the IR. */ - memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1)); -#if LJ_64 - trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin)); + memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot); +#if LJ_FR2 + J->base[2] = TREF_FRAME; + J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); + J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT; #else - trcont = lj_ir_kptr(J, (void *)cont); + J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; #endif - J->base[0] = trcont | TREF_CONT; - J->ktracep = lj_ir_k64_reserve(J); - lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE); - J->base[-1] = emitir(IRT(IR_XLOAD, IRT_P64), lj_ir_kptr(J, &J->ktracep->gcr), 0); - J->base += 2; - J->baseslot += 2; + J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J))); + J->base += 2 + LJ_FR2; + J->baseslot += 2 + LJ_FR2; J->framedepth++; lj_record_stop(J, LJ_TRLINK_STITCH, 0); /* Undo Lua stack changes. */ - memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1)); + memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot); setframe_pc(base-1, pc); - L->base -= 2; - L->top -= 2; + L->base -= 2 + LJ_FR2; + L->top -= 2 + LJ_FR2; } /* Fallback handler for fast functions that are not recorded (yet). */ @@ -179,7 +178,7 @@ static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd) /* Emit BUFHDR for the global temporary buffer. */ static TRef recff_bufhdr(jit_State *J) { - return emitir(IRT(IR_BUFHDR, IRT_P32), + return emitir(IRT(IR_BUFHDR, IRT_PGC), lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); } @@ -229,7 +228,7 @@ static void LJ_FASTCALL recff_setmetatable(jit_State *J, RecordFFData *rd) ix.tab = tr; copyTV(J->L, &ix.tabv, &rd->argv[0]); lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */ - fref = emitir(IRT(IR_FREF, IRT_P32), tr, IRFL_TAB_META); + fref = emitir(IRT(IR_FREF, IRT_PGC), tr, IRFL_TAB_META); mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt; emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref); if (!tref_isnil(mt)) @@ -295,7 +294,7 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv) if (strV(tv)->len == 1) { emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv))); } else { - TRef trptr = emitir(IRT(IR_STRREF, IRT_P32), tr, lj_ir_kint(J, 0)); + TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0)); TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY); emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#')); } @@ -380,10 +379,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm) int errcode; TValue argv0; /* Temporarily insert metamethod below object. */ - J->base[1] = J->base[0]; + J->base[1+LJ_FR2] = J->base[0]; J->base[0] = ix.mobj; copyTV(J->L, &argv0, &rd->argv[0]); - copyTV(J->L, &rd->argv[1], &rd->argv[0]); + copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]); copyTV(J->L, &rd->argv[0], &ix.mobjv); /* Need to protect lj_record_tailcall because it may throw. */ errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); @@ -450,6 +449,10 @@ static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) { if (J->maxslot >= 1) { +#if LJ_FR2 + /* Shift function arguments up. */ + memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot); +#endif lj_record_call(J, 0, J->maxslot - 1); rd->nres = -1; /* Pending call. */ } /* else: Interpreter will throw. */ @@ -469,13 +472,16 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) TValue argv0, argv1; TRef tmp; int errcode; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ /* Swap function and traceback. */ tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp; copyTV(J->L, &argv0, &rd->argv[0]); copyTV(J->L, &argv1, &rd->argv[1]); copyTV(J->L, &rd->argv[0], &argv1); copyTV(J->L, &rd->argv[1], &argv0); +#if LJ_FR2 + /* Shift function arguments up. */ + memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1)); +#endif /* Need to protect lj_record_call because it may throw. */ errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); /* Always undo Lua stack swap to avoid confusing the interpreter. */ @@ -504,7 +510,7 @@ static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); - J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J)); + J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_ksimd(J, LJ_KSIMD_ABS)); UNUSED(rd); } @@ -613,10 +619,8 @@ static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) { - TRef tr = lj_ir_tonum(J, J->base[0]); - if (!tref_isnumber_str(J->base[1])) - lj_trace_err(J, LJ_TRERR_BADTYPE); - J->base[0] = lj_opt_narrow_pow(J, tr, J->base[1], &rd->argv[1]); + J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1], + &rd->argv[0], &rd->argv[1]); UNUSED(rd); } @@ -822,7 +826,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) /* Also handle empty range here, to avoid extra traces. */ TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart); emitir(IRTGI(IR_GE), trslen, tr0); - trptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart); + trptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart); J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); } else { /* Range underflow: return empty string. */ emitir(IRTGI(IR_LT), trend, trstart); @@ -838,7 +842,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) rd->nres = len; for (i = 0; i < len; i++) { TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i)); - tmp = emitir(IRT(IR_STRREF, IRT_P32), trstr, tmp); + tmp = emitir(IRT(IR_STRREF, IRT_PGC), trstr, tmp); J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY); } } else { /* Empty range or range underflow: return no results. */ @@ -860,7 +864,7 @@ static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd) if (i > 1) { /* Concatenate the strings, if there's more than one. */ TRef hdr = recff_bufhdr(J), tr = hdr; for (i = 0; J->base[i] != 0; i++) - tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]); + tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, J->base[i]); J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); } UNUSED(rd); @@ -877,14 +881,14 @@ static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd) emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1)); if (vrep > 1) { TRef hdr2 = recff_bufhdr(J); - TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), hdr2, sep); - tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), tr2, str); + TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), hdr2, sep); + tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), tr2, str); str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2); } } tr = hdr = recff_bufhdr(J); if (str2) { - tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, str); + tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, str); str = str2; rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1)); } @@ -935,8 +939,8 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) if ((J->base[2] && tref_istruecond(J->base[3])) || (emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)), !lj_str_haspattern(pat))) { /* Search for fixed string. */ - TRef trsptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart); - TRef trpptr = emitir(IRT(IR_STRREF, IRT_P32), trpat, tr0); + TRef trsptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart); + TRef trpptr = emitir(IRT(IR_STRREF, IRT_PGC), trpat, tr0); TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart); TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN); TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen); @@ -944,13 +948,13 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) if (lj_str_find(strdata(str)+(MSize)start, strdata(pat), str->len-(MSize)start, pat->len)) { TRef pos; - emitir(IRTG(IR_NE, IRT_P32), tr, trp0); - pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_P32), trstr, tr0)); + emitir(IRTG(IR_NE, IRT_PGC), tr, trp0); + pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_PGC), trstr, tr0)); J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1)); J->base[1] = emitir(IRTI(IR_ADD), pos, trplen); rd->nres = 2; } else { - emitir(IRTG(IR_EQ, IRT_P32), tr, trp0); + emitir(IRTG(IR_EQ, IRT_PGC), tr, trp0); J->base[0] = TREF_NIL; } } else { /* Search for pattern. */ @@ -977,7 +981,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) IRCallID id; switch (STRFMT_TYPE(sf)) { case STRFMT_LIT: - tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, + tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len))); break; case STRFMT_INT: @@ -986,7 +990,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) if (!tref_isinteger(tra)) goto handle_num; if (sf == STRFMT_INT) { /* Shortcut for plain %d. */ - tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, + tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT)); } else { #if LJ_HASFFI @@ -1016,7 +1020,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) return; } if (sf == STRFMT_STR) /* Shortcut for plain %s. */ - tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tra); + tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, tra); else if ((sf & STRFMT_T_QUOTED)) tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra); else @@ -1025,7 +1029,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) case STRFMT_CHAR: tra = lj_opt_narrow_toint(J, tra); if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */ - tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, + tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR)); else tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra); @@ -1110,8 +1114,13 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id) { TRef tr, ud, fp; if (id) { /* io.func() */ +#if LJ_GC64 + /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ + ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id])); +#else tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); +#endif } else { /* fp:method() */ ud = J->base[0]; if (!tref_isudata(ud)) @@ -1133,7 +1142,7 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd) ptrdiff_t i = rd->data == 0 ? 1 : 0; for (; J->base[i]; i++) { TRef str = lj_ir_tostr(J, J->base[i]); - TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero); + TRef buf = emitir(IRT(IR_STRREF, IRT_PGC), str, zero); TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); if (tref_isk(len) && IR(tref_ref(len))->i == 1) { IRIns *irs = IR(tref_ref(str)); diff --git a/luajit-2.1/src/lj_ffrecord.h b/luajit-2.1/src/lj_ffrecord.h index f858ca2..3b40745 100644 --- a/luajit-2.1/src/lj_ffrecord.h +++ b/luajit-2.1/src/lj_ffrecord.h @@ -1,6 +1,6 @@ /* ** Fast function call recorder. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_FFRECORD_H diff --git a/luajit-2.1/src/lj_frame.h b/luajit-2.1/src/lj_frame.h index a86c36b..19c49a4 100644 --- a/luajit-2.1/src/lj_frame.h +++ b/luajit-2.1/src/lj_frame.h @@ -1,6 +1,6 @@ /* ** Stack frames. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_FRAME_H @@ -116,6 +116,17 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ /* These definitions must match with the arch-specific *.dasc files. */ #if LJ_TARGET_X86 +#if LJ_ABI_WIN +#define CFRAME_OFS_ERRF (19*4) +#define CFRAME_OFS_NRES (18*4) +#define CFRAME_OFS_PREV (17*4) +#define CFRAME_OFS_L (16*4) +#define CFRAME_OFS_SEH (9*4) +#define CFRAME_OFS_PC (6*4) +#define CFRAME_OFS_MULTRES (5*4) +#define CFRAME_SIZE (16*4) +#define CFRAME_SHIFT_MULTRES 0 +#else #define CFRAME_OFS_ERRF (15*4) #define CFRAME_OFS_NRES (14*4) #define CFRAME_OFS_PREV (13*4) @@ -124,6 +135,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ #define CFRAME_OFS_MULTRES (5*4) #define CFRAME_SIZE (12*4) #define CFRAME_SHIFT_MULTRES 0 +#endif #elif LJ_TARGET_X64 #if LJ_ABI_WIN #define CFRAME_OFS_PREV (13*8) @@ -217,14 +229,40 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ #define CFRAME_SIZE 272 #define CFRAME_SHIFT_MULTRES 3 #endif -#elif LJ_TARGET_MIPS +#elif LJ_TARGET_MIPS32 +#if LJ_ARCH_HASFPU #define CFRAME_OFS_ERRF 124 #define CFRAME_OFS_NRES 120 #define CFRAME_OFS_PREV 116 #define CFRAME_OFS_L 112 +#define CFRAME_SIZE 112 +#else +#define CFRAME_OFS_ERRF 76 +#define CFRAME_OFS_NRES 72 +#define CFRAME_OFS_PREV 68 +#define CFRAME_OFS_L 64 +#define CFRAME_SIZE 64 +#endif #define CFRAME_OFS_PC 20 #define CFRAME_OFS_MULTRES 16 -#define CFRAME_SIZE 112 +#define CFRAME_SHIFT_MULTRES 3 +#elif LJ_TARGET_MIPS64 +#if LJ_ARCH_HASFPU +#define CFRAME_OFS_ERRF 188 +#define CFRAME_OFS_NRES 184 +#define CFRAME_OFS_PREV 176 +#define CFRAME_OFS_L 168 +#define CFRAME_OFS_PC 160 +#define CFRAME_SIZE 192 +#else +#define CFRAME_OFS_ERRF 124 +#define CFRAME_OFS_NRES 120 +#define CFRAME_OFS_PREV 112 +#define CFRAME_OFS_L 104 +#define CFRAME_OFS_PC 96 +#define CFRAME_SIZE 128 +#endif +#define CFRAME_OFS_MULTRES 0 #define CFRAME_SHIFT_MULTRES 3 #else #error "Missing CFRAME_* definitions for this architecture" diff --git a/luajit-2.1/src/lj_func.c b/luajit-2.1/src/lj_func.c index eb8a9db..639dad8 100644 --- a/luajit-2.1/src/lj_func.c +++ b/luajit-2.1/src/lj_func.c @@ -1,6 +1,6 @@ /* ** Function handling (prototypes, functions and upvalues). -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -140,7 +140,9 @@ GCfunc *lj_func_newL_empty(lua_State *L, GCproto *pt, GCtab *env) /* NOBARRIER: The GCfunc is new (marked white). */ for (i = 0; i < nuv; i++) { GCupval *uv = func_emptyuv(L); - uv->dhash = (uint32_t)(uintptr_t)pt ^ ((uint32_t)proto_uv(pt)[i] << 24); + int32_t v = proto_uv(pt)[i]; + uv->immutable = ((v / PROTO_UV_IMMUTABLE) & 1); + uv->dhash = (uint32_t)(uintptr_t)pt ^ (v << 24); setgcref(fn->l.uvptr[i], obj2gco(uv)); } fn->l.nupvalues = (uint8_t)nuv; diff --git a/luajit-2.1/src/lj_func.h b/luajit-2.1/src/lj_func.h index a6e534e..901751b 100644 --- a/luajit-2.1/src/lj_func.h +++ b/luajit-2.1/src/lj_func.h @@ -1,6 +1,6 @@ /* ** Function handling (prototypes, functions and upvalues). -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_FUNC_H diff --git a/luajit-2.1/src/lj_gc.c b/luajit-2.1/src/lj_gc.c index afd3997..2aaf5b2 100644 --- a/luajit-2.1/src/lj_gc.c +++ b/luajit-2.1/src/lj_gc.c @@ -1,6 +1,6 @@ /* ** Garbage collector. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -169,12 +169,19 @@ static int gc_traverse_tab(global_State *g, GCtab *t) while ((c = *modestr++)) { if (c == 'k') weak |= LJ_GC_WEAKKEY; else if (c == 'v') weak |= LJ_GC_WEAKVAL; - else if (c == 'K') weak = (int)(~0u & ~LJ_GC_WEAKVAL); } - if (weak > 0) { /* Weak tables are cleared in the atomic phase. */ - t->marked = (uint8_t)((t->marked & ~LJ_GC_WEAK) | weak); - setgcrefr(t->gclist, g->gc.weak); - setgcref(g->gc.weak, obj2gco(t)); + if (weak) { /* Weak tables are cleared in the atomic phase. */ +#if LJ_HASFFI + CTState *cts = ctype_ctsG(g); + if (cts && cts->finalizer == t) { + weak = (int)(~0u & ~LJ_GC_WEAKVAL); + } else +#endif + { + t->marked = (uint8_t)((t->marked & ~LJ_GC_WEAK) | weak); + setgcrefr(t->gclist, g->gc.weak); + setgcref(g->gc.weak, obj2gco(t)); + } } } if (weak == LJ_GC_WEAK) /* Nothing to mark if both keys/values are weak. */ @@ -238,6 +245,8 @@ static void gc_traverse_trace(global_State *g, GCtrace *T) IRIns *ir = &T->ir[ref]; if (ir->o == IR_KGC) gc_markobj(g, ir_kgc(ir)); + if (irt_is64(ir->t) && ir->o != IR_KNULL) + ref++; } if (T->link) gc_marktrace(g, T->link); if (T->nextroot) gc_marktrace(g, T->nextroot); @@ -308,7 +317,7 @@ static size_t propagatemark(global_State *g) if (gc_traverse_tab(g, t) > 0) black2gray(o); /* Keep weak tables gray. */ return sizeof(GCtab) + sizeof(TValue) * t->asize + - sizeof(Node) * (t->hmask + 1); + (t->hmask ? sizeof(Node) * (t->hmask + 1) : 0); } else if (LJ_LIKELY(gct == ~LJ_TFUNC)) { GCfunc *fn = gco2func(o); gc_traverse_func(g, fn); diff --git a/luajit-2.1/src/lj_gc.h b/luajit-2.1/src/lj_gc.h index 847eb78..669bbe9 100644 --- a/luajit-2.1/src/lj_gc.h +++ b/luajit-2.1/src/lj_gc.h @@ -1,6 +1,6 @@ /* ** Garbage collector. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_GC_H diff --git a/luajit-2.1/src/lj_gdbjit.c b/luajit-2.1/src/lj_gdbjit.c index 9b95e52..c219ffa 100644 --- a/luajit-2.1/src/lj_gdbjit.c +++ b/luajit-2.1/src/lj_gdbjit.c @@ -1,6 +1,6 @@ /* ** Client for the GDB JIT API. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_gdbjit_c @@ -296,6 +296,9 @@ enum { #elif LJ_TARGET_ARM DW_REG_SP = 13, DW_REG_RA = 14, +#elif LJ_TARGET_ARM64 + DW_REG_SP = 31, + DW_REG_RA = 30, #elif LJ_TARGET_PPC DW_REG_SP = 1, DW_REG_RA = 65, @@ -374,6 +377,8 @@ static const ELFheader elfhdr_template = { .machine = 62, #elif LJ_TARGET_ARM .machine = 40, +#elif LJ_TARGET_ARM64 + .machine = 183, #elif LJ_TARGET_PPC .machine = 20, #elif LJ_TARGET_MIPS @@ -563,6 +568,13 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) int i; for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } } +#elif LJ_TARGET_ARM64 + { + int i; + DB(DW_CFA_offset|31); DUV(2); + for (i = 28; i >= 19; i--) { DB(DW_CFA_offset|i); DUV(3+(28-i)); } + for (i = 15; i >= 8; i--) { DB(DW_CFA_offset|32|i); DUV(28-i); } + } #elif LJ_TARGET_PPC { int i; @@ -719,6 +731,20 @@ static void gdbjit_buildobj(GDBJITctx *ctx) /* -- Interface to GDB JIT API -------------------------------------------- */ +static int gdbjit_lock; + +static void gdbjit_lock_acquire() +{ + while (__sync_lock_test_and_set(&gdbjit_lock, 1)) { + /* Just spin; futexes or pthreads aren't worth the portability cost. */ + } +} + +static void gdbjit_lock_release() +{ + __sync_lock_release(&gdbjit_lock); +} + /* Add new entry to GDB JIT symbol chain. */ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) { @@ -730,6 +756,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) ctx->T->gdbjit_entry = (void *)eo; /* Link new entry to chain and register it. */ eo->entry.prev_entry = NULL; + gdbjit_lock_acquire(); eo->entry.next_entry = __jit_debug_descriptor.first_entry; if (eo->entry.next_entry) eo->entry.next_entry->prev_entry = &eo->entry; @@ -739,6 +766,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) __jit_debug_descriptor.relevant_entry = &eo->entry; __jit_debug_descriptor.action_flag = GDBJIT_REGISTER; __jit_debug_register_code(); + gdbjit_lock_release(); } /* Add debug info for newly compiled trace and notify GDB. */ @@ -770,6 +798,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T) { GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry; if (eo) { + gdbjit_lock_acquire(); if (eo->entry.prev_entry) eo->entry.prev_entry->next_entry = eo->entry.next_entry; else @@ -779,6 +808,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T) __jit_debug_descriptor.relevant_entry = &eo->entry; __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER; __jit_debug_register_code(); + gdbjit_lock_release(); lj_mem_free(J2G(J), eo, eo->sz); } } diff --git a/luajit-2.1/src/lj_gdbjit.h b/luajit-2.1/src/lj_gdbjit.h index 49c5863..bbaa156 100644 --- a/luajit-2.1/src/lj_gdbjit.h +++ b/luajit-2.1/src/lj_gdbjit.h @@ -1,6 +1,6 @@ /* ** Client for the GDB JIT API. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_GDBJIT_H diff --git a/luajit-2.1/src/lj_ir.c b/luajit-2.1/src/lj_ir.c index 567aec8..5baece6 100644 --- a/luajit-2.1/src/lj_ir.c +++ b/luajit-2.1/src/lj_ir.c @@ -1,6 +1,6 @@ /* ** SSA IR (Intermediate Representation) emitter. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_ir_c @@ -91,7 +91,7 @@ static void lj_ir_growbot(jit_State *J) IRIns *baseir = J->irbuf + J->irbotlim; MSize szins = J->irtoplim - J->irbotlim; lua_assert(szins != 0); - lua_assert(J->cur.nk == J->irbotlim); + lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim); if (J->cur.nins + (szins >> 1) < J->irtoplim) { /* More than half of the buffer is free on top: shift up by a quarter. */ MSize ofs = szins >> 2; @@ -145,6 +145,16 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...) return emitir(CCI_OPTYPE(ci), tr, id); } +/* Load field of type t from GG_State + offset. Must be 32 bit aligned. */ +LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs) +{ + lua_assert((ofs & 3) == 0); + ofs >>= 2; + lua_assert(ofs >= IRFL__MAX && ofs <= 0x3ff); /* 10 bit FOLD key limit. */ + lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs); + return lj_opt_fold(J); +} + /* -- Interning of constants ---------------------------------------------- */ /* @@ -165,6 +175,24 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J) return ref; } +/* Get ref of next 64 bit IR constant and optionally grow IR. +** Note: this may invalidate all IRIns *! +*/ +static LJ_AINLINE IRRef ir_nextk64(jit_State *J) +{ + IRRef ref = J->cur.nk - 2; + lua_assert(J->state != LJ_TRACE_ASM); + if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J); + J->cur.nk = ref; + return ref; +} + +#if LJ_GC64 +#define ir_nextkgc ir_nextk64 +#else +#define ir_nextkgc ir_nextk +#endif + /* Intern int32_t constant. */ TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k) { @@ -184,95 +212,21 @@ found: return TREF(ref, IRT_INT); } -/* The MRef inside the KNUM/KINT64 IR instructions holds the address of the -** 64 bit constant. The constants themselves are stored in a chained array -** and shared across traces. -** -** Rationale for choosing this data structure: -** - The address of the constants is embedded in the generated machine code -** and must never move. A resizable array or hash table wouldn't work. -** - Most apps need very few non-32 bit integer constants (less than a dozen). -** - Linear search is hard to beat in terms of speed and low complexity. -*/ -typedef struct K64Array { - MRef next; /* Pointer to next list. */ - MSize numk; /* Number of used elements in this array. */ - TValue k[LJ_MIN_K64SZ]; /* Array of constants. */ -} K64Array; - -/* Free all chained arrays. */ -void lj_ir_k64_freeall(jit_State *J) -{ - K64Array *k; - for (k = mref(J->k64, K64Array); k; ) { - K64Array *next = mref(k->next, K64Array); - lj_mem_free(J2G(J), k, sizeof(K64Array)); - k = next; - } - setmref(J->k64, NULL); -} - -/* Get new 64 bit constant slot. */ -static TValue *ir_k64_add(jit_State *J, K64Array *kp, uint64_t u64) -{ - TValue *ntv; - if (!(kp && kp->numk < LJ_MIN_K64SZ)) { /* Allocate a new array. */ - K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array); - setmref(kn->next, NULL); - kn->numk = 0; - if (kp) - setmref(kp->next, kn); /* Chain to the end of the list. */ - else - setmref(J->k64, kn); /* Link first array. */ - kp = kn; - } - ntv = &kp->k[kp->numk++]; /* Add to current array. */ - ntv->u64 = u64; - return ntv; -} - -/* Find 64 bit constant in chained array or add it. */ -cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64) -{ - K64Array *k, *kp = NULL; - MSize idx; - /* Search for the constant in the whole chain of arrays. */ - for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) { - kp = k; /* Remember previous element in list. */ - for (idx = 0; idx < k->numk; idx++) { /* Search one array. */ - TValue *tv = &k->k[idx]; - if (tv->u64 == u64) /* Needed for +-0/NaN/absmask. */ - return tv; - } - } - /* Otherwise add a new constant. */ - return ir_k64_add(J, kp, u64); -} - -TValue *lj_ir_k64_reserve(jit_State *J) -{ - K64Array *k, *kp = NULL; - lj_ir_k64_find(J, 0); /* Intern dummy 0 to protect the reserved slot. */ - /* Find last K64Array, if any. */ - for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) kp = k; - return ir_k64_add(J, kp, 0); /* Set to 0. Final value is set later. */ -} - -/* Intern 64 bit constant, given by its address. */ -TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv) +/* Intern 64 bit constant, given by its 64 bit pattern. */ +TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64) { IRIns *ir, *cir = J->cur.ir; IRRef ref; IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64; for (ref = J->chain[op]; ref; ref = cir[ref].prev) - if (ir_k64(&cir[ref]) == tv) + if (ir_k64(&cir[ref])->u64 == u64) goto found; - ref = ir_nextk(J); + ref = ir_nextk64(J); ir = IR(ref); - lua_assert(checkptrGC(tv)); - setmref(ir->ptr, tv); + ir[1].tv.u64 = u64; ir->t.irt = t; ir->o = op; + ir->op12 = 0; ir->prev = J->chain[op]; J->chain[op] = (IRRef1)ref; found: @@ -282,13 +236,13 @@ found: /* Intern FP constant, given by its 64 bit pattern. */ TRef lj_ir_knum_u64(jit_State *J, uint64_t u64) { - return lj_ir_k64(J, IR_KNUM, lj_ir_k64_find(J, u64)); + return lj_ir_k64(J, IR_KNUM, u64); } /* Intern 64 bit integer constant. */ TRef lj_ir_kint64(jit_State *J, uint64_t u64) { - return lj_ir_k64(J, IR_KINT64, lj_ir_k64_find(J, u64)); + return lj_ir_k64(J, IR_KINT64, u64); } /* Check whether a number is int and return it. -0 is NOT considered an int. */ @@ -323,15 +277,15 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t) { IRIns *ir, *cir = J->cur.ir; IRRef ref; - lua_assert(!LJ_GC64); /* TODO_GC64: major changes required. */ lua_assert(!isdead(J2G(J), o)); for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) if (ir_kgc(&cir[ref]) == o) goto found; - ref = ir_nextk(J); + ref = ir_nextkgc(J); ir = IR(ref); /* NOBARRIER: Current trace is a GC root. */ - setgcref(ir->gcr, o); + ir->op12 = 0; + setgcref(ir[LJ_GC64].gcr, o); ir->t.irt = (uint8_t)t; ir->o = IR_KGC; ir->prev = J->chain[IR_KGC]; @@ -340,24 +294,44 @@ found: return TREF(ref, t); } -/* Intern 32 bit pointer constant. */ +/* Allocate GCtrace constant placeholder (no interning). */ +TRef lj_ir_ktrace(jit_State *J) +{ + IRRef ref = ir_nextkgc(J); + IRIns *ir = IR(ref); + lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE); + ir->t.irt = IRT_P64; + ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. */ + ir->op12 = 0; + ir->prev = 0; + return TREF(ref, IRT_P64); +} + +/* Intern pointer constant. */ TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) { IRIns *ir, *cir = J->cur.ir; IRRef ref; - lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr); +#if LJ_64 && !LJ_GC64 + lua_assert((void *)(uintptr_t)u32ptr(ptr) == ptr); +#endif for (ref = J->chain[op]; ref; ref = cir[ref].prev) - if (mref(cir[ref].ptr, void) == ptr) + if (ir_kptr(&cir[ref]) == ptr) goto found; +#if LJ_GC64 + ref = ir_nextk64(J); +#else ref = ir_nextk(J); +#endif ir = IR(ref); - setmref(ir->ptr, ptr); - ir->t.irt = IRT_P32; + ir->op12 = 0; + setmref(ir[LJ_GC64].ptr, ptr); + ir->t.irt = IRT_PGC; ir->o = op; ir->prev = J->chain[op]; J->chain[op] = (IRRef1)ref; found: - return TREF(ref, IRT_P32); + return TREF(ref, IRT_PGC); } /* Intern typed NULL constant. */ @@ -412,9 +386,8 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break; case IR_KINT: setintV(tv, ir->i); break; case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break; - case IR_KPTR: case IR_KKPTR: case IR_KNULL: - setlightudV(tv, mref(ir->ptr, void)); - break; + case IR_KPTR: case IR_KKPTR: setlightudV(tv, ir_kptr(ir)); break; + case IR_KNULL: setlightudV(tv, NULL); break; case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; #if LJ_HASFFI case IR_KINT64: { diff --git a/luajit-2.1/src/lj_ir.h b/luajit-2.1/src/lj_ir.h index 56e1977..34c2785 100644 --- a/luajit-2.1/src/lj_ir.h +++ b/luajit-2.1/src/lj_ir.h @@ -1,6 +1,6 @@ /* ** SSA IR (Intermediate Representation) format. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_IR_H @@ -220,7 +220,7 @@ IRFLDEF(FLENUM) /* SLOAD mode bits, stored in op2. */ #define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ -#define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */ +#define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */ #define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ @@ -294,7 +294,9 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; /* -- IR instruction types ------------------------------------------------ */ -/* Map of itypes to non-negative numbers. ORDER LJ_T. +#define IRTSIZE_PGC (LJ_GC64 ? 8 : 4) + +/* Map of itypes to non-negative numbers and their sizes. ORDER LJ_T. ** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for ** IRT_P32 and IRT_P64, which never escape the IR. ** The various integers are only used in the IR and can only escape to @@ -302,12 +304,13 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; ** contiguous and next to IRT_NUM (see the typerange macros below). */ #define IRTDEF(_) \ - _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) _(STR, 4) \ - _(P32, 4) _(THREAD, 4) _(PROTO, 4) _(FUNC, 4) _(P64, 8) _(CDATA, 4) \ - _(TAB, 4) _(UDATA, 4) \ + _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) \ + _(STR, IRTSIZE_PGC) _(P32, 4) _(THREAD, IRTSIZE_PGC) _(PROTO, IRTSIZE_PGC) \ + _(FUNC, IRTSIZE_PGC) _(P64, 8) _(CDATA, IRTSIZE_PGC) _(TAB, IRTSIZE_PGC) \ + _(UDATA, IRTSIZE_PGC) \ _(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \ _(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \ - _(SOFTFP, 4) /* There is room for 9 more types. */ + _(SOFTFP, 4) /* There is room for 8 more types. */ /* IR result type and flags (8 bit). */ typedef enum { @@ -318,9 +321,10 @@ IRTDEF(IRTENUM) /* Native pointer type and the corresponding integer type. */ IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32, + IRT_PGC = LJ_GC64 ? IRT_P64 : IRT_P32, + IRT_IGC = LJ_GC64 ? IRT_I64 : IRT_INT, IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT, IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32, - /* TODO_GC64: major changes required for all uses of IRT_P32. */ /* Additional flags. */ IRT_MARK = 0x20, /* Marker for misc. purposes. */ @@ -408,7 +412,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv) static LJ_AINLINE uint32_t irt_toitype_(IRType t) { - lua_assert(!LJ_64 || t != IRT_LIGHTUD); + lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD); if (LJ_DUALNUM && t > IRT_NUM) { return LJ_TISNUM; } else { @@ -521,7 +525,9 @@ typedef uint32_t TRef; ** +-------+-------+---+---+---+---+ ** | op1 | op2 | t | o | r | s | ** +-------+-------+---+---+---+---+ -** | op12/i/gco | ot | prev | (alternative fields in union) +** | op12/i/gco32 | ot | prev | (alternative fields in union) +** +-------+-------+---+---+---+---+ +** | TValue/gco64 | (2nd IR slot for 64 bit constants) ** +---------------+-------+-------+ ** 32 16 16 ** @@ -549,22 +555,27 @@ typedef union IRIns { ) }; int32_t i; /* 32 bit signed integer literal (overlaps op12). */ - GCRef gcr; /* GCobj constant (overlaps op12). */ - MRef ptr; /* Pointer constant (overlaps op12). */ + GCRef gcr; /* GCobj constant (overlaps op12 or entire slot). */ + MRef ptr; /* Pointer constant (overlaps op12 or entire slot). */ + TValue tv; /* TValue constant (overlaps entire slot). */ } IRIns; -/* TODO_GC64: major changes required. */ -#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)->gcr)) +#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr)) #define ir_kstr(ir) (gco2str(ir_kgc((ir)))) #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) #define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) #define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) -#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, mref((ir)->ptr, cTValue)) -#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) +#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv) +#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv) #define ir_k64(ir) \ - check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) + check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \ + (LJ_GC64 && \ + ((ir)->o == IR_KGC || \ + (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)), \ + &(ir)[1].tv) #define ir_kptr(ir) \ - check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void)) + check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \ + mref((ir)[LJ_GC64].ptr, void)) /* A store or any other op with a non-weak guard has a side-effect. */ static LJ_AINLINE int ir_sideeff(IRIns *ir) diff --git a/luajit-2.1/src/lj_ircall.h b/luajit-2.1/src/lj_ircall.h index 84e41ec..973c36e 100644 --- a/luajit-2.1/src/lj_ircall.h +++ b/luajit-2.1/src/lj_ircall.h @@ -1,6 +1,6 @@ /* ** IR CALL* instruction definitions. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_IRCALL_H @@ -78,7 +78,13 @@ typedef struct CCallInfo { #define IRCALLCOND_SOFTFP_FFI(x) NULL #endif -#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) +#if LJ_SOFTFP && LJ_TARGET_MIPS32 +#define IRCALLCOND_SOFTFP_MIPS(x) x +#else +#define IRCALLCOND_SOFTFP_MIPS(x) NULL +#endif + +#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32) #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) #define IRCALLCOND_FP64_FFI(x) x @@ -98,12 +104,6 @@ typedef struct CCallInfo { #define IRCALLCOND_FFI32(x) NULL #endif -#if LJ_TARGET_X86 -#define CCI_RANDFPR 0 /* Clang on OSX/x86 is overzealous. */ -#else -#define CCI_RANDFPR CCI_NOFPRCLOBBER -#endif - #if LJ_SOFTFP #define XA_FP CCI_XA #define XA2_FP (CCI_XA+CCI_XA) @@ -123,40 +123,40 @@ typedef struct CCallInfo { /* Function definitions for CALL* instructions. */ #define IRCALLDEF(_) \ _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ - _(ANY, lj_str_find, 4, N, P32, 0) \ + _(ANY, lj_str_find, 4, N, PGC, 0) \ _(ANY, lj_str_new, 3, S, STR, CCI_L) \ _(ANY, lj_strscan_num, 2, FN, INT, 0) \ _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L) \ _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L) \ _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L) \ - _(ANY, lj_strfmt_putint, 2, FL, P32, 0) \ - _(ANY, lj_strfmt_putnum, 2, FL, P32, 0) \ - _(ANY, lj_strfmt_putquoted, 2, FL, P32, 0) \ - _(ANY, lj_strfmt_putfxint, 3, L, P32, XA_64) \ - _(ANY, lj_strfmt_putfnum_int, 3, L, P32, XA_FP) \ - _(ANY, lj_strfmt_putfnum_uint, 3, L, P32, XA_FP) \ - _(ANY, lj_strfmt_putfnum, 3, L, P32, XA_FP) \ - _(ANY, lj_strfmt_putfstr, 3, L, P32, 0) \ - _(ANY, lj_strfmt_putfchar, 3, L, P32, 0) \ - _(ANY, lj_buf_putmem, 3, S, P32, 0) \ - _(ANY, lj_buf_putstr, 2, FL, P32, 0) \ - _(ANY, lj_buf_putchar, 2, FL, P32, 0) \ - _(ANY, lj_buf_putstr_reverse, 2, FL, P32, 0) \ - _(ANY, lj_buf_putstr_lower, 2, FL, P32, 0) \ - _(ANY, lj_buf_putstr_upper, 2, FL, P32, 0) \ - _(ANY, lj_buf_putstr_rep, 3, L, P32, 0) \ - _(ANY, lj_buf_puttab, 5, L, P32, 0) \ + _(ANY, lj_strfmt_putint, 2, FL, PGC, 0) \ + _(ANY, lj_strfmt_putnum, 2, FL, PGC, 0) \ + _(ANY, lj_strfmt_putquoted, 2, FL, PGC, 0) \ + _(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64) \ + _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP) \ + _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP) \ + _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP) \ + _(ANY, lj_strfmt_putfstr, 3, L, PGC, 0) \ + _(ANY, lj_strfmt_putfchar, 3, L, PGC, 0) \ + _(ANY, lj_buf_putmem, 3, S, PGC, 0) \ + _(ANY, lj_buf_putstr, 2, FL, PGC, 0) \ + _(ANY, lj_buf_putchar, 2, FL, PGC, 0) \ + _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, 0) \ + _(ANY, lj_buf_putstr_lower, 2, FL, PGC, 0) \ + _(ANY, lj_buf_putstr_upper, 2, FL, PGC, 0) \ + _(ANY, lj_buf_putstr_rep, 3, L, PGC, 0) \ + _(ANY, lj_buf_puttab, 5, L, PGC, 0) \ _(ANY, lj_buf_tostr, 1, FL, STR, 0) \ _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L) \ _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ _(ANY, lj_tab_clear, 1, FS, NIL, 0) \ - _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \ + _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \ _(ANY, lj_tab_len, 1, FL, INT, 0) \ _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ - _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \ - _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_RANDFPR)\ + _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \ + _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \ _(ANY, lj_vm_modi, 2, FN, INT, 0) \ _(ANY, sinh, 1, N, NUM, XA_FP) \ _(ANY, cosh, 1, N, NUM, XA_FP) \ @@ -189,6 +189,8 @@ typedef struct CCallInfo { _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \ _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \ _(SOFTFP, softfp_d2i, 2, N, INT, 0) \ + _(SOFTFP_MIPS, lj_vm_sfmin, 4, N, NUM, 0) \ + _(SOFTFP_MIPS, lj_vm_sfmax, 4, N, NUM, 0) \ _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \ @@ -270,6 +272,22 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; #define fp64_f2l __aeabi_f2lz #define fp64_f2ul __aeabi_f2ulz #endif +#elif LJ_TARGET_MIPS +#define softfp_add __adddf3 +#define softfp_sub __subdf3 +#define softfp_mul __muldf3 +#define softfp_div __divdf3 +#define softfp_cmp __ledf2 +#define softfp_i2d __floatsidf +#define softfp_d2i __fixdfsi +#define softfp_ui2d __floatunsidf +#define softfp_f2d __extendsfdf2 +#define softfp_d2ui __fixunsdfsi +#define softfp_d2f __truncdfsf2 +#define softfp_i2f __floatsisf +#define softfp_ui2f __floatunsisf +#define softfp_f2i __fixsfsi +#define softfp_f2ui __fixunssfsi #else #error "Missing soft-float definitions for target architecture" #endif @@ -290,6 +308,10 @@ extern float softfp_ui2f(uint32_t a); extern int32_t softfp_f2i(float a); extern uint32_t softfp_f2ui(float a); #endif +#if LJ_TARGET_MIPS +extern double lj_vm_sfmin(double a, double b); +extern double lj_vm_sfmax(double a, double b); +#endif #endif #if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) diff --git a/luajit-2.1/src/lj_iropt.h b/luajit-2.1/src/lj_iropt.h index 4106ef8..73aef0e 100644 --- a/luajit-2.1/src/lj_iropt.h +++ b/luajit-2.1/src/lj_iropt.h @@ -1,6 +1,6 @@ /* ** Common header for IR emitter and optimizations. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_IROPT_H @@ -36,12 +36,11 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J) return ref; } +LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs); + /* Interning of constants. */ LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); -LJ_FUNC void lj_ir_k64_freeall(jit_State *J); -LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv); -LJ_FUNC TValue *lj_ir_k64_reserve(jit_State *J); -LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64); +LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64); LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64); LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n); LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64); @@ -49,6 +48,7 @@ LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t); LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr); LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t); LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot); +LJ_FUNC TRef lj_ir_ktrace(jit_State *J); #if LJ_64 #define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k)) @@ -75,8 +75,8 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n) #define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000)) /* Special 128 bit SIMD constants. */ -#define lj_ir_knum_abs(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_ABS)) -#define lj_ir_knum_neg(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_NEG)) +#define lj_ir_ksimd(J, idx) \ + lj_ir_ggfload(J, IRT_NUM, (uintptr_t)LJ_KSIMD(J, idx) - (uintptr_t)J2GG(J)) /* Access to constants. */ LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); @@ -143,8 +143,8 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef key); LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc, IROp op); LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc); -LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc); -LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc); +LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc); +LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc); LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); /* Optimization passes. */ diff --git a/luajit-2.1/src/lj_jit.h b/luajit-2.1/src/lj_jit.h index 10900bf..92054e3 100644 --- a/luajit-2.1/src/lj_jit.h +++ b/luajit-2.1/src/lj_jit.h @@ -1,6 +1,6 @@ /* ** Common definitions for the JIT compiler. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_JIT_H @@ -19,10 +19,11 @@ #define JIT_F_SSE4_1 0x00000040 #define JIT_F_PREFER_IMUL 0x00000080 #define JIT_F_LEA_AGU 0x00000100 +#define JIT_F_BMI2 0x00000200 /* Names for the CPU-specific flags. Must match the order above. */ #define JIT_F_CPU_FIRST JIT_F_SSE2 -#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM" +#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2" #elif LJ_TARGET_ARM #define JIT_F_ARMV6_ 0x00000010 #define JIT_F_ARMV6T2_ 0x00000020 @@ -45,12 +46,16 @@ #define JIT_F_CPU_FIRST JIT_F_SQRT #define JIT_F_CPUSTRING "\4SQRT\5ROUND" #elif LJ_TARGET_MIPS -#define JIT_F_MIPS32R2 0x00000010 +#define JIT_F_MIPSXXR2 0x00000010 /* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_MIPS32R2 +#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 +#if LJ_TARGET_MIPS32 #define JIT_F_CPUSTRING "\010MIPS32R2" #else +#define JIT_F_CPUSTRING "\010MIPS64R2" +#endif +#else #define JIT_F_CPU_FIRST 0 #define JIT_F_CPUSTRING "" #endif @@ -178,14 +183,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) #define SNAP_TR(slot, tr) \ (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) +#if !LJ_FR2 #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) +#endif #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) #define snap_ref(sn) ((sn) & 0xffff) #define snap_slot(sn) ((BCReg)((sn) >> 24)) #define snap_isframe(sn) ((sn) & SNAP_FRAME) -#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn)) #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) +static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn) +{ +#if LJ_FR2 + uint64_t pcbase; + memcpy(&pcbase, sn, sizeof(uint64_t)); + return (const BCIns *)(pcbase >> 8); +#else + return (const BCIns *)(uintptr_t)*sn; +#endif +} + /* Snapshot and exit numbers. */ typedef uint32_t SnapNo; typedef uint32_t ExitNo; @@ -307,6 +324,45 @@ enum { LJ_KSIMD__MAX }; +enum { +#if LJ_TARGET_X86ORX64 + LJ_K64_TOBIT, /* 2^52 + 2^51 */ + LJ_K64_2P64, /* 2^64 */ + LJ_K64_M2P64, /* -2^64 */ +#if LJ_32 + LJ_K64_M2P64_31, /* -2^64 or -2^31 */ +#else + LJ_K64_M2P64_31 = LJ_K64_M2P64, +#endif +#endif +#if LJ_TARGET_MIPS + LJ_K64_2P31, /* 2^31 */ +#if LJ_64 + LJ_K64_2P63, /* 2^63 */ + LJ_K64_M2P64, /* -2^64 */ +#endif +#endif + LJ_K64__MAX, +}; + +enum { +#if LJ_TARGET_X86ORX64 + LJ_K32_M2P64_31, /* -2^64 or -2^31 */ +#endif +#if LJ_TARGET_PPC + LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ + LJ_K32_2P52, /* 2^52 */ +#endif +#if LJ_TARGET_PPC || LJ_TARGET_MIPS + LJ_K32_2P31, /* 2^31 */ +#endif +#if LJ_TARGET_MIPS64 + LJ_K32_2P63, /* 2^63 */ + LJ_K32_M2P64, /* -2^64 */ +#endif + LJ_K32__MAX +}; + /* Get 16 byte aligned pointer to SIMD constant. */ #define LJ_KSIMD(J, n) \ ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) @@ -323,13 +379,14 @@ enum { /* Fold state is used to fold instructions on-the-fly. */ typedef struct FoldState { IRIns ins; /* Currently emitted instruction. */ - IRIns left; /* Instruction referenced by left operand. */ - IRIns right; /* Instruction referenced by right operand. */ + IRIns left[2]; /* Instruction referenced by left operand. */ + IRIns right[2]; /* Instruction referenced by right operand. */ } FoldState; /* JIT compiler state. */ typedef struct jit_State { GCtrace cur; /* Current trace. */ + GCtrace *curfinal; /* Final address of current trace (set during asm). */ lua_State *L; /* Current Lua state. */ const BCIns *pc; /* Current PC. */ @@ -359,8 +416,9 @@ typedef struct jit_State { int32_t framedepth; /* Current frame depth. */ int32_t retdepth; /* Return frame depth (count of RETF). */ - MRef k64; /* Pointer to chained array of 64 bit constants. */ TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ + TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */ + uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */ IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ @@ -381,7 +439,7 @@ typedef struct jit_State { GCRef *trace; /* Array of traces. */ TraceNo freetrace; /* Start of scan for next free trace. */ MSize sizetrace; /* Size of trace array. */ - TValue *ktracep; /* Pointer to K64Array slot with GCtrace pointer. */ + IRRef1 ktrace; /* Reference to KGC with GCtrace. */ IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ diff --git a/luajit-2.1/src/lj_lex.c b/luajit-2.1/src/lj_lex.c index 5a918f7..2d2f819 100644 --- a/luajit-2.1/src/lj_lex.c +++ b/luajit-2.1/src/lj_lex.c @@ -1,6 +1,6 @@ /* ** Lexical analyzer. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h diff --git a/luajit-2.1/src/lj_lex.h b/luajit-2.1/src/lj_lex.h index acd2285..33fa865 100644 --- a/luajit-2.1/src/lj_lex.h +++ b/luajit-2.1/src/lj_lex.h @@ -1,6 +1,6 @@ /* ** Lexical analyzer. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_LEX_H diff --git a/luajit-2.1/src/lj_lib.c b/luajit-2.1/src/lj_lib.c index b16d056..b8638de 100644 --- a/luajit-2.1/src/lj_lib.c +++ b/luajit-2.1/src/lj_lib.c @@ -1,6 +1,6 @@ /* ** Library function support. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_lib_c diff --git a/luajit-2.1/src/lj_lib.h b/luajit-2.1/src/lj_lib.h index 3fa7aa1..37ec9d7 100644 --- a/luajit-2.1/src/lj_lib.h +++ b/luajit-2.1/src/lj_lib.h @@ -1,6 +1,6 @@ /* ** Library function support. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_LIB_H diff --git a/luajit-2.1/src/lj_load.c b/luajit-2.1/src/lj_load.c index 95a6ab0..9a31d9a 100644 --- a/luajit-2.1/src/lj_load.c +++ b/luajit-2.1/src/lj_load.c @@ -1,6 +1,6 @@ /* ** Load and dump code. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include <errno.h> diff --git a/luajit-2.1/src/lj_mcode.c b/luajit-2.1/src/lj_mcode.c index d95ebeb..77035bf 100644 --- a/luajit-2.1/src/lj_mcode.c +++ b/luajit-2.1/src/lj_mcode.c @@ -1,6 +1,6 @@ /* ** Machine code management. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_mcode_c @@ -204,8 +204,8 @@ static void mcode_protect(jit_State *J, int prot) /* -- MCode area allocation ----------------------------------------------- */ -#if LJ_TARGET_X64 -#define mcode_validptr(p) ((p) && (uintptr_t)(p) < (uintptr_t)1<<47) +#if LJ_64 +#define mcode_validptr(p) (p) #else #define mcode_validptr(p) ((p) && (uintptr_t)(p) < 0xffff0000) #endif @@ -221,8 +221,8 @@ static void *mcode_alloc(jit_State *J, size_t sz) */ #if LJ_TARGET_MIPS /* Use the middle of the 256MB-aligned region. */ - uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & 0xf0000000u) + - 0x08000000u; + uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & + ~(uintptr_t)0x0fffffffu) + 0x08000000u; #else uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; #endif @@ -230,7 +230,8 @@ static void *mcode_alloc(jit_State *J, size_t sz) /* First try a contiguous area below the last one. */ uintptr_t hint = J->mcarea ? (uintptr_t)J->mcarea - sz : 0; int i; - for (i = 0; i < 32; i++) { /* 32 attempts ought to be enough ... */ + /* Limit probing iterations, depending on the available pool size. */ + for (i = 0; i < LJ_TARGET_JUMPRANGE; i++) { if (mcode_validptr(hint)) { void *p = mcode_alloc_at(J, hint, sz, MCPROT_GEN); @@ -239,11 +240,11 @@ static void *mcode_alloc(jit_State *J, size_t sz) return p; if (p) mcode_free(J, p, sz); /* Free badly placed area. */ } - /* Next try probing pseudo-random addresses. */ + /* Next try probing 64K-aligned pseudo-random addresses. */ do { - hint = (0x78fb ^ LJ_PRNG_BITS(J, 15)) << 16; /* 64K aligned. */ - } while (!(hint + sz < range)); - hint = target + hint - (range>>1); + hint = LJ_PRNG_BITS(J, LJ_TARGET_JUMPRANGE-16) << 16; + } while (!(hint + sz < range+range)); + hint = target + hint - range; } lj_trace_err(J, LJ_TRERR_MCODEAL); /* Give up. OS probably ignores hints? */ return NULL; diff --git a/luajit-2.1/src/lj_mcode.h b/luajit-2.1/src/lj_mcode.h index ee60452..f0847e9 100644 --- a/luajit-2.1/src/lj_mcode.h +++ b/luajit-2.1/src/lj_mcode.h @@ -1,6 +1,6 @@ /* ** Machine code management. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_MCODE_H diff --git a/luajit-2.1/src/lj_meta.c b/luajit-2.1/src/lj_meta.c index 104ecf0..0bd4d84 100644 --- a/luajit-2.1/src/lj_meta.c +++ b/luajit-2.1/src/lj_meta.c @@ -1,6 +1,6 @@ /* ** Metamethod handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -278,25 +278,25 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) */ TValue *e, *o = top; uint64_t tlen = tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM; - char *p, *buf; + SBuf *sb; do { o--; tlen += tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM; } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1))); if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV); - p = buf = lj_buf_tmp(L, (MSize)tlen); + sb = lj_buf_tmp_(L); + lj_buf_more(sb, (MSize)tlen); for (e = top, top = o; o <= e; o++) { if (tvisstr(o)) { GCstr *s = strV(o); MSize len = s->len; - p = lj_buf_wmem(p, strdata(s), len); + lj_buf_putmem(sb, strdata(s), len); } else if (tvisint(o)) { - p = lj_strfmt_wint(p, intV(o)); + lj_strfmt_putint(sb, intV(o)); } else { - lua_assert(tvisnum(o)); - p = lj_strfmt_wnum(p, o); + lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)); } } - setstrV(L, top, lj_str_new(L, buf, (size_t)(p-buf))); + setstrV(L, top, lj_buf_str(L, sb)); } } while (left >= 1); if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) { diff --git a/luajit-2.1/src/lj_meta.h b/luajit-2.1/src/lj_meta.h index 7f71633..73b4572 100644 --- a/luajit-2.1/src/lj_meta.h +++ b/luajit-2.1/src/lj_meta.h @@ -1,6 +1,6 @@ /* ** Metamethod handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_META_H diff --git a/luajit-2.1/src/lj_obj.c b/luajit-2.1/src/lj_obj.c index b78d2c8..ee33aeb 100644 --- a/luajit-2.1/src/lj_obj.c +++ b/luajit-2.1/src/lj_obj.c @@ -1,6 +1,6 @@ /* ** Miscellaneous object handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_obj_c diff --git a/luajit-2.1/src/lj_obj.h b/luajit-2.1/src/lj_obj.h index 74ed59b..52372c3 100644 --- a/luajit-2.1/src/lj_obj.h +++ b/luajit-2.1/src/lj_obj.h @@ -1,6 +1,6 @@ /* ** LuaJIT VM tags, values and objects. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -843,12 +843,16 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p) #endif #if LJ_FR2 -#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)(void *)(f)) +#define contptr(f) ((void *)(f)) +#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f)) #elif LJ_64 +#define contptr(f) \ + ((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin)) #define setcont(o, f) \ ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) #else -#define setcont(o, f) setlightudV((o), (void *)(f)) +#define contptr(f) ((void *)(f)) +#define setcont(o, f) setlightudV((o), contptr(f)) #endif #define tvchecklive(L, o) \ diff --git a/luajit-2.1/src/lj_opt_dce.c b/luajit-2.1/src/lj_opt_dce.c index 7f1faaf..2417f32 100644 --- a/luajit-2.1/src/lj_opt_dce.c +++ b/luajit-2.1/src/lj_opt_dce.c @@ -1,6 +1,6 @@ /* ** DCE: Dead Code Elimination. Pre-LOOP only -- ASM already performs DCE. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_dce_c diff --git a/luajit-2.1/src/lj_opt_fold.c b/luajit-2.1/src/lj_opt_fold.c index f809a99..706fbc9 100644 --- a/luajit-2.1/src/lj_opt_fold.c +++ b/luajit-2.1/src/lj_opt_fold.c @@ -2,7 +2,7 @@ ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation. ** ABCelim: Array Bounds Check Elimination. ** CSE: Common-Subexpression Elimination. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_fold_c @@ -136,8 +136,8 @@ /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) #define fins (&J->fold.ins) -#define fleft (&J->fold.left) -#define fright (&J->fold.right) +#define fleft (J->fold.left) +#define fright (J->fold.right) #define knumleft (ir_knum(fleft)->n) #define knumright (ir_knum(fright)->n) @@ -173,8 +173,6 @@ LJFOLD(ADD KNUM KNUM) LJFOLD(SUB KNUM KNUM) LJFOLD(MUL KNUM KNUM) LJFOLD(DIV KNUM KNUM) -LJFOLD(NEG KNUM KNUM) -LJFOLD(ABS KNUM KNUM) LJFOLD(ATAN2 KNUM KNUM) LJFOLD(LDEXP KNUM KNUM) LJFOLD(MIN KNUM KNUM) @@ -187,6 +185,15 @@ LJFOLDF(kfold_numarith) return lj_ir_knum(J, y); } +LJFOLD(NEG KNUM FLOAD) +LJFOLD(ABS KNUM FLOAD) +LJFOLDF(kfold_numabsneg) +{ + lua_Number a = knumleft; + lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD); + return lj_ir_knum(J, y); +} + LJFOLD(LDEXP KNUM KINT) LJFOLDF(kfold_ldexp) { @@ -347,6 +354,11 @@ static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) case IR_BAND: k1 &= k2; break; case IR_BOR: k1 |= k2; break; case IR_BXOR: k1 ^= k2; break; + case IR_BSHL: k1 <<= (k2 & 63); break; + case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break; + case IR_BSAR: k1 >>= (k2 & 63); break; + case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break; + case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break; #endif default: UNUSED(k2); lua_assert(0); break; } @@ -436,14 +448,14 @@ LJFOLDF(kfold_int64comp) #if LJ_HASFFI uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64; switch ((IROp)fins->o) { - case IR_LT: return CONDFOLD(a < b); - case IR_GE: return CONDFOLD(a >= b); - case IR_LE: return CONDFOLD(a <= b); - case IR_GT: return CONDFOLD(a > b); - case IR_ULT: return CONDFOLD((uint64_t)a < (uint64_t)b); - case IR_UGE: return CONDFOLD((uint64_t)a >= (uint64_t)b); - case IR_ULE: return CONDFOLD((uint64_t)a <= (uint64_t)b); - case IR_UGT: return CONDFOLD((uint64_t)a > (uint64_t)b); + case IR_LT: return CONDFOLD((int64_t)a < (int64_t)b); + case IR_GE: return CONDFOLD((int64_t)a >= (int64_t)b); + case IR_LE: return CONDFOLD((int64_t)a <= (int64_t)b); + case IR_GT: return CONDFOLD((int64_t)a > (int64_t)b); + case IR_ULT: return CONDFOLD(a < b); + case IR_UGE: return CONDFOLD(a >= b); + case IR_ULE: return CONDFOLD(a <= b); + case IR_UGT: return CONDFOLD(a > b); default: lua_assert(0); return FAILFOLD; } #else @@ -502,7 +514,7 @@ LJFOLDF(kfold_strref_snew) PHIBARRIER(ir); fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ fins->op1 = str; - fins->ot = IRT(IR_STRREF, IRT_P32); + fins->ot = IRT(IR_STRREF, IRT_PGC); return RETRYFOLD; } } @@ -911,13 +923,13 @@ LJFOLDF(shortcut_round) return NEXTFOLD; } -LJFOLD(ABS ABS KNUM) +LJFOLD(ABS ABS FLOAD) LJFOLDF(shortcut_left) { return LEFTFOLD; /* f(g(x)) ==> g(x) */ } -LJFOLD(ABS NEG KNUM) +LJFOLD(ABS NEG FLOAD) LJFOLDF(shortcut_dropleft) { PHIBARRIER(fleft); @@ -998,8 +1010,10 @@ LJFOLDF(simplify_nummuldiv_k) if (n == 1.0) { /* x o 1 ==> x */ return LEFTFOLD; } else if (n == -1.0) { /* x o -1 ==> -x */ + IRRef op1 = fins->op1; + fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */ + fins->op1 = op1; fins->o = IR_NEG; - fins->op2 = (IRRef1)lj_ir_knum_neg(J); return RETRYFOLD; } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ fins->o = IR_ADD; @@ -1651,6 +1665,14 @@ LJFOLDF(simplify_shiftk_andk) fins->op2 = (IRRef1)lj_ir_kint(J, k); fins->ot = IRTI(IR_BAND); return RETRYFOLD; + } else if (irk->o == IR_KINT64) { + uint64_t k = kfold_int64arith(ir_k64(irk)->u64, fright->i, (IROp)fins->o); + IROpT ot = fleft->ot; + fins->op1 = fleft->op1; + fins->op1 = (IRRef1)lj_opt_fold(J); + fins->op2 = (IRRef1)lj_ir_kint64(J, k); + fins->ot = ot; + return RETRYFOLD; } return NEXTFOLD; } @@ -2393,10 +2415,14 @@ retry: if (fins->op1 >= J->cur.nk) { key += (uint32_t)IR(fins->op1)->o << 10; *fleft = *IR(fins->op1); + if (fins->op1 < REF_TRUE) + fleft[1] = IR(fins->op1)[1]; } if (fins->op2 >= J->cur.nk) { key += (uint32_t)IR(fins->op2)->o; *fright = *IR(fins->op2); + if (fins->op2 < REF_TRUE) + fright[1] = IR(fins->op2)[1]; } else { key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ } diff --git a/luajit-2.1/src/lj_opt_loop.c b/luajit-2.1/src/lj_opt_loop.c index 4b4ab7d..04c6d06 100644 --- a/luajit-2.1/src/lj_opt_loop.c +++ b/luajit-2.1/src/lj_opt_loop.c @@ -1,6 +1,6 @@ /* ** LOOP: Loop Optimizations. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_loop_c diff --git a/luajit-2.1/src/lj_opt_mem.c b/luajit-2.1/src/lj_opt_mem.c index e04a622..cc177d3 100644 --- a/luajit-2.1/src/lj_opt_mem.c +++ b/luajit-2.1/src/lj_opt_mem.c @@ -3,7 +3,7 @@ ** AA: Alias Analysis using high-level semantic disambiguation. ** FWD: Load Forwarding (L2L) + Store Forwarding (S2L). ** DSE: Dead-Store Elimination. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_mem_c @@ -22,8 +22,8 @@ /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) #define fins (&J->fold.ins) -#define fleft (&J->fold.left) -#define fright (&J->fold.right) +#define fleft (J->fold.left) +#define fright (J->fold.right) /* ** Caveat #1: return value is not always a TRef -- only use with tref_ref(). diff --git a/luajit-2.1/src/lj_opt_narrow.c b/luajit-2.1/src/lj_opt_narrow.c index d199345..cd96ca4 100644 --- a/luajit-2.1/src/lj_opt_narrow.c +++ b/luajit-2.1/src/lj_opt_narrow.c @@ -1,7 +1,7 @@ /* ** NARROW: Narrowing of numbers to integers (double to int32_t). ** STRIPOV: Stripping of overflow checks. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_narrow_c @@ -517,18 +517,24 @@ static int numisint(lua_Number n) return (n == (lua_Number)lj_num2int(n)); } +/* Convert string to number. Error out for non-numeric string values. */ +static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o) +{ + if (tref_isstr(tr)) { + tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); + /* Would need an inverted STRTO for this rare and useless case. */ + if (!lj_strscan_num(strV(o), o)) /* Convert in-place. Value used below. */ + lj_trace_err(J, LJ_TRERR_BADTYPE); /* Punt if non-numeric. */ + } + return tr; +} + /* Narrowing of arithmetic operations. */ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc, IROp op) { - if (tref_isstr(rb)) { - rb = emitir(IRTG(IR_STRTO, IRT_NUM), rb, 0); - lj_strscan_num(strV(vb), vb); - } - if (tref_isstr(rc)) { - rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0); - lj_strscan_num(strV(vc), vc); - } + rb = conv_str_tonum(J, rb, vb); + rc = conv_str_tonum(J, rc, vc); /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */ if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) && tref_isinteger(rb) && tref_isinteger(rc) && @@ -543,24 +549,21 @@ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, /* Narrowing of unary minus operator. */ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc) { - if (tref_isstr(rc)) { - rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0); - lj_strscan_num(strV(vc), vc); - } + rc = conv_str_tonum(J, rc, vc); if (tref_isinteger(rc)) { if ((uint32_t)numberVint(vc) != 0x80000000u) return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc); rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); } - return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J)); + return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG)); } /* Narrowing of modulo operator. */ -TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc) +TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) { TRef tmp; - if (tvisstr(vc) && !lj_strscan_num(strV(vc), vc)) - lj_trace_err(J, LJ_TRERR_BADTYPE); + rb = conv_str_tonum(J, rb, vb); + rc = conv_str_tonum(J, rc, vc); if ((LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) && tref_isinteger(rb) && tref_isinteger(rc) && (tvisint(vc) ? intV(vc) != 0 : !tviszero(vc))) { @@ -577,10 +580,11 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc) } /* Narrowing of power operator or math.pow. */ -TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) +TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) { - if (tvisstr(vc) && !lj_strscan_num(strV(vc), vc)) - lj_trace_err(J, LJ_TRERR_BADTYPE); + rb = conv_str_tonum(J, rb, vb); + rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */ + rc = conv_str_tonum(J, rc, vc); /* Narrowing must be unconditional to preserve (-x)^i semantics. */ if (tvisint(vc) || numisint(numV(vc))) { int checkrange = 0; @@ -591,8 +595,6 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) checkrange = 1; } if (!tref_isinteger(rc)) { - if (tref_isstr(rc)) - rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0); /* Guarded conversion to integer! */ rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK); } diff --git a/luajit-2.1/src/lj_opt_sink.c b/luajit-2.1/src/lj_opt_sink.c index a98e9df..929ccb6 100644 --- a/luajit-2.1/src/lj_opt_sink.c +++ b/luajit-2.1/src/lj_opt_sink.c @@ -1,6 +1,6 @@ /* ** SINK: Allocation Sinking and Store Sinking. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_sink_c @@ -153,10 +153,9 @@ static void sink_remark_phi(jit_State *J) remark = 0; for (ir = IR(J->cur.nins-1); ir->o == IR_PHI; ir--) { IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - if (((irl->t.irt ^ irr->t.irt) & IRT_MARK)) - remark = 1; - else if (irl->prev == irr->prev) + if (!((irl->t.irt ^ irr->t.irt) & IRT_MARK) && irl->prev == irr->prev) continue; + remark |= (~(irl->t.irt & irr->t.irt) & IRT_MARK); irt_setmark(IR(ir->op1)->t); irt_setmark(IR(ir->op2)->t); } @@ -166,8 +165,8 @@ static void sink_remark_phi(jit_State *J) /* Sweep instructions and tag sunken allocations and stores. */ static void sink_sweep_ins(jit_State *J) { - IRIns *ir, *irfirst = IR(J->cur.nk); - for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) { + IRIns *ir, *irbase = IR(REF_BASE); + for (ir = IR(J->cur.nins-1) ; ir >= irbase; ir--) { switch (ir->o) { case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: { IRIns *ira = sink_checkalloc(J, ir); @@ -217,6 +216,12 @@ static void sink_sweep_ins(jit_State *J) break; } } + for (ir = IR(J->cur.nk); ir < irbase; ir++) { + irt_clearmark(ir->t); + ir->prev = REGSP_INIT; + if (irt_is64(ir->t) && ir->o != IR_KNULL) + ir++; + } } /* Allocation sinking and store sinking. diff --git a/luajit-2.1/src/lj_opt_split.c b/luajit-2.1/src/lj_opt_split.c index 81ded6c..fc93520 100644 --- a/luajit-2.1/src/lj_opt_split.c +++ b/luajit-2.1/src/lj_opt_split.c @@ -1,6 +1,6 @@ /* ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_split_c @@ -16,6 +16,7 @@ #include "lj_jit.h" #include "lj_ircall.h" #include "lj_iropt.h" +#include "lj_dispatch.h" #include "lj_vm.h" /* SPLIT pass: @@ -192,7 +193,7 @@ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref) nref = ir->op1; if (ofs == 0) return nref; } - return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs)); + return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs)); } #if LJ_HASFFI @@ -353,6 +354,8 @@ static void split_ir(jit_State *J) ir->prev = ref; /* Identity substitution for loword. */ hisubst[ref] = 0; } + if (irt_is64(ir->t) && ir->o != IR_KNULL) + ref++; } /* Process old IR instructions. */ @@ -433,7 +436,8 @@ static void split_ir(jit_State *J) nir->o = IR_CONV; /* Pass through loword. */ nir->op2 = (IRT_INT << 5) | IRT_INT; hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), - hisubst[ir->op1], hisubst[ir->op2]); + hisubst[ir->op1], + lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG)))); break; case IR_SLOAD: if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ @@ -448,15 +452,24 @@ static void split_ir(jit_State *J) case IR_STRTO: hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; + case IR_FLOAD: + lua_assert(ir->op1 == REF_NIL); + hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4)); + nir->op2 += LJ_BE*4; + break; case IR_XLOAD: { IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ J->cur.nins--; hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ +#if LJ_BE + hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2); + inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD); +#endif nref = lj_ir_nextins(J); nir = IR(nref); - *nir = inslo; /* Re-emit lo XLOAD immediately before hi XLOAD. */ - hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); + *nir = inslo; /* Re-emit lo XLOAD. */ #if LJ_LE + hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); ir->prev = nref; #else ir->prev = hi; hi = nref; @@ -596,7 +609,8 @@ static void split_ir(jit_State *J) } #endif else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ - /* Drop cast, since assembler doesn't care. */ + /* Drop cast, since assembler doesn't care. But fwd both parts. */ + hi = hiref; goto fwdlo; } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ IRRef k31 = lj_ir_kint(J, 31); diff --git a/luajit-2.1/src/lj_parse.c b/luajit-2.1/src/lj_parse.c index 9891897..08f7cfa 100644 --- a/luajit-2.1/src/lj_parse.c +++ b/luajit-2.1/src/lj_parse.c @@ -1,6 +1,6 @@ /* ** Lua parser (source code -> bytecode). -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -1282,12 +1282,14 @@ static void fscope_end(FuncState *fs) MSize idx = gola_new(ls, NAME_BREAK, VSTACK_LABEL, fs->pc); ls->vtop = idx; /* Drop break label immediately. */ gola_resolve(ls, bl, idx); + } else { /* Need the fixup step to propagate the breaks. */ + gola_fixup(ls, bl); return; - } /* else: need the fixup step to propagate the breaks. */ - } else if (!(bl->flags & FSCOPE_GOLA)) { - return; + } + } + if ((bl->flags & FSCOPE_GOLA)) { + gola_fixup(ls, bl); } - gola_fixup(ls, bl); } /* Mark scope as having an upvalue. */ @@ -2177,6 +2179,8 @@ static void assign_adjust(LexState *ls, BCReg nvars, BCReg nexps, ExpDesc *e) bcemit_nil(fs, reg, (BCReg)extra); } } + if (nexps > nvars) + ls->fs->freereg -= nexps - nvars; /* Drop leftover regs. */ } /* Recursively parse assignment statement. */ @@ -2210,8 +2214,6 @@ static void parse_assignment(LexState *ls, LHSVarList *lh, BCReg nvars) return; } assign_adjust(ls, nvars, nexps, &e); - if (nexps > nvars) - ls->fs->freereg -= nexps - nvars; /* Drop leftover regs. */ } /* Assign RHS to LHS and recurse downwards. */ expr_init(&e, VNONRELOC, ls->fs->freereg-1); diff --git a/luajit-2.1/src/lj_parse.h b/luajit-2.1/src/lj_parse.h index dc4fd40..ceeab69 100644 --- a/luajit-2.1/src/lj_parse.h +++ b/luajit-2.1/src/lj_parse.h @@ -1,6 +1,6 @@ /* ** Lua parser (source code -> bytecode). -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_PARSE_H diff --git a/luajit-2.1/src/lj_profile.c b/luajit-2.1/src/lj_profile.c index c7e5396..116998e 100644 --- a/luajit-2.1/src/lj_profile.c +++ b/luajit-2.1/src/lj_profile.c @@ -1,6 +1,6 @@ /* ** Low-overhead profiling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_profile_c diff --git a/luajit-2.1/src/lj_profile.h b/luajit-2.1/src/lj_profile.h index 26cb9db..0cccfd7 100644 --- a/luajit-2.1/src/lj_profile.h +++ b/luajit-2.1/src/lj_profile.h @@ -1,6 +1,6 @@ /* ** Low-overhead profiling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_PROFILE_H diff --git a/luajit-2.1/src/lj_record.c b/luajit-2.1/src/lj_record.c index dc5f2d5..9d0469c 100644 --- a/luajit-2.1/src/lj_record.c +++ b/luajit-2.1/src/lj_record.c @@ -1,6 +1,6 @@ /* ** Trace recorder (bytecode -> SSA IR). -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_record_c @@ -51,7 +51,7 @@ static void rec_check_ir(jit_State *J) { IRRef i, nins = J->cur.nins, nk = J->cur.nk; lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); - for (i = nins-1; i >= nk; i--) { + for (i = nk; i < nins; i++) { IRIns *ir = IR(i); uint32_t mode = lj_ir_mode[ir->o]; IRRef op1 = ir->op1; @@ -61,7 +61,10 @@ static void rec_check_ir(jit_State *J) case IRMref: lua_assert(op1 >= nk); lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; case IRMlit: break; - case IRMcst: lua_assert(i < REF_BIAS); continue; + case IRMcst: lua_assert(i < REF_BIAS); + if (irt_is64(ir->t) && ir->o != IR_KNULL) + i++; + continue; } switch (irm_op2(mode)) { case IRMnone: lua_assert(op2 == 0); break; @@ -84,30 +87,48 @@ static void rec_check_slots(jit_State *J) BCReg s, nslots = J->baseslot + J->maxslot; int32_t depth = 0; cTValue *base = J->L->base - J->baseslot; - lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS); - lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME)); + lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS); + lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME)); lua_assert(nslots < LJ_MAX_JSLOTS); for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; if (tr) { cTValue *tv = &base[s]; IRRef ref = tref_ref(tr); - IRIns *ir; - lua_assert(ref >= J->cur.nk && ref < J->cur.nins); - ir = IR(ref); - lua_assert(irt_t(ir->t) == tref_t(tr)); + IRIns *ir = NULL; /* Silence compiler. */ + if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) { + lua_assert(ref >= J->cur.nk && ref < J->cur.nins); + ir = IR(ref); + lua_assert(irt_t(ir->t) == tref_t(tr)); + } if (s == 0) { lua_assert(tref_isfunc(tr)); +#if LJ_FR2 + } else if (s == 1) { + lua_assert((tr & ~TREF_FRAME) == 0); +#endif } else if ((tr & TREF_FRAME)) { GCfunc *fn = gco2func(frame_gc(tv)); BCReg delta = (BCReg)(tv - frame_prev(tv)); +#if LJ_FR2 + if (ref) + lua_assert(ir_knum(ir)->u64 == tv->u64); + tr = J->slot[s-1]; + ir = IR(tref_ref(tr)); +#endif lua_assert(tref_isfunc(tr)); if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); - lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta)); + lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME) + : (s == delta + LJ_FR2)); depth++; } else if ((tr & TREF_CONT)) { +#if LJ_FR2 + if (ref) + lua_assert(ir_knum(ir)->u64 == tv->u64); +#else lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); - lua_assert((J->slot[s+1] & TREF_FRAME)); +#endif + lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME)); depth++; } else { if (tvisnumber(tv)) @@ -159,10 +180,10 @@ static TRef sload(jit_State *J, int32_t slot) /* Get TRef for current function. */ static TRef getcurrf(jit_State *J) { - if (J->base[-1]) - return J->base[-1]; - lua_assert(J->baseslot == 1); - return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); + if (J->base[-1-LJ_FR2]) + return J->base[-1-LJ_FR2]; + lua_assert(J->baseslot == 1+LJ_FR2); + return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY); } /* Compare for raw object equality. @@ -506,7 +527,6 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) { BCReg ra = bc_a(iterins); - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ if (!tref_isnil(getslot(J, ra))) { /* Looping back? */ J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ J->maxslot = ra-1+bc_b(J->pc[-1]); @@ -643,8 +663,8 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr) GCproto *pt = funcproto(fn); /* Too many closures created? Probably not a monomorphic function. */ if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */ - TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC); - emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt))); + TRef trpt = emitir(IRT(IR_FLOAD, IRT_PGC), tr, IRFL_FUNC_PC); + emitir(IRTG(IR_EQ, IRT_PGC), trpt, lj_ir_kptr(J, proto_bc(pt))); (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ return tr; } @@ -675,22 +695,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) { RecordIndex ix; TValue *functv = &J->L->base[func]; - TRef *fbase = &J->base[func]; + TRef kfunc, *fbase = &J->base[func]; ptrdiff_t i; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ - for (i = 0; i <= nargs; i++) - (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ + (void)getslot(J, func); /* Ensure func has a reference. */ + for (i = 1; i <= nargs; i++) + (void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */ if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ ix.tab = fbase[0]; copyTV(J->L, &ix.tabv, functv); if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) lj_trace_err(J, LJ_TRERR_NOMM); - for (i = ++nargs; i > 0; i--) /* Shift arguments up. */ - fbase[i] = fbase[i-1]; + for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */ + fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1]; +#if LJ_FR2 + fbase[2] = fbase[0]; +#endif fbase[0] = ix.mobj; /* Replace function. */ functv = &ix.mobjv; } - fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); + kfunc = rec_call_specialize(J, funcV(functv), fbase[0]); +#if LJ_FR2 + fbase[0] = kfunc; + fbase[1] = TREF_FRAME; +#else + fbase[0] = kfunc | TREF_FRAME; +#endif J->maxslot = (BCReg)nargs; } @@ -700,8 +729,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) rec_call_setup(J, func, nargs); /* Bump frame. */ J->framedepth++; - J->base += func+1; - J->baseslot += func+1; + J->base += func+1+LJ_FR2; + J->baseslot += func+1+LJ_FR2; } /* Record tail call. */ @@ -717,7 +746,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs) func += cbase; } /* Move func + args down. */ - memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1)); + if (LJ_FR2 && J->baseslot == 2) + J->base[func+1] = TREF_FRAME; + memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2)); /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ /* Tailcalls can form a loop, so count towards the loop unroll limit. */ if (++J->tailcalled > J->loopunroll) @@ -758,9 +789,9 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) (void)getslot(J, rbase+i); /* Ensure all results have a reference. */ while (frame_ispcall(frame)) { /* Immediately resolve pcall() returns. */ BCReg cbase = (BCReg)frame_delta(frame); - if (--J->framedepth < 0) + if (--J->framedepth <= 0) lj_trace_err(J, LJ_TRERR_NYIRETL); - lua_assert(J->baseslot > 1); + lua_assert(J->baseslot > 1+LJ_FR2); gotresults++; rbase += cbase; J->baseslot -= (BCReg)cbase; @@ -784,7 +815,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCReg cbase = (BCReg)frame_delta(frame); if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ lj_trace_err(J, LJ_TRERR_NYIRETL); - lua_assert(J->baseslot > 1); + lua_assert(J->baseslot > 1+LJ_FR2); rbase += cbase; J->baseslot -= (BCReg)cbase; J->base -= cbase; @@ -794,8 +825,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCIns callins = *(frame_pc(frame)-1); ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; BCReg cbase = bc_a(callins); - GCproto *pt = funcproto(frame_func(frame - (cbase+1-LJ_FR2))); - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame teardown. */ + GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2))); if ((pt->flags & PROTO_NOJIT)) lj_trace_err(J, LJ_TRERR_CJITOFF); if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { @@ -808,13 +838,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) lj_snap_add(J); } for (i = 0; i < nresults; i++) /* Adjust results. */ - J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL; + J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL; J->maxslot = cbase+(BCReg)nresults; if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ J->framedepth--; - lua_assert(J->baseslot > cbase+1); - J->baseslot -= cbase+1; - J->base -= cbase+1; + lua_assert(J->baseslot > cbase+1+LJ_FR2); + J->baseslot -= cbase+1+LJ_FR2; + J->base -= cbase+1+LJ_FR2; } else if (J->parent == 0 && J->exitno == 0 && !bc_isret(bc_op(J->cur.startins))) { /* Return to lower frame would leave the loop in a root trace. */ @@ -824,13 +854,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) } else { /* Return to lower frame. Guard for the target we return to. */ TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); - emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); + emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc); J->retdepth++; J->needsnap = 1; - lua_assert(J->baseslot == 1); + lua_assert(J->baseslot == 1+LJ_FR2); /* Shift result slots up and clear the slots of the new frame below. */ - memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); - memset(J->base-1, 0, sizeof(TRef)*(cbase+1)); + memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults); + memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2)); } } else if (frame_iscont(frame)) { /* Return to continuation frame. */ ASMFunction cont = frame_contf(frame); @@ -839,32 +869,39 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) lj_trace_err(J, LJ_TRERR_NYIRETL); J->baseslot -= (BCReg)cbase; J->base -= cbase; - J->maxslot = cbase-2; + J->maxslot = cbase-(2<<LJ_FR2); if (cont == lj_cont_ra) { /* Copy result to destination slot. */ BCReg dst = bc_a(*(frame_contpc(frame)-1)); J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; - if (dst >= J->maxslot) J->maxslot = dst+1; + if (dst >= J->maxslot) { + J->maxslot = dst+1; + } } else if (cont == lj_cont_nop) { /* Nothing to do here. */ } else if (cont == lj_cont_cat) { BCReg bslot = bc_b(*(frame_contpc(frame)-1)); TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL; - if (bslot != cbase-2) { /* Concatenate the remainder. */ + if (bslot != J->maxslot) { /* Concatenate the remainder. */ TValue *b = J->L->base, save; /* Simulate lower frame and result. */ - J->base[cbase-2] = tr; - copyTV(J->L, &save, b-2); - if (gotresults) copyTV(J->L, b-2, b+rbase); else setnilV(b-2); + J->base[J->maxslot] = tr; + copyTV(J->L, &save, b-(2<<LJ_FR2)); + if (gotresults) + copyTV(J->L, b-(2<<LJ_FR2), b+rbase); + else + setnilV(b-(2<<LJ_FR2)); J->L->base = b - cbase; - tr = rec_cat(J, bslot, cbase-2); + tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2)); b = J->L->base + cbase; /* Undo. */ J->L->base = b; - copyTV(J->L, b-2, &save); + copyTV(J->L, b-(2<<LJ_FR2), &save); } if (tr) { /* Store final result. */ BCReg dst = bc_a(*(frame_contpc(frame)-1)); J->base[dst] = tr; - if (dst >= J->maxslot) J->maxslot = dst+1; + if (dst >= J->maxslot) { + J->maxslot = dst+1; + } } /* Otherwise continue with another __concat call. */ } else { /* Result type already specialized. */ @@ -873,7 +910,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) } else { lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ } - lua_assert(J->baseslot >= 1); + lua_assert(J->baseslot >= 1+LJ_FR2); } /* -- Metamethod handling ------------------------------------------------- */ @@ -882,16 +919,16 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) { BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize; -#if LJ_64 - TRef trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin)); +#if LJ_FR2 + J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); + J->base[top+1] = TREF_CONT; #else - TRef trcont = lj_ir_kptr(J, (void *)cont); + J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; #endif - J->base[top] = trcont | TREF_CONT; J->framedepth++; for (s = J->maxslot; s < top; s++) J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ - return top+1; + return top+1+LJ_FR2; } /* Record metamethod lookup. */ @@ -910,7 +947,7 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) cTValue *mo; if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) { /* Specialize to the C library namespace object. */ - emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); + emitir(IRTG(IR_EQ, IRT_PGC), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); } else { /* Specialize to the type of userdata. */ TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE); @@ -939,7 +976,13 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) } /* The cdata metatable is treated as immutable. */ if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; +#if LJ_GC64 + /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ + ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB, + GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)])); +#else ix->mt = mix.tab = lj_ir_ktab(J, mt); +#endif goto nocheck; } ix->mt = mt ? mix.tab : TREF_NIL; @@ -969,9 +1012,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra); TRef *base = J->base + func; TValue *basev = J->L->base + func; - base[1] = ix->tab; base[2] = ix->key; - copyTV(J->L, basev+1, &ix->tabv); - copyTV(J->L, basev+2, &ix->keyv); + base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key; + copyTV(J->L, basev+1+LJ_FR2, &ix->tabv); + copyTV(J->L, basev+2+LJ_FR2, &ix->keyv); if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ if (mm != MM_unm) { ix->tab = ix->key; @@ -982,8 +1025,10 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) lj_trace_err(J, LJ_TRERR_NOMM); } ok: - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ base[0] = ix->mobj; +#if LJ_FR2 + base[1] = 0; +#endif copyTV(J->L, basev+0, &ix->mobjv); lj_record_call(J, func, 2); return 0; /* No result yet. */ @@ -999,8 +1044,9 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) BCReg func = rec_mm_prep(J, lj_cont_ra); TRef *base = J->base + func; TValue *basev = J->L->base + func; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); + base += LJ_FR2; + basev += LJ_FR2; base[1] = tr; copyTV(J->L, basev+1, tv); #if LJ_52 base[2] = tr; copyTV(J->L, basev+2, tv); @@ -1020,11 +1066,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) { BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); - TRef *base = J->base + func; - TValue *tv = J->L->base + func; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ - base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; - copyTV(J->L, tv+0, &ix->mobjv); + TRef *base = J->base + func + LJ_FR2; + TValue *tv = J->L->base + func + LJ_FR2; + base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key; + copyTV(J->L, tv-LJ_FR2, &ix->mobjv); copyTV(J->L, tv+1, &ix->valv); copyTV(J->L, tv+2, &ix->keyv); lj_record_call(J, func, 2); @@ -1236,12 +1281,14 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) } /* Record indexed key lookup. */ -static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref) +static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref, + IRType1 *rbguard) { TRef key; GCtab *t = tabV(&ix->tabv); ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */ *rbref = 0; + rbguard->irt = 0; /* Integer keys are looked up in the array part first. */ key = ix->key; @@ -1255,8 +1302,8 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref) if ((MSize)k < t->asize) { /* Currently an array key? */ TRef arrayref; rec_idx_abc(J, asizeref, ikey, t->asize); - arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY); - return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey); + arrayref = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_ARRAY); + return emitir(IRT(IR_AREF, IRT_PGC), arrayref, ikey); } else { /* Currently not in array (may be an array extension)? */ emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ if (k == 0 && tref_isk(key)) @@ -1293,15 +1340,16 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref) hslot <= 65535*(MSize)sizeof(Node)) { TRef node, kslot, hm; *rbref = J->cur.nins; /* Mark possible rollback point. */ + *rbguard = J->guardemit; hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); - node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE); + node = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_NODE); kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); - return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot); + return emitir(IRTG(IR_HREFK, IRT_PGC), node, kslot); } } /* Fall back to a regular hash lookup. */ - return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key); + return emitir(IRT(IR_HREF, IRT_PGC), ix->tab, key); } /* Determine whether a key is NOT one of the fast metamethod names. */ @@ -1327,6 +1375,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) TRef xref; IROp xrefop, loadop; IRRef rbref; + IRType1 rbguard; cTValue *oldv; while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ @@ -1337,11 +1386,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) handlemm: if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); - TRef *base = J->base + func; - TValue *tv = J->L->base + func; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ - base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; - setfuncV(J->L, tv+0, funcV(&ix->mobjv)); + TRef *base = J->base + func + LJ_FR2; + TValue *tv = J->L->base + func + LJ_FR2; + base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; + setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv)); copyTV(J->L, tv+1, &ix->tabv); copyTV(J->L, tv+2, &ix->keyv); if (ix->val) { @@ -1373,7 +1421,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) } /* Record the key lookup. */ - xref = rec_idx_key(J, ix, &rbref); + xref = rec_idx_key(J, ix, &rbref, &rbguard); xrefop = IR(tref_ref(xref))->o; loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD; /* The lj_meta_tset() inconsistency is gone, but better play safe. */ @@ -1383,13 +1431,15 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) IRType t = itype2irt(oldv); TRef res; if (oldv == niltvg(J2G(J))) { - emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); + emitir(IRTG(IR_EQ, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); res = TREF_NIL; } else { res = emitir(IRTG(loadop, t), xref, 0); } - if (tref_ref(res) < rbref) /* HREFK + load forwarded? */ + if (tref_ref(res) < rbref) { /* HREFK + load forwarded? */ lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */ + J->guardemit = rbguard; + } if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index)) goto handlemm; if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */ @@ -1397,8 +1447,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) } else { /* Indexed store. */ GCtab *mt = tabref(tabV(&ix->tabv)->metatable); int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val); - if (tref_ref(xref) < rbref) /* HREFK forwarded? */ + if (tref_ref(xref) < rbref) { /* HREFK forwarded? */ lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */ + J->guardemit = rbguard; + } if (tvisnil(oldv)) { /* Previous value was nil? */ /* Need to duplicate the hasmm check for the early guards. */ int hasmm = 0; @@ -1409,7 +1461,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) if (hasmm) emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ else if (xrefop == IR_HREF) - emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32), + emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { lua_assert(hasmm); @@ -1420,7 +1472,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) TRef key = ix->key; if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); - xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key); + xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key); keybarrier = 0; /* NEWREF already takes care of the key barrier. */ #ifdef LUAJIT_ENABLE_TABLE_BUMP if ((J->flags & JIT_F_OPT_SINK)) /* Avoid a separate flag. */ @@ -1430,7 +1482,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { /* Cannot derive that the previous value was non-nil, must do checks. */ if (xrefop == IR_HREF) /* Guard against store to niltv. */ - emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); + emitir(IRTG(IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); if (ix->idxchain) { /* Metamethod lookup required? */ /* A check for NULL metatable is cheaper (hoistable) than a load. */ if (!mt) { @@ -1452,7 +1504,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); /* Invalidate neg. metamethod cache for stores with certain string keys. */ if (!nommstr(J, ix->key)) { - TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM); + TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ix->tab, IRFL_TAB_NOMM); emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); } J->needsnap = 1; @@ -1527,7 +1579,11 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) goto noconstify; kfunc = lj_ir_kfunc(J, J->fn); emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); - J->base[-1] = TREF_FRAME | kfunc; +#if LJ_FR2 + J->base[-2] = kfunc; +#else + J->base[-1] = kfunc | TREF_FRAME; +#endif fn = kfunc; } tr = lj_record_constify(J, uvval(uvp)); @@ -1538,13 +1594,17 @@ noconstify: /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); if (!uvp->closed) { + uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv)); /* In current stack? */ if (uvval(uvp) >= tvref(J->L->stack) && uvval(uvp) < tvref(J->L->maxstack)) { int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); if (slot >= 0) { /* Aliases an SSA slot? */ + emitir(IRTG(IR_EQ, IRT_PGC), + REF_BASE, + emitir(IRT(IR_ADD, IRT_PGC), uref, + lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8))); slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ - /* NYI: add IR to guard that it's still aliasing the same slot. */ if (val == 0) { return getslot(J, slot); } else { @@ -1554,10 +1614,12 @@ noconstify: } } } - uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv)); + emitir(IRTG(IR_UGT, IRT_PGC), + emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE), + lj_ir_kint(J, (J->baseslot + J->maxslot) * 8)); } else { needbarrier = 1; - uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv)); + uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv)); } if (val == 0) { /* Upvalue load */ IRType t = itype2irt(uvval(uvp)); @@ -1632,11 +1694,14 @@ static void rec_func_setup(jit_State *J) static void rec_func_vararg(jit_State *J) { GCproto *pt = J->pt; - BCReg s, fixargs, vframe = J->maxslot+1; + BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2; lua_assert((pt->flags & PROTO_VARARG)); if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); - J->base[vframe-1] = J->base[-1]; /* Copy function up. */ + J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */ +#if LJ_FR2 + J->base[vframe-1] = TREF_FRAME; +#endif /* Copy fixarg slots up and set their original slots to nil. */ fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; for (s = 0; s < fixargs; s++) { @@ -1685,8 +1750,11 @@ static int select_detect(jit_State *J) BCIns ins = J->pc[1]; if (bc_op(ins) == BC_CALLM && bc_b(ins) == 2 && bc_c(ins) == 1) { cTValue *func = &J->L->base[bc_a(ins)]; - if (tvisfunc(func) && funcV(func)->c.ffid == FF_select) + if (tvisfunc(func) && funcV(func)->c.ffid == FF_select) { + TRef kfunc = lj_ir_kfunc(J, funcV(func)); + emitir(IRTG(IR_EQ, IRT_FUNC), getslot(J, bc_a(ins)), kfunc); return 1; + } } return 0; } @@ -1695,8 +1763,10 @@ static int select_detect(jit_State *J) static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) { int32_t numparams = J->pt->numparams; - ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; + ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2; lua_assert(frame_isvarg(J->L->base-1)); + if (LJ_FR2 && dst > J->maxslot) + J->base[dst-1] = 0; /* Prevent resurrection of unrelated slot. */ if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ ptrdiff_t i; if (nvararg < 0) nvararg = 0; @@ -1707,10 +1777,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) J->maxslot = dst + (BCReg)nresults; } for (i = 0; i < nresults; i++) - J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL; + J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL; } else { /* Unknown number of varargs passed to trace. */ - TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); - int32_t frofs = 8*(1+numparams)+FRAME_VARG; + TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME); + int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG; if (nresults >= 0) { /* Known fixed number of results. */ ptrdiff_t i; if (nvararg > 0) { @@ -1721,11 +1791,11 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) else emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1))); - vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); - vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); + vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); + vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); for (i = 0; i < nload; i++) { - IRType t = itype2irt(&J->L->base[i-1-nvararg]); - TRef aref = emitir(IRT(IR_AREF, IRT_P32), + IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]); + TRef aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, lj_ir_kint(J, (int32_t)i)); TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ @@ -1771,15 +1841,16 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) } if (idx != 0 && idx <= nvararg) { IRType t; - TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); - vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); - t = itype2irt(&J->L->base[idx-2-nvararg]); - aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx); + TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); + vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, + lj_ir_kint(J, frofs-(8<<LJ_FR2))); + t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]); + aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx); tr = emitir(IRTG(IR_VLOAD, t), aref, 0); if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ } - J->base[dst-2] = tr; - J->maxslot = dst-1; + J->base[dst-2-LJ_FR2] = tr; + J->maxslot = dst-1-LJ_FR2; J->bcskip = 2; /* Skip CALLM + select. */ } else { nyivarg: @@ -1828,10 +1899,10 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) break; } xbase = ++trp; - tr = hdr = emitir(IRT(IR_BUFHDR, IRT_P32), + tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC), lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); do { - tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++); + tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, *trp++); } while (trp <= top); tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); J->maxslot = (BCReg)(xbase - J->base); @@ -1872,7 +1943,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond) const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ +#if LJ_FR2 + SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent]; + uint64_t pcbase; + memcpy(&pcbase, flink, sizeof(uint64_t)); + pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8); + memcpy(flink, &pcbase, sizeof(uint64_t)); +#else J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); +#endif J->needsnap = 1; if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); lj_snap_shrink(J); /* Shrink last snapshot if possible. */ @@ -2148,14 +2227,14 @@ void lj_record_ins(jit_State *J) case BC_MODVN: case BC_MODVV: recmod: if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) - rc = lj_opt_narrow_mod(J, rb, rc, rcv); + rc = lj_opt_narrow_mod(J, rb, rc, rbv, rcv); else rc = rec_mm_arith(J, &ix, MM_mod); break; case BC_POW: if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) - rc = lj_opt_narrow_pow(J, lj_ir_tonum(J, rb), rc, rcv); + rc = lj_opt_narrow_pow(J, rb, rc, rbv, rcv); else rc = rec_mm_arith(J, &ix, MM_pow); break; @@ -2170,7 +2249,13 @@ void lj_record_ins(jit_State *J) case BC_MOV: /* Clear gap of method call to avoid resurrecting previous refs. */ - if (ra > J->maxslot) J->base[ra-1] = 0; + if (ra > J->maxslot) { +#if LJ_FR2 + memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef)); +#else + J->base[ra-1] = 0; +#endif + } break; case BC_KSTR: case BC_KNUM: case BC_KPRI: break; @@ -2178,6 +2263,8 @@ void lj_record_ins(jit_State *J) rc = lj_ir_kint(J, (int32_t)(int16_t)rc); break; case BC_KNIL: + if (LJ_FR2 && ra > J->maxslot) + J->base[ra-1] = 0; while (ra <= rc) J->base[ra++] = TREF_NIL; if (rc >= J->maxslot) J->maxslot = rc+1; @@ -2239,14 +2326,14 @@ void lj_record_ins(jit_State *J) /* -- Calls and vararg handling ----------------------------------------- */ case BC_ITERC: - J->base[ra] = getslot(J, ra-3-LJ_FR2); - J->base[ra+1] = getslot(J, ra-2-LJ_FR2); - J->base[ra+2] = getslot(J, ra-1-LJ_FR2); + J->base[ra] = getslot(J, ra-3); + J->base[ra+1+LJ_FR2] = getslot(J, ra-2); + J->base[ra+2+LJ_FR2] = getslot(J, ra-1); { /* Do the actual copy now because lj_record_call needs the values. */ TValue *b = &J->L->base[ra]; - copyTV(J->L, b, b-3-LJ_FR2); - copyTV(J->L, b+1, b-2-LJ_FR2); - copyTV(J->L, b+2, b-1-LJ_FR2); + copyTV(J->L, b, b-3); + copyTV(J->L, b+1+LJ_FR2, b-2); + copyTV(J->L, b+2+LJ_FR2, b-1); } lj_record_call(J, ra, (ptrdiff_t)rc-1); break; @@ -2369,7 +2456,12 @@ void lj_record_ins(jit_State *J) /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ if (bcmode_a(op) == BCMdst && rc) { J->base[ra] = rc; - if (ra >= J->maxslot) J->maxslot = ra+1; + if (ra >= J->maxslot) { +#if LJ_FR2 + if (ra > J->maxslot) J->base[ra-1] = 0; +#endif + J->maxslot = ra+1; + } } #undef rav @@ -2454,7 +2546,7 @@ void lj_record_setup(jit_State *J) J->scev.idx = REF_NIL; setmref(J->scev.pc, NULL); - J->baseslot = 1; /* Invoking function is at base[-1]. */ + J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */ J->base = J->slot + J->baseslot; J->maxslot = 0; J->framedepth = 0; @@ -2469,7 +2561,7 @@ void lj_record_setup(jit_State *J) J->bc_extent = ~(MSize)0; /* Emit instructions for fixed references. Also triggers initial IR alloc. */ - emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno); + emitir_raw(IRT(IR_BASE, IRT_PGC), J->parent, J->exitno); for (i = 0; i <= 2; i++) { IRIns *ir = IR(REF_NIL-i); ir->i = 0; diff --git a/luajit-2.1/src/lj_record.h b/luajit-2.1/src/lj_record.h index 732adb4..93d374d 100644 --- a/luajit-2.1/src/lj_record.h +++ b/luajit-2.1/src/lj_record.h @@ -1,6 +1,6 @@ /* ** Trace recorder (bytecode -> SSA IR). -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_RECORD_H diff --git a/luajit-2.1/src/lj_snap.c b/luajit-2.1/src/lj_snap.c index 7c78f8a..bb063c2 100644 --- a/luajit-2.1/src/lj_snap.c +++ b/luajit-2.1/src/lj_snap.c @@ -1,6 +1,6 @@ /* ** Snapshot handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_snap_c @@ -68,10 +68,22 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; IRRef ref = tref_ref(tr); +#if LJ_FR2 + if (s == 1) { /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */ + if ((tr & TREF_FRAME)) + map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL); + continue; + } + if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) { + cTValue *base = J->L->base - J->baseslot; + tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64); + ref = tref_ref(tr); + } +#endif if (ref) { SnapEntry sn = SNAP_TR(s, tr); IRIns *ir = &J->cur.ir[ref]; - if (!(sn & (SNAP_CONT|SNAP_FRAME)) && + if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) && ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { /* No need to snapshot unmodified non-inherited slots. */ if (!(ir->op2 & IRSLOAD_INHERIT)) @@ -90,34 +102,51 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) } /* Add frame links at the end of the snapshot. */ -static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) +static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) { cTValue *frame = J->L->base - 1; - cTValue *lim = J->L->base - J->baseslot; + cTValue *lim = J->L->base - J->baseslot + LJ_FR2; GCfunc *fn = frame_func(frame); cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; +#if LJ_FR2 + uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2); + lua_assert(2 <= J->baseslot && J->baseslot <= 257); + memcpy(map, &pcbase, sizeof(uint64_t)); +#else MSize f = 0; - lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */ map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ +#endif while (frame > lim) { /* Backwards traversal of all frames above base. */ if (frame_islua(frame)) { +#if !LJ_FR2 map[f++] = SNAP_MKPC(frame_pc(frame)); +#endif frame = frame_prevl(frame); } else if (frame_iscont(frame)) { +#if !LJ_FR2 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); map[f++] = SNAP_MKPC(frame_contpc(frame)); +#endif frame = frame_prevd(frame); } else { lua_assert(!frame_isc(frame)); +#if !LJ_FR2 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); +#endif frame = frame_prevd(frame); continue; } if (frame + funcproto(frame_func(frame))->framesize > ftop) ftop = frame + funcproto(frame_func(frame))->framesize; } + *topslot = (uint8_t)(ftop - lim); +#if LJ_FR2 + lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t)); + return 2; +#else lua_assert(f == (MSize)(1 + J->framedepth)); - return (BCReg)(ftop - lim); + return f; +#endif } /* Take a snapshot of the current stack. */ @@ -127,16 +156,16 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) MSize nent; SnapEntry *p; /* Conservative estimate. */ - lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); + lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1)); p = &J->cur.snapmap[nsnapmap]; nent = snapshot_slots(J, p, nslots); - snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent); + snap->nent = (uint8_t)nent; + nent += snapshot_framelinks(J, p + nent, &snap->topslot); snap->mapofs = (uint16_t)nsnapmap; snap->ref = (IRRef1)J->cur.nins; - snap->nent = (uint8_t)nent; snap->nslots = (uint8_t)nslots; snap->count = 0; - J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth); + J->cur.nsnapmap = (uint16_t)(nsnapmap + nent); } /* Add or merge a snapshot. */ @@ -145,8 +174,8 @@ void lj_snap_add(jit_State *J) MSize nsnap = J->cur.nsnap; MSize nsnapmap = J->cur.nsnapmap; /* Merge if no ins. inbetween or if requested and no guard inbetween. */ - if (J->mergesnap ? !irt_isguard(J->guardemit) : - (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { + if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) || + (J->mergesnap && !irt_isguard(J->guardemit))) { if (nsnap == 1) { /* But preserve snap #0 PC. */ emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0); goto nomerge; @@ -284,8 +313,8 @@ void lj_snap_shrink(jit_State *J) MSize n, m, nlim, nent = snap->nent; uint8_t udf[SNAP_USEDEF_SLOTS]; BCReg maxslot = J->maxslot; - BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot); BCReg baseslot = J->baseslot; + BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot); maxslot += baseslot; minslot += baseslot; snap->nslots = (uint8_t)maxslot; @@ -371,8 +400,8 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir) case IR_KPRI: return TREF_PRI(irt_type(ir->t)); case IR_KINT: return lj_ir_kint(J, ir->i); case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); - case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir)); - case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir)); + case IR_KNUM: case IR_KINT64: + return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64); case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */ default: lua_assert(0); return TREF_NIL; break; } @@ -445,7 +474,11 @@ void lj_snap_replay(jit_State *J, GCtrace *T) goto setslot; bloomset(seen, ref); if (irref_isk(ref)) { - tr = snap_replay_const(J, ir); + /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */ + if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL))) + tr = 0; + else + tr = snap_replay_const(J, ir); } else if (!regsp_used(ir->prev)) { pass23 = 1; lua_assert(s != 0); @@ -459,7 +492,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) } setslot: J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ - J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s); + J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2)); if ((sn & SNAP_FRAME)) J->baseslot = s+1; } @@ -555,8 +588,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { uint64_t k = (uint32_t)T->ir[irs->op2].i + ((uint64_t)T->ir[(irs+1)->op2].i << 32); - val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, - lj_ir_k64_find(J, k)); + val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k); } else { val = emitir_raw(IRT(IR_HIOP, t), val, snap_pref(J, T, map, nent, seen, (irs+1)->op2)); @@ -599,7 +631,6 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, } if (LJ_UNLIKELY(bloomtest(rfilt, ref))) rs = snap_renameref(T, snapno, ref, rs); - lua_assert(!LJ_GC64); /* TODO_GC64: handle 64 bit references. */ if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ int32_t *sps = &ex->spill[regsp_spill(rs)]; if (irt_isinteger(t)) { @@ -608,9 +639,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, } else if (irt_isnum(t)) { o->u64 = *(uint64_t *)sps; #endif - } else if (LJ_64 && irt_islightud(t)) { +#if LJ_64 && !LJ_GC64 + } else if (irt_islightud(t)) { /* 64 bit lightuserdata which may escape already has the tag bits. */ o->u64 = *(uint64_t *)sps; +#endif } else { lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); @@ -628,9 +661,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, } else if (irt_isnum(t)) { setnumV(o, ex->fpr[r-RID_MIN_FPR]); #endif - } else if (LJ_64 && irt_is64(t)) { +#if LJ_64 && !LJ_GC64 + } else if (irt_is64(t)) { /* 64 bit values that already have the tag bits. */ o->u64 = ex->gpr[r-RID_MIN_GPR]; +#endif } else if (irt_ispri(t)) { setpriV(o, irt_toitype(t)); } else { @@ -651,7 +686,7 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, uint64_t tmp; if (irref_isk(ref)) { if (ir->o == IR_KNUM || ir->o == IR_KINT64) { - src = mref(ir->ptr, int32_t); + src = (int32_t *)&ir[1]; } else if (sz == 8) { tmp = (uint64_t)(uint32_t)ir->i; src = (int32_t *)&tmp; @@ -688,8 +723,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, #else if (LJ_BE && sz == 4) src++; #endif - } + } else #endif + if (LJ_64 && LJ_BE && sz == 4) src++; } } lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); @@ -711,8 +747,9 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { CTState *cts = ctype_cts(J->L); CTypeID id = (CTypeID)T->ir[ir->op1].i; - CTSize sz = lj_ctype_size(cts, id); - GCcdata *cd = lj_cdata_new(cts, id, sz); + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); + GCcdata *cd = lj_cdata_newx(cts, id, sz, info); setcdataV(J->L, o, cd); if (ir->o == IR_CNEWI) { uint8_t *p = (uint8_t *)cdataptr(cd); @@ -794,11 +831,15 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) SnapShot *snap = &T->snap[snapno]; MSize n, nent = snap->nent; SnapEntry *map = &T->snapmap[snap->mapofs]; - SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; +#if !LJ_FR2 || defined(LUA_USE_ASSERT) + SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2]; +#endif +#if !LJ_FR2 ptrdiff_t ftsz0; +#endif TValue *frame; BloomFilter rfilt = snap_renamefilter(T, snapno); - const BCIns *pc = snap_pc(map[nent]); + const BCIns *pc = snap_pc(&map[nent]); lua_State *L = J->L; /* Set interpreter PC to the next PC to get correct error messages. */ @@ -811,8 +852,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) } /* Fill stack slots with data from the registers and spill slots. */ - frame = L->base-1; + frame = L->base-1-LJ_FR2; +#if !LJ_FR2 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ +#endif for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; if (!(sn & SNAP_NORESTORE)) { @@ -835,14 +878,18 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) TValue tmp; snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); o->u32.hi = tmp.u32.lo; +#if !LJ_FR2 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { - lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */ /* Overwrite tag with frame link. */ setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); L->base = o+1; +#endif } } } +#if LJ_FR2 + L->base += (map[nent+LJ_BE] & 0xff); +#endif lua_assert(map + nent == flinks); /* Compute current stack top. */ diff --git a/luajit-2.1/src/lj_snap.h b/luajit-2.1/src/lj_snap.h index 9a125be..2c9ae3d 100644 --- a/luajit-2.1/src/lj_snap.h +++ b/luajit-2.1/src/lj_snap.h @@ -1,6 +1,6 @@ /* ** Snapshot handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_SNAP_H diff --git a/luajit-2.1/src/lj_state.c b/luajit-2.1/src/lj_state.c index 84b4d11..632dd07 100644 --- a/luajit-2.1/src/lj_state.c +++ b/luajit-2.1/src/lj_state.c @@ -1,6 +1,6 @@ /* ** State and stack handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -180,7 +180,7 @@ static void close_state(lua_State *L) g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); } -#if LJ_64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) +#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) lua_State *lj_state_newstate(lua_Alloc f, void *ud) #else LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) @@ -224,7 +224,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) close_state(L); return NULL; } - L->status = 0; + L->status = LUA_OK; return L; } @@ -256,10 +256,10 @@ LUA_API void lua_close(lua_State *L) #endif for (i = 0;;) { hook_enter(g); - L->status = 0; + L->status = LUA_OK; L->base = L->top = tvref(L->stack) + 1 + LJ_FR2; L->cframe = NULL; - if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) { + if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == LUA_OK) { if (++i >= 10) break; lj_gc_separateudata(g, 1); /* Separate udata again. */ if (gcref(g->gc.mmudata) == NULL) /* Until nothing is left to do. */ @@ -274,7 +274,7 @@ lua_State *lj_state_new(lua_State *L) lua_State *L1 = lj_mem_newobj(L, lua_State); L1->gct = ~LJ_TTHREAD; L1->dummy_ffid = FF_C; - L1->status = 0; + L1->status = LUA_OK; L1->stacksize = 0; setmref(L1->stack, NULL); L1->cframe = NULL; diff --git a/luajit-2.1/src/lj_state.h b/luajit-2.1/src/lj_state.h index 687889a..02a0eaf 100644 --- a/luajit-2.1/src/lj_state.h +++ b/luajit-2.1/src/lj_state.h @@ -1,6 +1,6 @@ /* ** State and stack handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_STATE_H @@ -28,7 +28,7 @@ static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) LJ_FUNC lua_State *lj_state_new(lua_State *L); LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); -#if LJ_64 +#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud); #endif diff --git a/luajit-2.1/src/lj_str.c b/luajit-2.1/src/lj_str.c index af6f592..e60fc71 100644 --- a/luajit-2.1/src/lj_str.c +++ b/luajit-2.1/src/lj_str.c @@ -1,6 +1,6 @@ /* ** String handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_str_c diff --git a/luajit-2.1/src/lj_str.h b/luajit-2.1/src/lj_str.h index d8465de..85c1e40 100644 --- a/luajit-2.1/src/lj_str.h +++ b/luajit-2.1/src/lj_str.h @@ -1,6 +1,6 @@ /* ** String handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_STR_H diff --git a/luajit-2.1/src/lj_strfmt.c b/luajit-2.1/src/lj_strfmt.c index d54e796..d7893ce 100644 --- a/luajit-2.1/src/lj_strfmt.c +++ b/luajit-2.1/src/lj_strfmt.c @@ -1,6 +1,6 @@ /* ** String formatting. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include <stdio.h> @@ -18,7 +18,7 @@ /* -- Format parser ------------------------------------------------------- */ static const uint8_t strfmt_map[('x'-'A')+1] = { - STRFMT_A,0,0,0,STRFMT_E,0,STRFMT_G,0,0,0,0,0,0, + STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0, 0,0,0,0,0,0, STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0, @@ -89,24 +89,6 @@ retlit: /* -- Raw conversions ----------------------------------------------------- */ -/* Write number to bufer. */ -char * LJ_FASTCALL lj_strfmt_wnum(char *p, cTValue *o) -{ - if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */ -#if __BIONIC__ - if (tvismzero(o)) { *p++ = '-'; *p++ = '0'; return p; } -#endif - return p + lua_number2str(p, o->n); - } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) { - *p++ = 'n'; *p++ = 'a'; *p++ = 'n'; - } else if ((o->u32.hi & 0x80000000) == 0) { - *p++ = 'i'; *p++ = 'n'; *p++ = 'f'; - } else { - *p++ = '-'; *p++ = 'i'; *p++ = 'n'; *p++ = 'f'; - } - return p; -} - #define WINT_R(x, sh, sc) \ { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); } @@ -116,11 +98,15 @@ char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k) uint32_t u = (uint32_t)k; if (k < 0) { u = (uint32_t)-k; *p++ = '-'; } if (u < 10000) { - if (u < 10) goto dig1; if (u < 100) goto dig2; if (u < 1000) goto dig3; + if (u < 10) goto dig1; + if (u < 100) goto dig2; + if (u < 1000) goto dig3; } else { uint32_t v = u / 10000; u -= v * 10000; if (v < 10000) { - if (v < 10) goto dig5; if (v < 100) goto dig6; if (v < 1000) goto dig7; + if (v < 10) goto dig5; + if (v < 100) goto dig6; + if (v < 1000) goto dig7; } else { uint32_t w = v / 10000; v -= w * 10000; if (w >= 10) WINT_R(w, 10, 10) @@ -168,21 +154,22 @@ char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v) return p; } -/* Return string or write number to buffer and return pointer to start. */ -const char *lj_strfmt_wstrnum(char *buf, cTValue *o, MSize *lenp) +/* Return string or write number to tmp buffer and return pointer to start. */ +const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) { + SBuf *sb; if (tvisstr(o)) { *lenp = strV(o)->len; return strVdata(o); } else if (tvisint(o)) { - *lenp = (MSize)(lj_strfmt_wint(buf, intV(o)) - buf); - return buf; + sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); } else if (tvisnum(o)) { - *lenp = (MSize)(lj_strfmt_wnum(buf, o) - buf); - return buf; + sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n); } else { return NULL; } + *lenp = sbuflen(sb); + return sbufB(sb); } /* -- Unformatted conversions to buffer ----------------------------------- */ @@ -198,8 +185,7 @@ SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k) /* Add number to buffer. */ SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o) { - setsbufP(sb, lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM), o)); - return sb; + return lj_strfmt_putfnum(sb, STRFMT_G14, o->n); } #endif @@ -360,63 +346,6 @@ SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) return lj_strfmt_putfxint(sb, sf, (uint64_t)k); } -/* Max. sprintf buffer size needed. At least #string.format("%.99f", -1e308). */ -#define STRFMT_FMTNUMBUF 512 - -/* Add formatted floating-point number to buffer. */ -SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n) -{ - TValue tv; - tv.n = n; - if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { - /* Canonicalize output of non-finite values. */ - MSize width = STRFMT_WIDTH(sf), len = 3; - int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0; - char *p; - if (((tv.u32.hi & 0x000fffff) | tv.u32.lo) != 0) { - ch ^= ('n' << 16) | ('a' << 8) | 'n'; - if ((sf & STRFMT_F_SPACE)) prefix = ' '; - } else { - ch ^= ('i' << 16) | ('n' << 8) | 'f'; - if ((tv.u32.hi & 0x80000000)) prefix = '-'; - else if ((sf & STRFMT_F_PLUS)) prefix = '+'; - else if ((sf & STRFMT_F_SPACE)) prefix = ' '; - } - if (prefix) len = 4; - p = lj_buf_more(sb, width > len ? width : len); - if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' '; - if (prefix) *p++ = prefix; - *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch; - if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' '; - setsbufP(sb, p); - } else { /* Delegate to sprintf() for now. */ - uint8_t width = (uint8_t)STRFMT_WIDTH(sf), prec = (uint8_t)STRFMT_PREC(sf); - char fmt[1+5+2+3+1+1], *p = fmt; - *p++ = '%'; - if ((sf & STRFMT_F_LEFT)) *p++ = '-'; - if ((sf & STRFMT_F_PLUS)) *p++ = '+'; - if ((sf & STRFMT_F_ZERO)) *p++ = '0'; - if ((sf & STRFMT_F_SPACE)) *p++ = ' '; - if ((sf & STRFMT_F_ALT)) *p++ = '#'; - if (width) { - uint8_t x = width / 10, y = width % 10; - if (x) *p++ = '0' + x; - *p++ = '0' + y; - } - if (prec != 255) { - uint8_t x = prec / 10, y = prec % 10; - *p++ = '.'; - if (x) *p++ = '0' + x; - *p++ = '0' + y; - } - *p++ = (0x67666561 >> (STRFMT_FP(sf)<<3)) ^ ((sf & STRFMT_F_UPPER)?0x20:0); - *p = '\0'; - p = lj_buf_more(sb, STRFMT_FMTNUMBUF); - setsbufP(sb, p + sprintf(p, fmt, n)); - } - return sb; -} - /* -- Conversions to strings ---------------------------------------------- */ /* Convert integer to string. */ @@ -427,14 +356,6 @@ GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k) return lj_str_new(L, buf, len); } -/* Convert number to string. */ -GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o) -{ - char buf[STRFMT_MAXBUF_NUM]; - MSize len = (MSize)(lj_strfmt_wnum(buf, o) - buf); - return lj_str_new(L, buf, len); -} - /* Convert integer or number to string. */ GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o) { @@ -510,12 +431,9 @@ const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp) case STRFMT_UINT: lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t)); break; - case STRFMT_NUM: { - TValue tv; - tv.n = va_arg(argp, lua_Number); - setsbufP(sb, lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM), &tv)); + case STRFMT_NUM: + lj_strfmt_putfnum(sb, STRFMT_G14, va_arg(argp, lua_Number)); break; - } case STRFMT_STR: { const char *s = va_arg(argp, char *); if (s == NULL) s = "(null)"; diff --git a/luajit-2.1/src/lj_strfmt.h b/luajit-2.1/src/lj_strfmt.h index dcfaf2e..6e1d901 100644 --- a/luajit-2.1/src/lj_strfmt.h +++ b/luajit-2.1/src/lj_strfmt.h @@ -1,6 +1,6 @@ /* ** String formatting. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_STRFMT_H @@ -64,11 +64,12 @@ typedef enum FormatType { #define STRFMT_S (STRFMT_STR) #define STRFMT_U (STRFMT_UINT) #define STRFMT_X (STRFMT_UINT|STRFMT_T_HEX) +#define STRFMT_G14 (STRFMT_G | ((14+1) << STRFMT_SH_PREC)) /* Maximum buffer sizes for conversions. */ #define STRFMT_MAXBUF_XINT (1+22) /* '0' prefix + uint64_t in octal. */ #define STRFMT_MAXBUF_INT (1+10) /* Sign + int32_t in decimal. */ -#define STRFMT_MAXBUF_NUM LUAI_MAXNUMBER2STR +#define STRFMT_MAXBUF_NUM 32 /* Must correspond with STRFMT_G14. */ #define STRFMT_MAXBUF_PTR (2+2*sizeof(ptrdiff_t)) /* "0x" + hex ptr. */ /* Format parser. */ @@ -83,10 +84,9 @@ static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len) /* Raw conversions. */ LJ_FUNC char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k); -LJ_FUNC char * LJ_FASTCALL lj_strfmt_wnum(char *p, cTValue *o); LJ_FUNC char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v); LJ_FUNC char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v); -LJ_FUNC const char *lj_strfmt_wstrnum(char *buf, cTValue *o, MSize *lenp); +LJ_FUNC const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp); /* Unformatted conversions to buffer. */ LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k); diff --git a/luajit-2.1/src/lj_strfmt_num.c b/luajit-2.1/src/lj_strfmt_num.c new file mode 100644 index 0000000..9271f68 --- /dev/null +++ b/luajit-2.1/src/lj_strfmt_num.c @@ -0,0 +1,592 @@ +/* +** String formatting for floating-point numbers. +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Contributed by Peter Cawley. +*/ + +#include <stdio.h> + +#define lj_strfmt_num_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_buf.h" +#include "lj_str.h" +#include "lj_strfmt.h" + +/* -- Precomputed tables -------------------------------------------------- */ + +/* Rescale factors to push the exponent of a number towards zero. */ +#define RESCALE_EXPONENTS(P, N) \ + P(308), P(289), P(270), P(250), P(231), P(212), P(193), P(173), P(154), \ + P(135), P(115), P(96), P(77), P(58), P(38), P(0), P(0), P(0), N(39), N(58), \ + N(77), N(96), N(116), N(135), N(154), N(174), N(193), N(212), N(231), \ + N(251), N(270), N(289) + +#define ONE_E_P(X) 1e+0 ## X +#define ONE_E_N(X) 1e-0 ## X +static const int16_t rescale_e[] = { RESCALE_EXPONENTS(-, +) }; +static const double rescale_n[] = { RESCALE_EXPONENTS(ONE_E_P, ONE_E_N) }; +#undef ONE_E_N +#undef ONE_E_P + +/* +** For p in range -70 through 57, this table encodes pairs (m, e) such that +** 4*2^p <= (uint8_t)m*10^e, and is the smallest value for which this holds. +*/ +static const int8_t four_ulp_m_e[] = { + 34, -21, 68, -21, 14, -20, 28, -20, 55, -20, 2, -19, 3, -19, 5, -19, 9, -19, + -82, -18, 35, -18, 7, -17, -117, -17, 28, -17, 56, -17, 112, -16, -33, -16, + 45, -16, 89, -16, -78, -15, 36, -15, 72, -15, -113, -14, 29, -14, 57, -14, + 114, -13, -28, -13, 46, -13, 91, -12, -74, -12, 37, -12, 73, -12, 15, -11, 3, + -11, 59, -11, 2, -10, 3, -10, 5, -10, 1, -9, -69, -9, 38, -9, 75, -9, 15, -7, + 3, -7, 6, -7, 12, -6, -17, -7, 48, -7, 96, -7, -65, -6, 39, -6, 77, -6, -103, + -5, 31, -5, 62, -5, 123, -4, -11, -4, 49, -4, 98, -4, -60, -3, 4, -2, 79, -3, + 16, -2, 32, -2, 63, -2, 2, -1, 25, 0, 5, 1, 1, 2, 2, 2, 4, 2, 8, 2, 16, 2, + 32, 2, 64, 2, -128, 2, 26, 2, 52, 2, 103, 3, -51, 3, 41, 4, 82, 4, -92, 4, + 33, 4, 66, 4, -124, 5, 27, 5, 53, 5, 105, 6, 21, 6, 42, 6, 84, 6, 17, 7, 34, + 7, 68, 7, 2, 8, 3, 8, 6, 8, 108, 9, -41, 9, 43, 10, 86, 9, -84, 10, 35, 10, + 69, 10, -118, 11, 28, 11, 55, 12, 11, 13, 22, 13, 44, 13, 88, 13, -80, 13, + 36, 13, 71, 13, -115, 14, 29, 14, 57, 14, 113, 15, -30, 15, 46, 15, 91, 15, + 19, 16, 37, 16, 73, 16, 2, 17, 3, 17, 6, 17 +}; + +/* min(2^32-1, 10^e-1) for e in range 0 through 10 */ +static uint32_t ndigits_dec_threshold[] = { + 0, 9U, 99U, 999U, 9999U, 99999U, 999999U, + 9999999U, 99999999U, 999999999U, 0xffffffffU +}; + +/* -- Helper functions ---------------------------------------------------- */ + +/* Compute the number of digits in the decimal representation of x. */ +static MSize ndigits_dec(uint32_t x) +{ + MSize t = ((lj_fls(x | 1) * 77) >> 8) + 1; /* 2^8/77 is roughly log2(10) */ + return t + (x > ndigits_dec_threshold[t]); +} + +#define WINT_R(x, sh, sc) \ + { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); } + +/* Write 9-digit unsigned integer to buffer. */ +static char *lj_strfmt_wuint9(char *p, uint32_t u) +{ + uint32_t v = u / 10000, w; + u -= v * 10000; + w = v / 10000; + v -= w * 10000; + *p++ = (char)('0'+w); + WINT_R(v, 23, 1000) + WINT_R(v, 12, 100) + WINT_R(v, 10, 10) + *p++ = (char)('0'+v); + WINT_R(u, 23, 1000) + WINT_R(u, 12, 100) + WINT_R(u, 10, 10) + *p++ = (char)('0'+u); + return p; +} +#undef WINT_R + +/* -- Extended precision arithmetic --------------------------------------- */ + +/* +** The "nd" format is a fixed-precision decimal representation for numbers. It +** consists of up to 64 uint32_t values, with each uint32_t storing a value +** in the range [0, 1e9). A number in "nd" format consists of three variables: +** +** uint32_t nd[64]; +** uint32_t ndlo; +** uint32_t ndhi; +** +** The integral part of the number is stored in nd[0 ... ndhi], the value of +** which is sum{i in [0, ndhi] | nd[i] * 10^(9*i)}. If the fractional part of +** the number is zero, ndlo is zero. Otherwise, the fractional part is stored +** in nd[ndlo ... 63], the value of which is taken to be +** sum{i in [ndlo, 63] | nd[i] * 10^(9*(i-64))}. +** +** If the array part had 128 elements rather than 64, then every double would +** have an exact representation in "nd" format. With 64 elements, all integral +** doubles have an exact representation, and all non-integral doubles have +** enough digits to make both %.99e and %.99f do the right thing. +*/ + +#if LJ_64 +#define ND_MUL2K_MAX_SHIFT 29 +#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) / 1000000000)) +#else +#define ND_MUL2K_MAX_SHIFT 11 +#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) >> 9) / 1953125) +#endif + +/* Multiply nd by 2^k and add carry_in (ndlo is assumed to be zero). */ +static uint32_t nd_mul2k(uint32_t* nd, uint32_t ndhi, uint32_t k, + uint32_t carry_in, SFormat sf) +{ + uint32_t i, ndlo = 0, start = 1; + /* Performance hacks. */ + if (k > ND_MUL2K_MAX_SHIFT*2 && STRFMT_FP(sf) != STRFMT_FP(STRFMT_T_FP_F)) { + start = ndhi - (STRFMT_PREC(sf) + 17) / 8; + } + /* Real logic. */ + while (k >= ND_MUL2K_MAX_SHIFT) { + for (i = ndlo; i <= ndhi; i++) { + uint64_t val = ((uint64_t)nd[i] << ND_MUL2K_MAX_SHIFT) | carry_in; + carry_in = ND_MUL2K_DIV1E9(val); + nd[i] = (uint32_t)val - carry_in * 1000000000; + } + if (carry_in) { + nd[++ndhi] = carry_in; carry_in = 0; + if (start++ == ndlo) ++ndlo; + } + k -= ND_MUL2K_MAX_SHIFT; + } + if (k) { + for (i = ndlo; i <= ndhi; i++) { + uint64_t val = ((uint64_t)nd[i] << k) | carry_in; + carry_in = ND_MUL2K_DIV1E9(val); + nd[i] = (uint32_t)val - carry_in * 1000000000; + } + if (carry_in) nd[++ndhi] = carry_in; + } + return ndhi; +} + +/* Divide nd by 2^k (ndlo is assumed to be zero). */ +static uint32_t nd_div2k(uint32_t* nd, uint32_t ndhi, uint32_t k, SFormat sf) +{ + uint32_t ndlo = 0, stop1 = ~0, stop2 = ~0; + /* Performance hacks. */ + if (!ndhi) { + if (!nd[0]) { + return 0; + } else { + uint32_t s = lj_ffs(nd[0]); + if (s >= k) { nd[0] >>= k; return 0; } + nd[0] >>= s; k -= s; + } + } + if (k > 18) { + if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_F)) { + stop1 = 63 - (int32_t)STRFMT_PREC(sf) / 9; + } else { + int32_t floorlog2 = ndhi * 29 + lj_fls(nd[ndhi]) - k; + int32_t floorlog10 = (int32_t)(floorlog2 * 0.30102999566398114); + stop1 = 62 + (floorlog10 - (int32_t)STRFMT_PREC(sf)) / 9; + stop2 = 61 + ndhi - (int32_t)STRFMT_PREC(sf) / 8; + } + } + /* Real logic. */ + while (k >= 9) { + uint32_t i = ndhi, carry = 0; + for (;;) { + uint32_t val = nd[i]; + nd[i] = (val >> 9) + carry; + carry = (val & 0x1ff) * 1953125; + if (i == ndlo) break; + i = (i - 1) & 0x3f; + } + if (ndlo != stop1 && ndlo != stop2) { + if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; } + if (!nd[ndhi]) { ndhi = (ndhi - 1) & 0x3f; stop2--; } + } else if (!nd[ndhi]) { + if (ndhi != ndlo) { ndhi = (ndhi - 1) & 0x3f; stop2--; } + else return ndlo; + } + k -= 9; + } + if (k) { + uint32_t mask = (1U << k) - 1, mul = 1000000000 >> k, i = ndhi, carry = 0; + for (;;) { + uint32_t val = nd[i]; + nd[i] = (val >> k) + carry; + carry = (val & mask) * mul; + if (i == ndlo) break; + i = (i - 1) & 0x3f; + } + if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; } + } + return ndlo; +} + +/* Add m*10^e to nd (assumes ndlo <= e/9 <= ndhi and 0 <= m <= 9). */ +static uint32_t nd_add_m10e(uint32_t* nd, uint32_t ndhi, uint8_t m, int32_t e) +{ + uint32_t i, carry; + if (e >= 0) { + i = (uint32_t)e/9; + carry = m * (ndigits_dec_threshold[e - (int32_t)i*9] + 1); + } else { + int32_t f = (e-8)/9; + i = (uint32_t)(64 + f); + carry = m * (ndigits_dec_threshold[e - f*9] + 1); + } + for (;;) { + uint32_t val = nd[i] + carry; + if (LJ_UNLIKELY(val >= 1000000000)) { + val -= 1000000000; + nd[i] = val; + if (LJ_UNLIKELY(i == ndhi)) { + ndhi = (ndhi + 1) & 0x3f; + nd[ndhi] = 1; + break; + } + carry = 1; + i = (i + 1) & 0x3f; + } else { + nd[i] = val; + break; + } + } + return ndhi; +} + +/* Test whether two "nd" values are equal in their most significant digits. */ +static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen, + MSize prec) +{ + char nd9[9], ref9[9]; + if (hilen <= prec) { + if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0; + prec -= hilen; ref--; ndhi = (ndhi - 1) & 0x3f; + if (prec >= 9) { + if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0; + prec -= 9; ref--; ndhi = (ndhi - 1) & 0x3f; + } + } else { + prec -= hilen - 9; + } + lua_assert(prec < 9); + lj_strfmt_wuint9(nd9, nd[ndhi]); + lj_strfmt_wuint9(ref9, *ref); + return !memcmp(nd9, ref9, prec) && (nd9[prec] < '5') == (ref9[prec] < '5'); +} + +/* -- Formatted conversions to buffer ------------------------------------- */ + +/* Write formatted floating-point number to either sb or p. */ +static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p) +{ + MSize width = STRFMT_WIDTH(sf), prec = STRFMT_PREC(sf), len; + TValue t; + t.n = n; + if (LJ_UNLIKELY((t.u32.hi << 1) >= 0xffe00000)) { + /* Handle non-finite values uniformly for %a, %e, %f, %g. */ + int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0; + if (((t.u32.hi & 0x000fffff) | t.u32.lo) != 0) { + ch ^= ('n' << 16) | ('a' << 8) | 'n'; + if ((sf & STRFMT_F_SPACE)) prefix = ' '; + } else { + ch ^= ('i' << 16) | ('n' << 8) | 'f'; + if ((t.u32.hi & 0x80000000)) prefix = '-'; + else if ((sf & STRFMT_F_PLUS)) prefix = '+'; + else if ((sf & STRFMT_F_SPACE)) prefix = ' '; + } + len = 3 + (prefix != 0); + if (!p) p = lj_buf_more(sb, width > len ? width : len); + if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' '; + if (prefix) *p++ = prefix; + *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch; + } else if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_A)) { + /* %a */ + const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEFPX" + : "0123456789abcdefpx"; + int32_t e = (t.u32.hi >> 20) & 0x7ff; + char prefix = 0, eprefix = '+'; + if (t.u32.hi & 0x80000000) prefix = '-'; + else if ((sf & STRFMT_F_PLUS)) prefix = '+'; + else if ((sf & STRFMT_F_SPACE)) prefix = ' '; + t.u32.hi &= 0xfffff; + if (e) { + t.u32.hi |= 0x100000; + e -= 1023; + } else if (t.u32.lo | t.u32.hi) { + /* Non-zero denormal - normalise it. */ + uint32_t shift = t.u32.hi ? 20-lj_fls(t.u32.hi) : 52-lj_fls(t.u32.lo); + e = -1022 - shift; + t.u64 <<= shift; + } + /* abs(n) == t.u64 * 2^(e - 52) */ + /* If n != 0, bit 52 of t.u64 is set, and is the highest set bit. */ + if ((int32_t)prec < 0) { + /* Default precision: use smallest precision giving exact result. */ + prec = t.u32.lo ? 13-lj_ffs(t.u32.lo)/4 : 5-lj_ffs(t.u32.hi|0x100000)/4; + } else if (prec < 13) { + /* Precision is sufficiently low as to maybe require rounding. */ + t.u64 += (((uint64_t)1) << (51 - prec*4)); + } + if (e < 0) { + eprefix = '-'; + e = -e; + } + len = 5 + ndigits_dec((uint32_t)e) + prec + (prefix != 0) + + ((prec | (sf & STRFMT_F_ALT)) != 0); + if (!p) p = lj_buf_more(sb, width > len ? width : len); + if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) { + while (width-- > len) *p++ = ' '; + } + if (prefix) *p++ = prefix; + *p++ = '0'; + *p++ = hexdig[17]; /* x or X */ + if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) { + while (width-- > len) *p++ = '0'; + } + *p++ = '0' + (t.u32.hi >> 20); /* Usually '1', sometimes '0' or '2'. */ + if ((prec | (sf & STRFMT_F_ALT))) { + /* Emit fractional part. */ + char *q = p + 1 + prec; + *p = '.'; + if (prec < 13) t.u64 >>= (52 - prec*4); + else while (prec > 13) p[prec--] = '0'; + while (prec) { p[prec--] = hexdig[t.u64 & 15]; t.u64 >>= 4; } + p = q; + } + *p++ = hexdig[16]; /* p or P */ + *p++ = eprefix; /* + or - */ + p = lj_strfmt_wint(p, e); + } else { + /* %e or %f or %g - begin by converting n to "nd" format. */ + uint32_t nd[64]; + uint32_t ndhi = 0, ndlo, i; + int32_t e = (t.u32.hi >> 20) & 0x7ff, ndebias = 0; + char prefix = 0, *q; + if (t.u32.hi & 0x80000000) prefix = '-'; + else if ((sf & STRFMT_F_PLUS)) prefix = '+'; + else if ((sf & STRFMT_F_SPACE)) prefix = ' '; + prec += ((int32_t)prec >> 31) & 7; /* Default precision is 6. */ + if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_G)) { + /* %g - decrement precision if non-zero (to make it like %e). */ + prec--; + prec ^= (uint32_t)((int32_t)prec >> 31); + } + if ((sf & STRFMT_T_FP_E) && prec < 14 && n != 0) { + /* Precision is sufficiently low that rescaling will probably work. */ + if ((ndebias = rescale_e[e >> 6])) { + t.n = n * rescale_n[e >> 6]; + if (LJ_UNLIKELY(!e)) t.n *= 1e10, ndebias -= 10; + t.u64 -= 2; /* Convert 2ulp below (later we convert 2ulp above). */ + nd[0] = 0x100000 | (t.u32.hi & 0xfffff); + e = ((t.u32.hi >> 20) & 0x7ff) - 1075 - (ND_MUL2K_MAX_SHIFT < 29); + goto load_t_lo; rescale_failed: + t.n = n; + e = (t.u32.hi >> 20) & 0x7ff; + ndebias = ndhi = 0; + } + } + nd[0] = t.u32.hi & 0xfffff; + if (e == 0) e++; else nd[0] |= 0x100000; + e -= 1043; + if (t.u32.lo) { + e -= 32 + (ND_MUL2K_MAX_SHIFT < 29); load_t_lo: +#if ND_MUL2K_MAX_SHIFT >= 29 + nd[0] = (nd[0] << 3) | (t.u32.lo >> 29); + ndhi = nd_mul2k(nd, ndhi, 29, t.u32.lo & 0x1fffffff, sf); +#elif ND_MUL2K_MAX_SHIFT >= 11 + ndhi = nd_mul2k(nd, ndhi, 11, t.u32.lo >> 21, sf); + ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo >> 10) & 0x7ff, sf); + ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo << 1) & 0x7ff, sf); +#else +#error "ND_MUL2K_MAX_SHIFT too small" +#endif + } + if (e >= 0) { + ndhi = nd_mul2k(nd, ndhi, (uint32_t)e, 0, sf); + ndlo = 0; + } else { + ndlo = nd_div2k(nd, ndhi, (uint32_t)-e, sf); + if (ndhi && !nd[ndhi]) ndhi--; + } + /* abs(n) == nd * 10^ndebias (for slightly loose interpretation of ==) */ + if ((sf & STRFMT_T_FP_E)) { + /* %e or %g - assume %e and start by calculating nd's exponent (nde). */ + char eprefix = '+'; + int32_t nde = -1; + MSize hilen; + if (ndlo && !nd[ndhi]) { + ndhi = 64; do {} while (!nd[--ndhi]); + nde -= 64 * 9; + } + hilen = ndigits_dec(nd[ndhi]); + nde += ndhi * 9 + hilen; + if (ndebias) { + /* + ** Rescaling was performed, but this introduced some error, and might + ** have pushed us across a rounding boundary. We check whether this + ** error affected the result by introducing even more error (2ulp in + ** either direction), and seeing whether a roundary boundary was + ** crossed. Having already converted the -2ulp case, we save off its + ** most significant digits, convert the +2ulp case, and compare them. + */ + int32_t eidx = e + 70 + (ND_MUL2K_MAX_SHIFT < 29) + + (t.u32.lo >= 0xfffffffe && !(~t.u32.hi << 12)); + const int8_t *m_e = four_ulp_m_e + eidx * 2; + lua_assert(0 <= eidx && eidx < 128); + nd[33] = nd[ndhi]; + nd[32] = nd[(ndhi - 1) & 0x3f]; + nd[31] = nd[(ndhi - 2) & 0x3f]; + nd_add_m10e(nd, ndhi, (uint8_t)*m_e, m_e[1]); + if (LJ_UNLIKELY(!nd_similar(nd, ndhi, nd + 33, hilen, prec + 1))) { + goto rescale_failed; + } + } + if ((int32_t)(prec - nde) < (0x3f & -(int32_t)ndlo) * 9) { + /* Precision is sufficiently low as to maybe require rounding. */ + ndhi = nd_add_m10e(nd, ndhi, 5, nde - prec - 1); + nde += (hilen != ndigits_dec(nd[ndhi])); + } + nde += ndebias; + if ((sf & STRFMT_T_FP_F)) { + /* %g */ + if ((int32_t)prec >= nde && nde >= -4) { + if (nde < 0) ndhi = 0; + prec -= nde; + goto g_format_like_f; + } else if (!(sf & STRFMT_F_ALT) && prec && width > 5) { + /* Decrease precision in order to strip trailing zeroes. */ + char tail[9]; + uint32_t maxprec = hilen - 1 + ((ndhi - ndlo) & 0x3f) * 9; + if (prec >= maxprec) prec = maxprec; + else ndlo = (ndhi - (((int32_t)(prec - hilen) + 9) / 9)) & 0x3f; + i = prec - hilen - (((ndhi - ndlo) & 0x3f) * 9) + 10; + lj_strfmt_wuint9(tail, nd[ndlo]); + while (prec && tail[--i] == '0') { + prec--; + if (!i) { + if (ndlo == ndhi) { prec = 0; break; } + lj_strfmt_wuint9(tail, nd[++ndlo]); + i = 9; + } + } + } + } + if (nde < 0) { + /* Make nde non-negative. */ + eprefix = '-'; + nde = -nde; + } + len = 3 + prec + (prefix != 0) + ndigits_dec((uint32_t)nde) + (nde < 10) + + ((prec | (sf & STRFMT_F_ALT)) != 0); + if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 5); + if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) { + while (width-- > len) *p++ = ' '; + } + if (prefix) *p++ = prefix; + if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) { + while (width-- > len) *p++ = '0'; + } + q = lj_strfmt_wint(p + 1, nd[ndhi]); + p[0] = p[1]; /* Put leading digit in the correct place. */ + if ((prec | (sf & STRFMT_F_ALT))) { + /* Emit fractional part. */ + p[1] = '.'; p += 2; + prec -= (MSize)(q - p); p = q; /* Account for digits already emitted. */ + /* Then emit chunks of 9 digits (this may emit 8 digits too many). */ + for (i = ndhi; (int32_t)prec > 0 && i != ndlo; prec -= 9) { + i = (i - 1) & 0x3f; + p = lj_strfmt_wuint9(p, nd[i]); + } + if ((sf & STRFMT_T_FP_F) && !(sf & STRFMT_F_ALT)) { + /* %g (and not %#g) - strip trailing zeroes. */ + p += (int32_t)prec & ((int32_t)prec >> 31); + while (p[-1] == '0') p--; + if (p[-1] == '.') p--; + } else { + /* %e (or %#g) - emit trailing zeroes. */ + while ((int32_t)prec > 0) { *p++ = '0'; prec--; } + p += (int32_t)prec; + } + } else { + p++; + } + *p++ = (sf & STRFMT_F_UPPER) ? 'E' : 'e'; + *p++ = eprefix; /* + or - */ + if (nde < 10) *p++ = '0'; /* Always at least two digits of exponent. */ + p = lj_strfmt_wint(p, nde); + } else { + /* %f (or, shortly, %g in %f style) */ + if (prec < (MSize)(0x3f & -(int32_t)ndlo) * 9) { + /* Precision is sufficiently low as to maybe require rounding. */ + ndhi = nd_add_m10e(nd, ndhi, 5, 0 - prec - 1); + } + g_format_like_f: + if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT) && prec && width) { + /* Decrease precision in order to strip trailing zeroes. */ + if (ndlo) { + /* nd has a fractional part; we need to look at its digits. */ + char tail[9]; + uint32_t maxprec = (64 - ndlo) * 9; + if (prec >= maxprec) prec = maxprec; + else ndlo = 64 - (prec + 8) / 9; + i = prec - ((63 - ndlo) * 9); + lj_strfmt_wuint9(tail, nd[ndlo]); + while (prec && tail[--i] == '0') { + prec--; + if (!i) { + if (ndlo == 63) { prec = 0; break; } + lj_strfmt_wuint9(tail, nd[++ndlo]); + i = 9; + } + } + } else { + /* nd has no fractional part, so precision goes straight to zero. */ + prec = 0; + } + } + len = ndhi * 9 + ndigits_dec(nd[ndhi]) + prec + (prefix != 0) + + ((prec | (sf & STRFMT_F_ALT)) != 0); + if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 8); + if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) { + while (width-- > len) *p++ = ' '; + } + if (prefix) *p++ = prefix; + if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) { + while (width-- > len) *p++ = '0'; + } + /* Emit integer part. */ + p = lj_strfmt_wint(p, nd[ndhi]); + i = ndhi; + while (i) p = lj_strfmt_wuint9(p, nd[--i]); + if ((prec | (sf & STRFMT_F_ALT))) { + /* Emit fractional part. */ + *p++ = '.'; + /* Emit chunks of 9 digits (this may emit 8 digits too many). */ + while ((int32_t)prec > 0 && i != ndlo) { + i = (i - 1) & 0x3f; + p = lj_strfmt_wuint9(p, nd[i]); + prec -= 9; + } + if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT)) { + /* %g (and not %#g) - strip trailing zeroes. */ + p += (int32_t)prec & ((int32_t)prec >> 31); + while (p[-1] == '0') p--; + if (p[-1] == '.') p--; + } else { + /* %f (or %#g) - emit trailing zeroes. */ + while ((int32_t)prec > 0) { *p++ = '0'; prec--; } + p += (int32_t)prec; + } + } + } + } + if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' '; + return p; +} + +/* Add formatted floating-point number to buffer. */ +SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n) +{ + setsbufP(sb, lj_strfmt_wfnum(sb, sf, n, NULL)); + return sb; +} + +/* -- Conversions to strings ---------------------------------------------- */ + +/* Convert number to string. */ +GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o) +{ + char buf[STRFMT_MAXBUF_NUM]; + MSize len = (MSize)(lj_strfmt_wfnum(NULL, STRFMT_G14, o->n, buf) - buf); + return lj_str_new(L, buf, len); +} + diff --git a/luajit-2.1/src/lj_strscan.c b/luajit-2.1/src/lj_strscan.c index d3c5ba9..f5f35c9 100644 --- a/luajit-2.1/src/lj_strscan.c +++ b/luajit-2.1/src/lj_strscan.c @@ -1,6 +1,6 @@ /* ** String scanning. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include <math.h> diff --git a/luajit-2.1/src/lj_strscan.h b/luajit-2.1/src/lj_strscan.h index 7760689..6fb0dda 100644 --- a/luajit-2.1/src/lj_strscan.h +++ b/luajit-2.1/src/lj_strscan.h @@ -1,6 +1,6 @@ /* ** String scanning. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_STRSCAN_H diff --git a/luajit-2.1/src/lj_tab.c b/luajit-2.1/src/lj_tab.c index 88bf108..47c0cfd 100644 --- a/luajit-2.1/src/lj_tab.c +++ b/luajit-2.1/src/lj_tab.c @@ -1,6 +1,6 @@ /* ** Table handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -28,7 +28,6 @@ static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) #define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) #define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) -#define hashptr(t, p) hashlohi((t), u32ptr(p), u32ptr(p) + HASH_BIAS) #if LJ_GC64 #define hashgcref(t, r) \ hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32)) diff --git a/luajit-2.1/src/lj_tab.h b/luajit-2.1/src/lj_tab.h index 7cf031b..71e3494 100644 --- a/luajit-2.1/src/lj_tab.h +++ b/luajit-2.1/src/lj_tab.h @@ -1,6 +1,6 @@ /* ** Table handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TAB_H diff --git a/luajit-2.1/src/lj_target.h b/luajit-2.1/src/lj_target.h index 0daecb1..8dcae95 100644 --- a/luajit-2.1/src/lj_target.h +++ b/luajit-2.1/src/lj_target.h @@ -1,6 +1,6 @@ /* ** Definitions for target CPU. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_H @@ -55,7 +55,7 @@ typedef uint32_t RegSP; /* Bitset for registers. 32 registers suffice for most architectures. ** Note that one set holds bits for both GPRs and FPRs. */ -#if LJ_TARGET_PPC || LJ_TARGET_MIPS +#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 typedef uint64_t RegSet; #else typedef uint32_t RegSet; @@ -69,7 +69,7 @@ typedef uint32_t RegSet; #define rset_set(rs, r) (rs |= RID2RSET(r)) #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) -#if LJ_TARGET_PPC || LJ_TARGET_MIPS +#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) #else diff --git a/luajit-2.1/src/lj_target_arm.h b/luajit-2.1/src/lj_target_arm.h index 0a243b3..5551b1f 100644 --- a/luajit-2.1/src/lj_target_arm.h +++ b/luajit-2.1/src/lj_target_arm.h @@ -1,6 +1,6 @@ /* ** Definitions for ARM CPUs. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_ARM_H diff --git a/luajit-2.1/src/lj_target_arm64.h b/luajit-2.1/src/lj_target_arm64.h index 99e0adc..520023a 100644 --- a/luajit-2.1/src/lj_target_arm64.h +++ b/luajit-2.1/src/lj_target_arm64.h @@ -1,6 +1,6 @@ /* ** Definitions for ARM64 CPUs. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_ARM64_H @@ -55,7 +55,8 @@ enum { /* Make use of all registers, except for x18, fp, lr and sp. */ #define RSET_FIXED \ - (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)) + (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\ + RID2RSET(RID_GL)) #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) #define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) #define RSET_ALL (RSET_GPR|RSET_FPR) @@ -73,25 +74,256 @@ enum { #define REGARG_LASTFPR RID_D7 #define REGARG_NUMFPR 8 +/* -- Spill slots --------------------------------------------------------- */ + +/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. +** +** SPS_FIXED: Available fixed spill slots in interpreter frame. +** This definition must match with the vm_arm64.dasc file. +** Pre-allocate some slots to avoid sp adjust in every root trace. +** +** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. +*/ +#define SPS_FIXED 4 +#define SPS_FIRST 2 + +#define SPOFS_TMP 0 + +#define sps_scale(slot) (4 * (int32_t)(slot)) +#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) + +/* -- Exit state ---------------------------------------------------------- */ + +/* This definition must match with the *.dasc file(s). */ +typedef struct { + lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ + intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ + int32_t spill[256]; /* Spill slots. */ +} ExitState; + +/* Highest exit + 1 indicates stack check. */ +#define EXITSTATE_CHECKEXIT 1 + +/* Return the address of a per-trace exit stub. */ +static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) +{ + while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */ + return p + 3 + exitno; +} +/* Avoid dependence on lj_jit.h if only including lj_target.h. */ +#define exitstub_trace_addr(T, exitno) \ + exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) + /* -- Instructions -------------------------------------------------------- */ +/* ARM64 instructions are always little-endian. Swap for ARM64BE. */ +#if LJ_BE +#define A64I_LE(x) (lj_bswap(x)) +#else +#define A64I_LE(x) (x) +#endif + /* Instruction fields. */ #define A64F_D(r) (r) -#define A64F_N(r) ((r) << 5) -#define A64F_A(r) ((r) << 10) -#define A64F_M(r) ((r) << 16) +#define A64F_N(r) ((r) << 5) +#define A64F_A(r) ((r) << 10) +#define A64F_M(r) ((r) << 16) +#define A64F_IMMS(x) ((x) << 10) +#define A64F_IMMR(x) ((x) << 16) #define A64F_U16(x) ((x) << 5) +#define A64F_U12(x) ((x) << 10) #define A64F_S26(x) (x) -#define A64F_S19(x) ((x) << 5) +#define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5) +#define A64F_S14(x) ((x) << 5) +#define A64F_S9(x) ((x) << 12) +#define A64F_BIT(x) ((x) << 19) +#define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10)) +#define A64F_EX(ex) (A64I_EX | ((ex) << 13)) +#define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10)) +#define A64F_FP8(x) ((x) << 13) +#define A64F_CC(cc) ((cc) << 12) +#define A64F_LSL16(x) (((x) / 16) << 21) +#define A64F_BSH(sh) ((sh) << 10) typedef enum A64Ins { + A64I_S = 0x20000000, + A64I_X = 0x80000000, + A64I_EX = 0x00200000, + A64I_ON = 0x00200000, + A64I_K12 = 0x1a000000, + A64I_K13 = 0x18000000, + A64I_LS_U = 0x01000000, + A64I_LS_S = 0x00800000, + A64I_LS_R = 0x01200800, + A64I_LS_SH = 0x00001000, + A64I_LS_UXTWx = 0x00004000, + A64I_LS_SXTWx = 0x0000c000, + A64I_LS_SXTXx = 0x0000e000, + A64I_LS_LSLx = 0x00006000, + + A64I_ADDw = 0x0b000000, + A64I_ADDx = 0x8b000000, + A64I_ADDSw = 0x2b000000, + A64I_ADDSx = 0xab000000, + A64I_NEGw = 0x4b0003e0, + A64I_NEGx = 0xcb0003e0, + A64I_SUBw = 0x4b000000, + A64I_SUBx = 0xcb000000, + A64I_SUBSw = 0x6b000000, + A64I_SUBSx = 0xeb000000, + + A64I_MULw = 0x1b007c00, + A64I_MULx = 0x9b007c00, + A64I_SMULL = 0x9b207c00, + + A64I_ANDw = 0x0a000000, + A64I_ANDx = 0x8a000000, + A64I_ANDSw = 0x6a000000, + A64I_ANDSx = 0xea000000, + A64I_EORw = 0x4a000000, + A64I_EORx = 0xca000000, + A64I_ORRw = 0x2a000000, + A64I_ORRx = 0xaa000000, + A64I_TSTw = 0x6a00001f, + A64I_TSTx = 0xea00001f, + + A64I_CMPw = 0x6b00001f, + A64I_CMPx = 0xeb00001f, + A64I_CMNw = 0x2b00001f, + A64I_CMNx = 0xab00001f, + A64I_CCMPw = 0x7a400000, + A64I_CCMPx = 0xfa400000, + A64I_CSELw = 0x1a800000, + A64I_CSELx = 0x9a800000, + + A64I_ASRw = 0x13007c00, + A64I_ASRx = 0x9340fc00, + A64I_LSLx = 0xd3400000, + A64I_LSRx = 0xd340fc00, + A64I_SHRw = 0x1ac02000, + A64I_SHRx = 0x9ac02000, /* lsl/lsr/asr/ror x0, x0, x0 */ + A64I_REVw = 0x5ac00800, + A64I_REVx = 0xdac00c00, + + A64I_EXTRw = 0x13800000, + A64I_EXTRx = 0x93c00000, + A64I_SBFMw = 0x13000000, + A64I_SBFMx = 0x93400000, + A64I_SXTBw = 0x13001c00, + A64I_SXTHw = 0x13003c00, + A64I_SXTW = 0x93407c00, + A64I_UBFMw = 0x53000000, + A64I_UBFMx = 0xd3400000, + A64I_UXTBw = 0x53001c00, + A64I_UXTHw = 0x53003c00, + + A64I_MOVw = 0x2a0003e0, + A64I_MOVx = 0xaa0003e0, + A64I_MVNw = 0x2a2003e0, + A64I_MVNx = 0xaa2003e0, + A64I_MOVKw = 0x72800000, + A64I_MOVKx = 0xf2800000, A64I_MOVZw = 0x52800000, A64I_MOVZx = 0xd2800000, + A64I_MOVNw = 0x12800000, + A64I_MOVNx = 0x92800000, + + A64I_LDRB = 0x39400000, + A64I_LDRH = 0x79400000, + A64I_LDRw = 0xb9400000, + A64I_LDRx = 0xf9400000, A64I_LDRLw = 0x18000000, A64I_LDRLx = 0x58000000, - A64I_NOP = 0xd503201f, + A64I_STRB = 0x39000000, + A64I_STRH = 0x79000000, + A64I_STRw = 0xb9000000, + A64I_STRx = 0xf9000000, + A64I_STPw = 0x29000000, + A64I_STPx = 0xa9000000, + A64I_LDPw = 0x29400000, + A64I_LDPx = 0xa9400000, + A64I_B = 0x14000000, + A64I_BCC = 0x54000000, + A64I_BL = 0x94000000, A64I_BR = 0xd61f0000, + A64I_BLR = 0xd63f0000, + A64I_TBZ = 0x36000000, + A64I_TBNZ = 0x37000000, + A64I_CBZ = 0x34000000, + A64I_CBNZ = 0x35000000, + + A64I_NOP = 0xd503201f, + + /* FP */ + A64I_FADDd = 0x1e602800, + A64I_FSUBd = 0x1e603800, + A64I_FMADDd = 0x1f400000, + A64I_FMSUBd = 0x1f408000, + A64I_FNMADDd = 0x1f600000, + A64I_FNMSUBd = 0x1f608000, + A64I_FMULd = 0x1e600800, + A64I_FDIVd = 0x1e601800, + A64I_FNEGd = 0x1e614000, + A64I_FABS = 0x1e60c000, + A64I_FSQRTd = 0x1e61c000, + A64I_LDRs = 0xbd400000, + A64I_LDRd = 0xfd400000, + A64I_STRs = 0xbd000000, + A64I_STRd = 0xfd000000, + A64I_LDPs = 0x2d400000, + A64I_LDPd = 0x6d400000, + A64I_STPs = 0x2d000000, + A64I_STPd = 0x6d000000, + A64I_FCMPd = 0x1e602000, + A64I_FCMPZd = 0x1e602008, + A64I_FCSELd = 0x1e600c00, + A64I_FRINTMd = 0x1e654000, + A64I_FRINTPd = 0x1e64c000, + A64I_FRINTZd = 0x1e65c000, + + A64I_FCVT_F32_F64 = 0x1e624000, + A64I_FCVT_F64_F32 = 0x1e22c000, + A64I_FCVT_F32_S32 = 0x1e220000, + A64I_FCVT_F64_S32 = 0x1e620000, + A64I_FCVT_F32_U32 = 0x1e230000, + A64I_FCVT_F64_U32 = 0x1e630000, + A64I_FCVT_F32_S64 = 0x9e220000, + A64I_FCVT_F64_S64 = 0x9e620000, + A64I_FCVT_F32_U64 = 0x9e230000, + A64I_FCVT_F64_U64 = 0x9e630000, + A64I_FCVT_S32_F64 = 0x1e780000, + A64I_FCVT_S32_F32 = 0x1e380000, + A64I_FCVT_U32_F64 = 0x1e790000, + A64I_FCVT_U32_F32 = 0x1e390000, + A64I_FCVT_S64_F64 = 0x9e780000, + A64I_FCVT_S64_F32 = 0x9e380000, + A64I_FCVT_U64_F64 = 0x9e790000, + A64I_FCVT_U64_F32 = 0x9e390000, + + A64I_FMOV_S = 0x1e204000, + A64I_FMOV_D = 0x1e604000, + A64I_FMOV_R_S = 0x1e260000, + A64I_FMOV_S_R = 0x1e270000, + A64I_FMOV_R_D = 0x9e660000, + A64I_FMOV_D_R = 0x9e670000, + A64I_FMOV_DI = 0x1e601000, } A64Ins; +typedef enum A64Shift { + A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR +} A64Shift; + +typedef enum A64Extend { + A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX, + A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX, +} A64Extend; + +/* ARM condition codes. */ +typedef enum A64CC { + CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, + CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, + CC_HS = CC_CS, CC_LO = CC_CC +} A64CC; + #endif diff --git a/luajit-2.1/src/lj_target_mips.h b/luajit-2.1/src/lj_target_mips.h index 76645bc..740687b 100644 --- a/luajit-2.1/src/lj_target_mips.h +++ b/luajit-2.1/src/lj_target_mips.h @@ -1,6 +1,6 @@ /* ** Definitions for MIPS CPUs. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_MIPS_H @@ -13,11 +13,15 @@ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ _(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA) +#if LJ_SOFTFP +#define FPRDEF(_) +#else #define FPRDEF(_) \ _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) +#endif #define VRIDDEF(_) #define RIDENUM(name) RID_##name, @@ -28,6 +32,7 @@ enum { RID_MAX, RID_ZERO = RID_R0, RID_TMP = RID_RA, + RID_GP = RID_R28, /* Calling conventions. */ RID_RET = RID_R2, @@ -38,7 +43,11 @@ enum { RID_RETHI = RID_R2, RID_RETLO = RID_R3, #endif +#if LJ_SOFTFP + RID_FPRET = RID_R2, +#else RID_FPRET = RID_F0, +#endif RID_CFUNCADDR = RID_R25, /* These definitions must match with the *.dasc file(s): */ @@ -51,8 +60,12 @@ enum { /* Register ranges [min, max) and number of registers. */ RID_MIN_GPR = RID_R0, RID_MAX_GPR = RID_RA+1, - RID_MIN_FPR = RID_F0, + RID_MIN_FPR = RID_MAX_GPR, +#if LJ_SOFTFP + RID_MAX_FPR = RID_MIN_FPR, +#else RID_MAX_FPR = RID_F31+1, +#endif RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ }; @@ -62,33 +75,65 @@ enum { /* -- Register sets ------------------------------------------------------- */ -/* Make use of all registers, except ZERO, TMP, SP, SYS1, SYS2 and JGL. */ +/* Make use of all registers, except ZERO, TMP, SP, SYS1, SYS2, JGL and GP. */ #define RSET_FIXED \ (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ - RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)) + RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP)) #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) +#if LJ_SOFTFP +#define RSET_FPR 0 +#else +#if LJ_32 #define RSET_FPR \ (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) -#define RSET_ALL (RSET_GPR|RSET_FPR) -#define RSET_INIT RSET_ALL +#else +#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) +#endif +#endif +#define RSET_ALL (RSET_GPR|RSET_FPR) +#define RSET_INIT RSET_ALL #define RSET_SCRATCH_GPR \ (RSET_RANGE(RID_R1, RID_R15+1)|\ - RID2RSET(RID_R24)|RID2RSET(RID_R25)|RID2RSET(RID_R28)) + RID2RSET(RID_R24)|RID2RSET(RID_R25)) +#if LJ_SOFTFP +#define RSET_SCRATCH_FPR 0 +#else +#if LJ_32 #define RSET_SCRATCH_FPR \ (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ RID2RSET(RID_F16)|RID2RSET(RID_F18)) +#else +#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F24) +#endif +#endif #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) #define REGARG_FIRSTGPR RID_R4 +#if LJ_32 #define REGARG_LASTGPR RID_R7 #define REGARG_NUMGPR 4 +#else +#define REGARG_LASTGPR RID_R11 +#define REGARG_NUMGPR 8 +#endif +#if LJ_ABI_SOFTFP +#define REGARG_FIRSTFPR 0 +#define REGARG_LASTFPR 0 +#define REGARG_NUMFPR 0 +#else #define REGARG_FIRSTFPR RID_F12 +#if LJ_32 #define REGARG_LASTFPR RID_F14 #define REGARG_NUMFPR 2 +#else +#define REGARG_LASTFPR RID_F19 +#define REGARG_NUMFPR 8 +#endif +#endif /* -- Spill slots --------------------------------------------------------- */ @@ -99,7 +144,11 @@ enum { ** ** SPS_FIRST: First spill slot for general use. */ +#if LJ_32 #define SPS_FIXED 5 +#else +#define SPS_FIXED 4 +#endif #define SPS_FIRST 4 #define SPOFS_TMP 0 @@ -111,8 +160,10 @@ enum { /* This definition must match with the *.dasc file(s). */ typedef struct { +#if !LJ_SOFTFP lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ - int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ +#endif + intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ int32_t spill[256]; /* Spill slots. */ } ExitState; @@ -141,32 +192,41 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p) #define MIPSF_F(r) ((r) << 6) #define MIPSF_A(n) ((n) << 6) #define MIPSF_M(n) ((n) << 11) +#define MIPSF_L(n) ((n) << 6) typedef enum MIPSIns { + MIPSI_D = 0x38, + MIPSI_DV = 0x10, + MIPSI_D32 = 0x3c, /* Integer instructions. */ - MIPSI_MOVE = 0x00000021, + MIPSI_MOVE = 0x00000025, MIPSI_NOP = 0x00000000, MIPSI_LI = 0x24000000, MIPSI_LU = 0x34000000, MIPSI_LUI = 0x3c000000, - MIPSI_ADDIU = 0x24000000, + MIPSI_AND = 0x00000024, MIPSI_ANDI = 0x30000000, + MIPSI_OR = 0x00000025, MIPSI_ORI = 0x34000000, + MIPSI_XOR = 0x00000026, MIPSI_XORI = 0x38000000, + MIPSI_NOR = 0x00000027, + + MIPSI_SLT = 0x0000002a, + MIPSI_SLTU = 0x0000002b, MIPSI_SLTI = 0x28000000, MIPSI_SLTIU = 0x2c000000, MIPSI_ADDU = 0x00000021, + MIPSI_ADDIU = 0x24000000, + MIPSI_SUB = 0x00000022, MIPSI_SUBU = 0x00000023, MIPSI_MUL = 0x70000002, - MIPSI_AND = 0x00000024, - MIPSI_OR = 0x00000025, - MIPSI_XOR = 0x00000026, - MIPSI_NOR = 0x00000027, - MIPSI_SLT = 0x0000002a, - MIPSI_SLTU = 0x0000002b, + MIPSI_DIV = 0x0000001a, + MIPSI_DIVU = 0x0000001b, + MIPSI_MOVZ = 0x0000000a, MIPSI_MOVN = 0x0000000b, MIPSI_MFHI = 0x00000010, @@ -176,19 +236,24 @@ typedef enum MIPSIns { MIPSI_SLL = 0x00000000, MIPSI_SRL = 0x00000002, MIPSI_SRA = 0x00000003, - MIPSI_ROTR = 0x00200002, /* MIPS32R2 */ + MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */ + MIPSI_DROTR = 0x0020003a, + MIPSI_DROTR32 = 0x0020003e, MIPSI_SLLV = 0x00000004, MIPSI_SRLV = 0x00000006, MIPSI_SRAV = 0x00000007, - MIPSI_ROTRV = 0x00000046, /* MIPS32R2 */ + MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */ + MIPSI_DROTRV = 0x00000056, - MIPSI_SEB = 0x7c000420, /* MIPS32R2 */ - MIPSI_SEH = 0x7c000620, /* MIPS32R2 */ - MIPSI_WSBH = 0x7c0000a0, /* MIPS32R2 */ + MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */ + MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */ + MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */ + MIPSI_DSBH = 0x7c0000a4, MIPSI_B = 0x10000000, MIPSI_J = 0x08000000, MIPSI_JAL = 0x0c000000, + MIPSI_JALX = 0x74000000, MIPSI_JR = 0x00000008, MIPSI_JALR = 0x0000f809, @@ -201,7 +266,9 @@ typedef enum MIPSIns { /* Load/store instructions. */ MIPSI_LW = 0x8c000000, + MIPSI_LD = 0xdc000000, MIPSI_SW = 0xac000000, + MIPSI_SD = 0xfc000000, MIPSI_LB = 0x80000000, MIPSI_SB = 0xa0000000, MIPSI_LH = 0x84000000, @@ -213,6 +280,50 @@ typedef enum MIPSIns { MIPSI_LDC1 = 0xd4000000, MIPSI_SDC1 = 0xf4000000, + /* MIPS64 instructions. */ + MIPSI_DADD = 0x0000002c, + MIPSI_DADDI = 0x60000000, + MIPSI_DADDU = 0x0000002d, + MIPSI_DADDIU = 0x64000000, + MIPSI_DSUB = 0x0000002e, + MIPSI_DSUBU = 0x0000002f, + MIPSI_DDIV = 0x0000001e, + MIPSI_DDIVU = 0x0000001f, + MIPSI_DMULT = 0x0000001c, + MIPSI_DMULTU = 0x0000001d, + + MIPSI_DSLL = 0x00000038, + MIPSI_DSRL = 0x0000003a, + MIPSI_DSLLV = 0x00000014, + MIPSI_DSRLV = 0x00000016, + MIPSI_DSRA = 0x0000003b, + MIPSI_DSRAV = 0x00000017, + MIPSI_DSRA32 = 0x0000003f, + MIPSI_DSLL32 = 0x0000003c, + MIPSI_DSRL32 = 0x0000003e, + MIPSI_DSHD = 0x7c000164, + + MIPSI_AADDU = LJ_32 ? MIPSI_ADDU : MIPSI_DADDU, + MIPSI_AADDIU = LJ_32 ? MIPSI_ADDIU : MIPSI_DADDIU, + MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU, + MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD, + MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD, + + /* Extract/insert instructions. */ + MIPSI_DEXTM = 0x7c000001, + MIPSI_DEXTU = 0x7c000002, + MIPSI_DEXT = 0x7c000003, + MIPSI_DINSM = 0x7c000005, + MIPSI_DINSU = 0x7c000006, + MIPSI_DINS = 0x7c000007, + + MIPSI_RINT_D = 0x4620001a, + MIPSI_RINT_S = 0x4600001a, + MIPSI_RINT = 0x4400001a, + MIPSI_FLOOR_D = 0x4620000b, + MIPSI_CEIL_D = 0x4620000a, + MIPSI_ROUND_D = 0x46200008, + /* FP instructions. */ MIPSI_MOV_S = 0x46000006, MIPSI_MOV_D = 0x46200006, @@ -237,24 +348,30 @@ typedef enum MIPSIns { MIPSI_CVT_W_D = 0x46200024, MIPSI_CVT_S_W = 0x46800020, MIPSI_CVT_D_W = 0x46800021, + MIPSI_CVT_S_L = 0x46a00020, + MIPSI_CVT_D_L = 0x46a00021, MIPSI_TRUNC_W_S = 0x4600000d, MIPSI_TRUNC_W_D = 0x4620000d, + MIPSI_TRUNC_L_S = 0x46000009, + MIPSI_TRUNC_L_D = 0x46200009, MIPSI_FLOOR_W_S = 0x4600000f, MIPSI_FLOOR_W_D = 0x4620000f, MIPSI_MFC1 = 0x44000000, MIPSI_MTC1 = 0x44800000, + MIPSI_DMTC1 = 0x44a00000, + MIPSI_DMFC1 = 0x44200000, MIPSI_BC1F = 0x45000000, MIPSI_BC1T = 0x45010000, MIPSI_C_EQ_D = 0x46200032, + MIPSI_C_OLT_S = 0x46000034, MIPSI_C_OLT_D = 0x46200034, MIPSI_C_ULT_D = 0x46200035, MIPSI_C_OLE_D = 0x46200036, MIPSI_C_ULE_D = 0x46200037, - } MIPSIns; #endif diff --git a/luajit-2.1/src/lj_target_ppc.h b/luajit-2.1/src/lj_target_ppc.h index 9986768..c5c991a 100644 --- a/luajit-2.1/src/lj_target_ppc.h +++ b/luajit-2.1/src/lj_target_ppc.h @@ -1,6 +1,6 @@ /* ** Definitions for PPC CPUs. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_PPC_H diff --git a/luajit-2.1/src/lj_target_x86.h b/luajit-2.1/src/lj_target_x86.h index fc9d370..356f792 100644 --- a/luajit-2.1/src/lj_target_x86.h +++ b/luajit-2.1/src/lj_target_x86.h @@ -1,6 +1,6 @@ /* ** Definitions for x86 and x64 CPUs. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_X86_H @@ -22,7 +22,7 @@ _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) #endif #define VRIDDEF(_) \ - _(MRM) + _(MRM) _(RIP) #define RIDENUM(name) RID_##name, @@ -31,6 +31,7 @@ enum { FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ RID_MAX, RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ + RID_RIP = RID_MAX+5, /* Pseudo-id for RIP (x64 only), rm bits = 5. */ /* Calling conventions. */ RID_SP = RID_ESP, @@ -63,8 +64,10 @@ enum { /* -- Register sets ------------------------------------------------------- */ -/* Make use of all registers, except the stack pointer. */ -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) +/* Make use of all registers, except the stack pointer (and maybe DISPATCH). */ +#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \ + - RID2RSET(RID_ESP) \ + - LJ_GC64*RID2RSET(RID_DISPATCH)) #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) #define RSET_ALL (RSET_GPR|RSET_FPR) #define RSET_INIT RSET_ALL @@ -189,12 +192,18 @@ typedef struct { #define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24))) #define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24))) +#define XV_660f38(o) ((uint32_t)(0x79e2c4 + (0x##o<<24))) +#define XV_f20f38(o) ((uint32_t)(0x7be2c4 + (0x##o<<24))) +#define XV_f20f3a(o) ((uint32_t)(0x7be3c4 + (0x##o<<24))) +#define XV_f30f38(o) ((uint32_t)(0x7ae2c4 + (0x##o<<24))) + /* This list of x86 opcodes is not intended to be complete. Opcodes are only ** included when needed. Take a look at DynASM or jit.dis_x86 to see the ** whole mess. */ typedef enum { /* Fixed length opcodes. XI_* prefix. */ + XI_O16 = 0x66, XI_NOP = 0x90, XI_XCHGa = 0x90, XI_CALL = 0xe8, @@ -212,6 +221,7 @@ typedef enum { XI_PUSHi8 = 0x6a, XI_TESTb = 0x84, XI_TEST = 0x85, + XI_INT3 = 0xcc, XI_MOVmi = 0xc7, XI_GROUP5 = 0xff, @@ -231,7 +241,14 @@ typedef enum { XI_FSCALE = 0xfdd9, XI_FYL2X = 0xf1d9, + /* VEX-encoded instructions. XV_* prefix. */ + XV_RORX = XV_f20f3a(f0), + XV_SARX = XV_f30f38(f7), + XV_SHLX = XV_660f38(f7), + XV_SHRX = XV_f20f38(f7), + /* Variable-length opcodes. XO_* prefix. */ + XO_OR = XO_(0b), XO_MOV = XO_(8b), XO_MOVto = XO_(89), XO_MOVtow = XO_66(89), diff --git a/luajit-2.1/src/lj_trace.c b/luajit-2.1/src/lj_trace.c index 1d0c2e5..d85b47f 100644 --- a/luajit-2.1/src/lj_trace.c +++ b/luajit-2.1/src/lj_trace.c @@ -1,6 +1,6 @@ /* ** Trace management. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_trace_c @@ -117,15 +117,26 @@ static void perftools_addtrace(GCtrace *T) } #endif -/* Allocate space for copy of trace. */ -static GCtrace *trace_save_alloc(jit_State *J) +/* Allocate space for copy of T. */ +GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T) { size_t sztr = ((sizeof(GCtrace)+7)&~7); - size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns); + size_t szins = (T->nins-T->nk)*sizeof(IRIns); size_t sz = sztr + szins + - J->cur.nsnap*sizeof(SnapShot) + - J->cur.nsnapmap*sizeof(SnapEntry); - return lj_mem_newt(J->L, (MSize)sz, GCtrace); + T->nsnap*sizeof(SnapShot) + + T->nsnapmap*sizeof(SnapEntry); + GCtrace *T2 = lj_mem_newt(L, (MSize)sz, GCtrace); + char *p = (char *)T2 + sztr; + T2->gct = ~LJ_TTRACE; + T2->marked = 0; + T2->traceno = 0; + T2->ir = (IRIns *)p - T->nk; + T2->nins = T->nins; + T2->nk = T->nk; + T2->nsnap = T->nsnap; + T2->nsnapmap = T->nsnapmap; + memcpy(p, T->ir + T->nk, szins); + return T2; } /* Save current trace by copying and compacting it. */ @@ -139,12 +150,12 @@ static void trace_save(jit_State *J, GCtrace *T) setgcrefp(J2G(J)->gc.root, T); newwhite(J2G(J), T); T->gct = ~LJ_TTRACE; - T->ir = (IRIns *)p - J->cur.nk; - memcpy(p, J->cur.ir+J->cur.nk, szins); + T->ir = (IRIns *)p - J->cur.nk; /* The IR has already been copied above. */ p += szins; TRACE_APPENDVEC(snap, nsnap, SnapShot) TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry) J->cur.traceno = 0; + J->curfinal = NULL; setgcrefp(J->trace[T->traceno], T); lj_gc_barriertrace(J2G(J), T->traceno); lj_gdbjit_addtrace(J, T); @@ -284,7 +295,6 @@ int lj_trace_flushall(lua_State *L) memset(J->penalty, 0, sizeof(J->penalty)); /* Free the whole machine code and invalidate all exit stub groups. */ lj_mcode_free(J); - lj_ir_k64_freeall(J); memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup)); lj_vmevent_send(L, TRACE, setstrV(L, L->top++, lj_str_newlit(L, "flush")); @@ -297,13 +307,42 @@ void lj_trace_initstate(global_State *g) { jit_State *J = G2J(g); TValue *tv; - /* Initialize SIMD constants. */ + + /* Initialize aligned SIMD constants. */ tv = LJ_KSIMD(J, LJ_KSIMD_ABS); tv[0].u64 = U64x(7fffffff,ffffffff); tv[1].u64 = U64x(7fffffff,ffffffff); tv = LJ_KSIMD(J, LJ_KSIMD_NEG); tv[0].u64 = U64x(80000000,00000000); tv[1].u64 = U64x(80000000,00000000); + + /* Initialize 32/64 bit constants. */ +#if LJ_TARGET_X86ORX64 + J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); +#if LJ_32 + J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); +#endif + J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); + J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000; +#endif +#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 + J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); +#endif +#if LJ_TARGET_PPC + J->k32[LJ_K32_2P52_2P31] = 0x59800004; + J->k32[LJ_K32_2P52] = 0x59800000; +#endif +#if LJ_TARGET_PPC || LJ_TARGET_MIPS + J->k32[LJ_K32_2P31] = 0x4f000000; +#endif +#if LJ_TARGET_MIPS + J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); +#if LJ_64 + J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); + J->k32[LJ_K32_2P63] = 0x5f000000; + J->k32[LJ_K32_M2P64] = 0xdf800000; +#endif +#endif } /* Free everything associated with the JIT compiler state. */ @@ -318,7 +357,6 @@ void lj_trace_freestate(global_State *g) } #endif lj_mcode_free(J); - lj_ir_k64_freeall(J); lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry); lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot); lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns); @@ -403,7 +441,7 @@ static void trace_start(jit_State *J) J->postproc = LJ_POST_NONE; lj_resetsplit(J); J->retryrec = 0; - J->ktracep = NULL; + J->ktrace = 0; setgcref(J->cur.startpt, obj2gco(J->pt)); L = J->L; @@ -415,6 +453,12 @@ static void trace_start(jit_State *J) if (J->parent) { setintV(L->top++, J->parent); setintV(L->top++, J->exitno); + } else { + BCOp op = bc_op(*J->pc); + if (op == BC_CALLM || op == BC_CALL || op == BC_ITERC) { + setintV(L->top++, J->exitno); /* Parent of stitched trace. */ + setintV(L->top++, -1); + } } ); lj_record_setup(J); @@ -427,7 +471,7 @@ static void trace_stop(jit_State *J) BCOp op = bc_op(J->cur.startins); GCproto *pt = &gcref(J->cur.startpt)->pt; TraceNo traceno = J->cur.traceno; - GCtrace *T = trace_save_alloc(J); /* Do this first. May throw OOM. */ + GCtrace *T = J->curfinal; lua_State *L; switch (op) { @@ -479,9 +523,6 @@ static void trace_stop(jit_State *J) lj_mcode_commit(J, J->cur.mcode); J->postproc = LJ_POST_NONE; trace_save(J, T); - if (J->ktracep) { /* Patch K64Array slot with the final GCtrace pointer. */ - setgcV(J->L, J->ktracep, obj2gco(T), LJ_TTRACE); - } L = J->L; lj_vmevent_send(L, TRACE, @@ -515,6 +556,10 @@ static int trace_abort(jit_State *J) J->postproc = LJ_POST_NONE; lj_mcode_abort(J); + if (J->curfinal) { + lj_trace_free(J2G(J), J->curfinal); + J->curfinal = NULL; + } if (tvisnumber(L->top-1)) e = (TraceError)numberVint(L->top-1); if (e == LJ_TRERR_MCODELM) { @@ -849,7 +894,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) ERRNO_RESTORE switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: - return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) + LJ_FR2); + return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) - LJ_FR2); case BC_RETM: return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc)); case BC_TSETM: diff --git a/luajit-2.1/src/lj_trace.h b/luajit-2.1/src/lj_trace.h index 9eaf91b..22cae74 100644 --- a/luajit-2.1/src/lj_trace.h +++ b/luajit-2.1/src/lj_trace.h @@ -1,6 +1,6 @@ /* ** Trace management. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TRACE_H @@ -23,6 +23,7 @@ LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e); LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e); /* Trace management. */ +LJ_FUNC GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T); LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T); LJ_FUNC void lj_trace_reenableproto(GCproto *pt); LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt); diff --git a/luajit-2.1/src/lj_traceerr.h b/luajit-2.1/src/lj_traceerr.h index d434be1..1363c4f 100644 --- a/luajit-2.1/src/lj_traceerr.h +++ b/luajit-2.1/src/lj_traceerr.h @@ -1,6 +1,6 @@ /* ** Trace compiler error messages. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* This file may be included multiple times with different TREDEF macros. */ diff --git a/luajit-2.1/src/lj_udata.c b/luajit-2.1/src/lj_udata.c index d401a3d..bd0321b 100644 --- a/luajit-2.1/src/lj_udata.c +++ b/luajit-2.1/src/lj_udata.c @@ -1,6 +1,6 @@ /* ** Userdata handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_udata_c diff --git a/luajit-2.1/src/lj_udata.h b/luajit-2.1/src/lj_udata.h index 676e970..f271a42 100644 --- a/luajit-2.1/src/lj_udata.h +++ b/luajit-2.1/src/lj_udata.h @@ -1,6 +1,6 @@ /* ** Userdata handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_UDATA_H diff --git a/luajit-2.1/src/lj_vm.h b/luajit-2.1/src/lj_vm.h index b31e22f..1cc7eed 100644 --- a/luajit-2.1/src/lj_vm.h +++ b/luajit-2.1/src/lj_vm.h @@ -1,6 +1,6 @@ /* ** Assembler VM interface definitions. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_VM_H @@ -17,6 +17,10 @@ LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CFunction func, void *ud, LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode); LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe); +#if LJ_ABI_WIN && LJ_TARGET_X86 +LJ_ASMF_NORET void LJ_FASTCALL lj_vm_rtlunwind(void *cframe, void *excptrec, + void *unwinder, int errcode); +#endif LJ_ASMF void lj_vm_unwind_c_eh(void); LJ_ASMF void lj_vm_unwind_ff_eh(void); #if LJ_TARGET_X86ORX64 @@ -50,7 +54,7 @@ LJ_ASMF void lj_vm_exit_handler(void); LJ_ASMF void lj_vm_exit_interp(void); /* Internal math helper functions. */ -#if LJ_TARGET_PPC || LJ_TARGET_ARM64 +#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP) #define lj_vm_floor floor #define lj_vm_ceil ceil #else @@ -66,6 +70,9 @@ LJ_ASMF double lj_vm_log2(double); #else #define lj_vm_log2 log2 #endif +#if !(defined(_LJ_DISPATCH_H) && LJ_TARGET_MIPS) +LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); +#endif #if LJ_HASJIT #if LJ_TARGET_X86ORX64 @@ -90,7 +97,6 @@ LJ_ASMF double lj_vm_exp2(double); #else #define lj_vm_exp2 exp2 #endif -LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); #if LJ_HASFFI LJ_ASMF int lj_vm_errno(void); #endif diff --git a/luajit-2.1/src/lj_vmevent.c b/luajit-2.1/src/lj_vmevent.c index 87ebcfb..8664080 100644 --- a/luajit-2.1/src/lj_vmevent.c +++ b/luajit-2.1/src/lj_vmevent.c @@ -1,6 +1,6 @@ /* ** VM event handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #include <stdio.h> diff --git a/luajit-2.1/src/lj_vmevent.h b/luajit-2.1/src/lj_vmevent.h index 231e00e..050fb4d 100644 --- a/luajit-2.1/src/lj_vmevent.h +++ b/luajit-2.1/src/lj_vmevent.h @@ -1,6 +1,6 @@ /* ** VM event handling. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_VMEVENT_H diff --git a/luajit-2.1/src/lj_vmmath.c b/luajit-2.1/src/lj_vmmath.c index ecad295..b231d3e 100644 --- a/luajit-2.1/src/lj_vmmath.c +++ b/luajit-2.1/src/lj_vmmath.c @@ -1,6 +1,6 @@ /* ** Math helper functions for assembler VM. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #define lj_vmmath_c @@ -57,6 +57,20 @@ double lj_vm_foldarith(double x, double y, int op) } } +#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS +int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) +{ + uint32_t y, ua, ub; + lua_assert(b != 0); /* This must be checked before using this function. */ + ua = a < 0 ? (uint32_t)-a : (uint32_t)a; + ub = b < 0 ? (uint32_t)-b : (uint32_t)b; + y = ua % ub; + if (y != 0 && (a^b) < 0) y = y - ub; + if (((int32_t)y^b) < 0) y = (uint32_t)-(int32_t)y; + return (int32_t)y; +} +#endif + #if LJ_HASJIT #ifdef LUAJIT_NO_LOG2 @@ -73,20 +87,6 @@ double lj_vm_exp2(double a) } #endif -#if !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC) -int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) -{ - uint32_t y, ua, ub; - lua_assert(b != 0); /* This must be checked before using this function. */ - ua = a < 0 ? (uint32_t)-a : (uint32_t)a; - ub = b < 0 ? (uint32_t)-b : (uint32_t)b; - y = ua % ub; - if (y != 0 && (a^b) < 0) y = y - ub; - if (((int32_t)y^b) < 0) y = (uint32_t)-(int32_t)y; - return (int32_t)y; -} -#endif - #if !LJ_TARGET_X86ORX64 /* Unsigned x^k. */ static double lj_vm_powui(double x, uint32_t k) diff --git a/luajit-2.1/src/ljamalg.c b/luajit-2.1/src/ljamalg.c index be0c52d..f1f2862 100644 --- a/luajit-2.1/src/ljamalg.c +++ b/luajit-2.1/src/ljamalg.c @@ -1,6 +1,6 @@ /* ** LuaJIT core and libraries amalgamation. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ /* @@ -46,6 +46,7 @@ #include "lj_vmmath.c" #include "lj_strscan.c" #include "lj_strfmt.c" +#include "lj_strfmt_num.c" #include "lj_api.c" #include "lj_profile.c" #include "lj_lex.c" diff --git a/luajit-2.1/src/lua.h b/luajit-2.1/src/lua.h index 352d29f..850bd79 100644 --- a/luajit-2.1/src/lua.h +++ b/luajit-2.1/src/lua.h @@ -39,7 +39,8 @@ #define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i)) -/* thread status; 0 is OK */ +/* thread status */ +#define LUA_OK 0 #define LUA_YIELD 1 #define LUA_ERRRUN 2 #define LUA_ERRSYNTAX 3 @@ -347,6 +348,13 @@ LUA_API void *lua_upvalueid (lua_State *L, int idx, int n); LUA_API void lua_upvaluejoin (lua_State *L, int idx1, int n1, int idx2, int n2); LUA_API int lua_loadx (lua_State *L, lua_Reader reader, void *dt, const char *chunkname, const char *mode); +LUA_API const lua_Number *lua_version (lua_State *L); +LUA_API void lua_copy (lua_State *L, int fromidx, int toidx); +LUA_API lua_Number lua_tonumberx (lua_State *L, int idx, int *isnum); +LUA_API lua_Integer lua_tointegerx (lua_State *L, int idx, int *isnum); + +/* From Lua 5.3. */ +LUA_API int lua_isyieldable (lua_State *L); struct lua_Debug { diff --git a/luajit-2.1/src/luaconf.h b/luajit-2.1/src/luaconf.h index 6375ed3..2b6e43b 100644 --- a/luajit-2.1/src/luaconf.h +++ b/luajit-2.1/src/luaconf.h @@ -1,6 +1,6 @@ /* ** Configuration header. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef luaconf_h @@ -37,7 +37,7 @@ #endif #define LUA_LROOT "/usr/local" #define LUA_LUADIR "/lua/5.1/" -#define LUA_LJDIR "/luajit-2.1.0-beta1/" +#define LUA_LJDIR "/luajit-2.1.0-beta2/" #ifdef LUA_ROOT #define LUA_JROOT LUA_ROOT @@ -79,7 +79,7 @@ #define LUA_IGMARK "-" #define LUA_PATH_CONFIG \ LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \ - LUA_EXECDIR "\n" LUA_IGMARK + LUA_EXECDIR "\n" LUA_IGMARK "\n" /* Quoting in error messages. */ #define LUA_QL(x) "'" x "'" @@ -92,10 +92,6 @@ #define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */ #define LUA_MAXCAPTURES 32 /* Max. pattern captures. */ -/* Compatibility with older library function names. */ -#define LUA_COMPAT_MOD /* OLD: math.mod, NEW: math.fmod */ -#define LUA_COMPAT_GFIND /* OLD: string.gfind, NEW: string.gmatch */ - /* Configuration for the frontend (the luajit executable). */ #if defined(luajit_c) #define LUA_PROGNAME "luajit" /* Fallback frontend name. */ diff --git a/luajit-2.1/src/luajit.c b/luajit-2.1/src/luajit.c index 8a3f848..a4a9131 100644 --- a/luajit-2.1/src/luajit.c +++ b/luajit-2.1/src/luajit.c @@ -1,6 +1,6 @@ /* ** LuaJIT frontend. Runs commands, scripts, read-eval-print (REPL) etc. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -403,7 +403,7 @@ static int docall(lua_State *L, int narg, int clear) #endif lua_remove(L, base); /* remove traceback function */ /* force a complete garbage collection in case of errors */ - if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0); + if (status != LUA_OK) lua_gc(L, LUA_GCCOLLECT, 0); return status; } @@ -439,22 +439,15 @@ static void print_jit_status(lua_State *L) putc('\n', stdout); } -static int getargs(lua_State *L, char **argv, int n) +static void createargtable(lua_State *L, char **argv, int argc, int argf) { - int narg; int i; - int argc = 0; - while (argv[argc]) argc++; /* count total number of arguments */ - narg = argc - (n + 1); /* number of arguments to the script */ - luaL_checkstack(L, narg + 3, "too many arguments to script"); - for (i = n+1; i < argc; i++) - lua_pushstring(L, argv[i]); - lua_createtable(L, narg, n + 1); + lua_createtable(L, argc - argf, argf); for (i = 0; i < argc; i++) { lua_pushstring(L, argv[i]); - lua_rawseti(L, -2, i - n); + lua_rawseti(L, -2, i - argf); } - return narg; + lua_setglobal(L, "arg"); } static int dofile(lua_State *L, const char *name) @@ -546,9 +539,9 @@ static void dotty(lua_State *L) progname = NULL; lua_rl_init(L); while ((status = loadline(L)) != -1) { - if (status == 0) status = docall(L, 0, 0); + if (status == LUA_OK) status = docall(L, 0, 0); report(L, status); - if (status == 0 && lua_gettop(L) > 0) { /* any result to print? */ + if (status == LUA_OK && lua_gettop(L) > 0) { /* any result to print? */ lua_getglobal(L, "print"); lua_insert(L, 1); if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) @@ -564,21 +557,30 @@ static void dotty(lua_State *L) progname = oldprogname; } -static int handle_script(lua_State *L, char **argv, int n) +static int handle_script(lua_State *L, char **argx) { int status; - const char *fname; - int narg = getargs(L, argv, n); /* collect arguments */ - lua_setglobal(L, "arg"); - fname = argv[n]; - if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0) + const char *fname = argx[0]; + if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0) fname = NULL; /* stdin */ status = luaL_loadfile(L, fname); - lua_insert(L, -(narg+1)); - if (status == 0) + if (status == LUA_OK) { + /* Fetch args from arg table. LUA_INIT or -e might have changed them. */ + int narg = 0; + lua_getglobal(L, "arg"); + if (lua_istable(L, -1)) { + do { + narg++; + lua_rawgeti(L, -narg, narg); + } while (!lua_isnil(L, -1)); + lua_pop(L, 1); + lua_remove(L, -narg); + narg--; + } else { + lua_pop(L, 1); + } status = docall(L, narg, 0); - else - lua_pop(L, narg); + } return report(L, status); } @@ -675,7 +677,8 @@ static int dobytecode(lua_State *L, char **argv) } for (argv++; *argv != NULL; narg++, argv++) lua_pushstring(L, *argv); - return report(L, lua_pcall(L, narg, 0, 0)); + report(L, lua_pcall(L, narg, 0, 0)); + return -1; } /* check that argument has no extra characters at the end */ @@ -696,7 +699,7 @@ static int collectargs(char **argv, int *flags) switch (argv[i][1]) { /* Check option. */ case '-': notail(argv[i]); - return (argv[i+1] != NULL ? i+1 : 0); + return i+1; case '\0': return i; case 'i': @@ -721,23 +724,23 @@ static int collectargs(char **argv, int *flags) case 'b': /* LuaJIT extension */ if (*flags) return -1; *flags |= FLAGS_EXEC; - return 0; + return i+1; case 'E': *flags |= FLAGS_NOENV; break; default: return -1; /* invalid option */ } } - return 0; + return i; } -static int runargs(lua_State *L, char **argv, int n) +static int runargs(lua_State *L, char **argv, int argn) { int i; - for (i = 1; i < n; i++) { + for (i = 1; i < argn; i++) { if (argv[i] == NULL) continue; lua_assert(argv[i][0] == '-'); - switch (argv[i][1]) { /* option */ + switch (argv[i][1]) { case 'e': { const char *chunk = argv[i] + 2; if (*chunk == '\0') chunk = argv[++i]; @@ -751,10 +754,10 @@ static int runargs(lua_State *L, char **argv, int n) if (*filename == '\0') filename = argv[++i]; lua_assert(filename != NULL); if (dolibrary(L, filename)) - return 1; /* stop if file fails */ + return 1; break; } - case 'j': { /* LuaJIT extension */ + case 'j': { /* LuaJIT extension. */ const char *cmd = argv[i] + 2; if (*cmd == '\0') cmd = argv[++i]; lua_assert(cmd != NULL); @@ -762,16 +765,16 @@ static int runargs(lua_State *L, char **argv, int n) return 1; break; } - case 'O': /* LuaJIT extension */ + case 'O': /* LuaJIT extension. */ if (dojitopt(L, argv[i] + 2)) return 1; break; - case 'b': /* LuaJIT extension */ + case 'b': /* LuaJIT extension. */ return dobytecode(L, argv+i); default: break; } } - return 0; + return LUA_OK; } static int handle_luainit(lua_State *L) @@ -782,7 +785,7 @@ static int handle_luainit(lua_State *L) const char *init = getenv(LUA_INIT); #endif if (init == NULL) - return 0; /* status OK */ + return LUA_OK; else if (init[0] == '@') return dofile(L, init+1); else @@ -799,45 +802,57 @@ static int pmain(lua_State *L) { struct Smain *s = &smain; char **argv = s->argv; - int script; + int argn; int flags = 0; globalL = L; if (argv[0] && argv[0][0]) progname = argv[0]; - LUAJIT_VERSION_SYM(); /* linker-enforced version check */ - script = collectargs(argv, &flags); - if (script < 0) { /* invalid args? */ + + LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */ + + argn = collectargs(argv, &flags); + if (argn < 0) { /* Invalid args? */ print_usage(); s->status = 1; return 0; } + if ((flags & FLAGS_NOENV)) { lua_pushboolean(L, 1); lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); } - lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */ - luaL_openlibs(L); /* open libraries */ + + /* Stop collector during library initialization. */ + lua_gc(L, LUA_GCSTOP, 0); + luaL_openlibs(L); lua_gc(L, LUA_GCRESTART, -1); + + createargtable(L, argv, s->argc, argn); + if (!(flags & FLAGS_NOENV)) { s->status = handle_luainit(L); - if (s->status != 0) return 0; + if (s->status != LUA_OK) return 0; } + if ((flags & FLAGS_VERSION)) print_version(); - s->status = runargs(L, argv, (script > 0) ? script : s->argc); - if (s->status != 0) return 0; - if (script) { - s->status = handle_script(L, argv, script); - if (s->status != 0) return 0; + + s->status = runargs(L, argv, argn); + if (s->status != LUA_OK) return 0; + + if (s->argc > argn) { + s->status = handle_script(L, argv + argn); + if (s->status != LUA_OK) return 0; } + if ((flags & FLAGS_INTERACTIVE)) { print_jit_status(L); dotty(L); - } else if (script == 0 && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) { + } else if (s->argc == argn && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) { if (lua_stdin_is_tty()) { print_version(); print_jit_status(L); dotty(L); } else { - dofile(L, NULL); /* executes stdin as a file */ + dofile(L, NULL); /* Executes stdin as a file. */ } } return 0; @@ -846,7 +861,7 @@ static int pmain(lua_State *L) int main(int argc, char **argv) { int status; - lua_State *L = lua_open(); /* create state */ + lua_State *L = lua_open(); if (L == NULL) { l_message(argv[0], "cannot create state: not enough memory"); return EXIT_FAILURE; @@ -856,6 +871,6 @@ int main(int argc, char **argv) status = lua_cpcall(L, pmain, NULL); report(L, status); lua_close(L); - return (status || smain.status) ? EXIT_FAILURE : EXIT_SUCCESS; + return (status || smain.status > 0) ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/luajit-2.1/src/luajit.h b/luajit-2.1/src/luajit.h index 9604185..c1c801c 100644 --- a/luajit-2.1/src/luajit.h +++ b/luajit-2.1/src/luajit.h @@ -1,7 +1,7 @@ /* ** LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/ ** -** Copyright (C) 2005-2015 Mike Pall. All rights reserved. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ** ** Permission is hereby granted, free of charge, to any person obtaining ** a copy of this software and associated documentation files (the @@ -30,10 +30,10 @@ #include "lua.h" -#define LUAJIT_VERSION "LuaJIT 2.1.0-beta1" +#define LUAJIT_VERSION "LuaJIT 2.1.0-beta2" #define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */ -#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta1 -#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2015 Mike Pall" +#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta2 +#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2017 Mike Pall" #define LUAJIT_URL "http://luajit.org/" /* Modes for luaJIT_setmode. */ diff --git a/luajit-2.1/src/lualib.h b/luajit-2.1/src/lualib.h index 96530e7..bfc130a 100644 --- a/luajit-2.1/src/lualib.h +++ b/luajit-2.1/src/lualib.h @@ -1,6 +1,6 @@ /* ** Standard library header. -** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LUALIB_H diff --git a/luajit-2.1/src/msvcbuild.bat b/luajit-2.1/src/msvcbuild.bat index 0360d7e..71bde75 100644 --- a/luajit-2.1/src/msvcbuild.bat +++ b/luajit-2.1/src/msvcbuild.bat @@ -1,5 +1,5 @@ @rem Script to build LuaJIT with MSVC.
-@rem Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+@rem Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
@rem
@rem Either open a "Visual Studio .NET Command Prompt"
@rem (Note that the Express Edition does not contain an x64 compiler)
@@ -14,12 +14,13 @@ @if not defined INCLUDE goto :FAIL
@setlocal
-@set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE
+@set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /D_CRT_STDIO_INLINE=__declspec(dllexport)__inline
@set LJLINK=link /nologo
@set LJMT=mt /nologo
@set LJLIB=lib /nologo /nodefaultlib
@set DASMDIR=..\dynasm
@set DASM=%DASMDIR%\dynasm.lua
+@set DASC=vm_x86.dasc
@set LJDLLNAME=lua51.dll
@set LJLIBNAME=lua51.lib
@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
@@ -39,7 +40,12 @@ if exist minilua.exe.manifest^ @set LJARCH=x86
@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
:X64
-minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
+@if "%1" neq "gc64" goto :NOGC64
+@shift
+@set DASC=vm_x64.dasc
+@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_ENABLE_GC64
+:NOGC64
+minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
@if errorlevel 1 goto :BAD
%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c
@@ -67,7 +73,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c @if "%1" neq "debug" goto :NODEBUG
@shift
@set LJCOMPILE=%LJCOMPILE% /Zi
-@set LJLINK=%LJLINK% /debug
+@set LJLINK=%LJLINK% /debug /opt:ref /opt:icf /incremental:no
:NODEBUG
@if "%1"=="amalg" goto :AMALGDLL
@if "%1"=="static" goto :STATIC
@@ -99,6 +105,8 @@ if exist luajit.exe.manifest^ %LJMT% -manifest luajit.exe.manifest -outputresource:luajit.exe
@del *.obj *.manifest minilua.exe buildvm.exe
+@del host\buildvm_arch.h
+@del lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h
@echo.
@echo === Successfully built LuaJIT for Windows/%LJARCH% ===
diff --git a/luajit-2.1/src/ps4build.bat b/luajit-2.1/src/ps4build.bat index 337a44f..e4a7def 100644 --- a/luajit-2.1/src/ps4build.bat +++ b/luajit-2.1/src/ps4build.bat @@ -2,7 +2,19 @@ @rem Donated to the public domain.
@rem
@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
+@rem or "VS2015 x64 Native Tools Command Prompt".
+@rem
@rem Then cd to this directory and run this script.
+@rem
+@rem Recommended invocation:
+@rem
+@rem ps4build release build, amalgamated, 64-bit GC
+@rem ps4build debug debug build, amalgamated, 64-bit GC
+@rem
+@rem Additional command-line options (not generally recommended):
+@rem
+@rem gc32 (before debug) 32-bit GC
+@rem noamalg (after debug) non-amalgamated build
@if not defined INCLUDE goto :FAIL
@if not defined SCE_ORBIS_SDK_DIR goto :FAIL
@@ -15,6 +27,14 @@ @set DASMDIR=..\dynasm
@set DASM=%DASMDIR%\dynasm.lua
@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+@set GC64=-DLUAJIT_ENABLE_GC64
+@set DASC=vm_x64.dasc
+
+@if "%1" neq "gc32" goto :NOGC32
+@shift
+@set GC64=
+@set DASC=vm_x86.dasc
+:NOGC32
%LJCOMPILE% host\minilua.c
@if errorlevel 1 goto :BAD
@@ -28,10 +48,10 @@ if exist minilua.exe.manifest^ @if not errorlevel 8 goto :FAIL
@set DASMFLAGS=-D P64 -D NO_UNWIND
-minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
+minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
@if errorlevel 1 goto :BAD
-%LJCOMPILE% /I "." /I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
+%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
@if errorlevel 1 goto :BAD
%LJLINK% /out:buildvm.exe buildvm*.obj
@if errorlevel 1 goto :BAD
@@ -54,7 +74,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c @if errorlevel 1 goto :BAD
@rem ---- Cross compiler ----
-@set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI
+@set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI %GC64%
@set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus
@set INCLUDE=""
@@ -63,14 +83,14 @@ orbis-as -o lj_vm.o lj_vm.s @if "%1" neq "debug" goto :NODEBUG
@shift
@set LJCOMPILE=%LJCOMPILE% -g -O0
-@set TARGETLIB=libluajitD.a
+@set TARGETLIB=libluajitD_ps4.a
goto :BUILD
:NODEBUG
@set LJCOMPILE=%LJCOMPILE% -O2
-@set TARGETLIB=libluajit.a
+@set TARGETLIB=libluajit_ps4.a
:BUILD
del %TARGETLIB%
-@if "%1"=="amalg" goto :AMALG
+@if "%1" neq "noamalg" goto :AMALG
for %%f in (lj_*.c lib_*.c) do (
%LJCOMPILE% %%f
@if errorlevel 1 goto :BAD
diff --git a/luajit-2.1/src/vm_arm.dasc b/luajit-2.1/src/vm_arm.dasc index af722f9..780cc16 100644 --- a/luajit-2.1/src/vm_arm.dasc +++ b/luajit-2.1/src/vm_arm.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for ARM CPUs. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h | |.arch arm |.section code_op, code_sub @@ -373,6 +373,17 @@ static void build_subroutines(BuildCtx *ctx) | st_vmstate CARG2 | b ->vm_returnc | + |->vm_unwind_ext: // Complete external unwind. +#if !LJ_NO_UNWIND + | push {r0, r1, r2, lr} + | bl extern _Unwind_Complete + | ldr r0, [sp] + | bl extern _Unwind_DeleteException + | pop {r0, r1, r2, lr} + | mov r0, r1 + | bx r2 +#endif + | |//----------------------------------------------------------------------- |//-- Grow stack for calls ----------------------------------------------- |//----------------------------------------------------------------------- diff --git a/luajit-2.1/src/vm_arm64.dasc b/luajit-2.1/src/vm_arm64.dasc index f1251f2..3eaf376 100644 --- a/luajit-2.1/src/vm_arm64.dasc +++ b/luajit-2.1/src/vm_arm64.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for ARM64 CPUs. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h | |.arch arm64 |.section code_op, code_sub @@ -151,6 +151,21 @@ |.define FRAME_FUNC, #-16 |.define FRAME_PC, #-8 | +|// Endian-specific defines. +|.if ENDIAN_LE +|.define LO, 0 +|.define OFS_RD, 2 +|.define OFS_RB, 3 +|.define OFS_RA, 1 +|.define OFS_OP, 0 +|.else +|.define LO, 4 +|.define OFS_RD, 0 +|.define OFS_RB, 0 +|.define OFS_RA, 2 +|.define OFS_OP, 3 +|.endif +| |.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro |.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro |.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro @@ -236,12 +251,17 @@ |.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro |.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro | -#define GL_J(field) (GG_OFS(J) + (int)offsetof(jit_State, field)) +#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | |.macro hotcheck, delta -| NYI +| lsr CARG1, PC, #1 +| and CARG1, CARG1, #126 +| add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT +| ldrh CARG2w, [GL, CARG1] +| subs CARG2, CARG2, #delta +| strh CARG2w, [GL, CARG1] |.endmacro | |.macro hotloop @@ -712,7 +732,7 @@ static void build_subroutines(BuildCtx *ctx) | cmp CRET1, #1 | bhi ->vmeta_binop |4: - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | add PC, PC, #4 | add RB, PC, RB, lsl #2 | sub RB, RB, #0x20000 @@ -869,7 +889,7 @@ static void build_subroutines(BuildCtx *ctx) | bl extern lj_meta_for // (lua_State *L, TValue *base) | ldr INSw, [PC, #-4] |.if JIT - | uxtb TMP0, INS + | uxtb TMP0w, INSw |.endif | decode_RA RA, INS | decode_RD RC, INS @@ -1495,7 +1515,12 @@ static void build_subroutines(BuildCtx *ctx) | bne ->fff_fallback | checkint CARG1, ->fff_fallback | mov CARG3, #1 - | mov CARG2, BASE // Points to stack. Little-endian. + | // Point to the char inside the integer in the stack slot. + |.if ENDIAN_LE + | mov CARG2, BASE + |.else + | add CARG2, BASE, #7 + |.endif |->fff_newstr: | // CARG2 = str, CARG3 = len. | str BASE, L->base @@ -1698,7 +1723,7 @@ static void build_subroutines(BuildCtx *ctx) | ands TMP0, PC, #FRAME_TYPE | and TMP1, PC, #~FRAME_TYPEP | bne >3 - | ldrb RAw, [PC, #-3] + | ldrb RAw, [PC, #-4+OFS_RA] | lsl RA, RA, #3 | add TMP1, RA, #16 |3: @@ -1732,7 +1757,20 @@ static void build_subroutines(BuildCtx *ctx) |//----------------------------------------------------------------------- | |->vm_record: // Dispatch target for recording phase. - | NYI + |.if JIT + | ldrb CARG1w, GL->hookmask + | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. + | bne >5 + | // Decrement the hookcount for consistency, but always do the call. + | ldr CARG2w, GL->hookcount + | tst CARG1, #HOOK_ACTIVE + | bne >1 + | sub CARG2w, CARG2w, #1 + | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT + | beq >1 + | str CARG2w, GL->hookcount + | b >1 + |.endif | |->vm_rethook: // Dispatch target for return hooks. | ldrb TMP2w, GL->hookmask @@ -1774,7 +1812,21 @@ static void build_subroutines(BuildCtx *ctx) | b <4 | |->vm_hotloop: // Hot loop counter underflow. - | NYI + |.if JIT + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). + | add CARG1, GL, #GG_G2DISP+GG_DISP2J + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + | str PC, SAVE_PC + | ldr CARG3, LFUNC:CARG3->pc + | mov CARG2, PC + | str L, [GL, #GL_J(L)] + | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)] + | str BASE, L->base + | add CARG3, BASE, CARG3, lsl #3 + | str CARG3, L->top + | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) + | b <3 + |.endif | |->vm_callhook: // Dispatch target for call hooks. | mov CARG2, PC @@ -1804,7 +1856,54 @@ static void build_subroutines(BuildCtx *ctx) | br CRET1 | |->cont_stitch: // Trace stitching. - | NYI + |.if JIT + | // RA = resultptr, CARG4 = meta base + | ldr RBw, SAVE_MULTRES + | ldr INSw, [PC, #-4] + | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. + | subs RB, RB, #8 + | decode_RA RC, INS // Call base. + | and CARG3, CARG3, #LJ_GCVMASK + | beq >2 + |1: // Move results down. + | ldr CARG1, [RA] + | add RA, RA, #8 + | subs RB, RB, #8 + | str CARG1, [BASE, RC, lsl #3] + | add RC, RC, #1 + | bne <1 + |2: + | decode_RA RA, INS + | decode_RB RB, INS + | add RA, RA, RB + |3: + | cmp RA, RC + | bhi >9 // More results wanted? + | + | ldrh RAw, TRACE:CARG3->traceno + | ldrh RCw, TRACE:CARG3->link + | cmp RCw, RAw + | beq ->cont_nop // Blacklisted. + | cmp RCw, #0 + | bne =>BC_JLOOP // Jump to stitched trace. + | + | // Stitch a new trace to the previous trace. + | mov CARG1, #GL_J(exitno) + | str RAw, [GL, CARG1] + | mov CARG1, #GL_J(L) + | str L, [GL, CARG1] + | str BASE, L->base + | add CARG1, GL, #GG_G2J + | mov CARG2, PC + | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + | ldr BASE, L->base + | b ->cont_nop + | + |9: // Fill up results with nil. + | str TISNIL, [BASE, RC, lsl #3] + | add RC, RC, #1 + | b <3 + |.endif | |->vm_profhook: // Dispatch target for profiler hook. #if LJ_HASPROFILE @@ -1822,10 +1921,122 @@ static void build_subroutines(BuildCtx *ctx) |//-- Trace exit handler ------------------------------------------------- |//----------------------------------------------------------------------- | + |.macro savex_, a, b + | stp d..a, d..b, [sp, #a*8] + | stp x..a, x..b, [sp, #32*8+a*8] + |.endmacro + | |->vm_exit_handler: - | NYI + |.if JIT + | sub sp, sp, #(64*8) + | savex_, 0, 1 + | savex_, 2, 3 + | savex_, 4, 5 + | savex_, 6, 7 + | savex_, 8, 9 + | savex_, 10, 11 + | savex_, 12, 13 + | savex_, 14, 15 + | savex_, 16, 17 + | savex_, 18, 19 + | savex_, 20, 21 + | savex_, 22, 23 + | savex_, 24, 25 + | savex_, 26, 27 + | savex_, 28, 29 + | stp d30, d31, [sp, #30*8] + | ldr CARG1, [sp, #64*8] // Load original value of lr. + | add CARG3, sp, #64*8 // Recompute original value of sp. + | mv_vmstate CARG4w, EXIT + | stp xzr, CARG3, [sp, #62*8] // Store 0/sp in RID_LR/RID_SP. + | sub CARG1, CARG1, lr + | ldr L, GL->cur_L + | lsr CARG1, CARG1, #2 + | ldr BASE, GL->jit_base + | sub CARG1, CARG1, #2 + | ldr CARG2w, [lr] // Load trace number. + | st_vmstate CARG4w + |.if ENDIAN_BE + | rev32 CARG2, CARG2 + |.endif + | str BASE, L->base + | ubfx CARG2w, CARG2w, #5, #16 + | str CARG1w, [GL, #GL_J(exitno)] + | str CARG2w, [GL, #GL_J(parent)] + | str L, [GL, #GL_J(L)] + | str xzr, GL->jit_base + | add CARG1, GL, #GG_G2J + | mov CARG2, sp + | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) + | // Returns MULTRES (unscaled) or negated error code. + | ldr CARG2, L->cframe + | ldr BASE, L->base + | and sp, CARG2, #CFRAME_RAWMASK + | ldr PC, SAVE_PC // Get SAVE_PC. + | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). + | b >1 + |.endif + | |->vm_exit_interp: - | NYI + | // CARG1 = MULTRES or negated error code, BASE, PC and GL set. + |.if JIT + | ldr L, SAVE_L + |1: + | cmp CARG1w, #0 + | blt >9 // Check for error from exit. + | lsl RC, CARG1, #3 + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 + | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 + | movn TISNIL, #0 + | and LFUNC:CARG2, CARG2, #LJ_GCVMASK + | str RCw, SAVE_MULTRES + | str BASE, L->base + | ldr CARG2, LFUNC:CARG2->pc + | str xzr, GL->jit_base + | mv_vmstate CARG4w, INTERP + | ldr KBASE, [CARG2, #PC2PROTO(k)] + | // Modified copy of ins_next which handles function header dispatch, too. + | ldrb RBw, [PC, # OFS_OP] + | ldr INSw, [PC], #4 + | st_vmstate CARG4w + | cmp RBw, #BC_FUNCC+2 // Fast function? + | add TMP1, GL, INS, uxtb #3 + | bhs >4 + |2: + | cmp RBw, #BC_FUNCF // Function header? + | add TMP0, GL, RB, uxtb #3 + | ldr RB, [TMP0, #GG_G2DISP] + | decode_RA RA, INS + | lsr TMP0, INS, #16 + | csel RC, TMP0, RC, lo + | blo >5 + | ldr CARG3, [BASE, FRAME_FUNC] + | sub RC, RC, #8 + | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8 + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + |5: + | br RB + | + |4: // Check frame below fast function. + | ldr CARG1, [BASE, FRAME_PC] + | ands CARG2, CARG1, #FRAME_TYPE + | bne <2 // Trace stitching continuation? + | // Otherwise set KBASE for Lua function below fast function. + | ldr CARG3w, [CARG1, #-4] + | decode_RA CARG1, CARG3 + | sub CARG2, BASE, CARG1, lsl #3 + | ldr LFUNC:CARG3, [CARG2, #-32] + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + | ldr CARG3, LFUNC:CARG3->pc + | ldr KBASE, [CARG3, #PC2PROTO(k)] + | b <2 + | + |9: // Rethrow error from the right C frame. + | neg CARG2, CARG1 + | mov CARG1, L + | bl extern lj_err_throw // (lua_State *L, int errcode) + |.endif | |//----------------------------------------------------------------------- |//-- Math helper functions ---------------------------------------------- @@ -1965,7 +2176,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | // RA = src1, RC = src2, JMP with RC = target | ldr CARG1, [BASE, RA, lsl #3] - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | ldr CARG2, [BASE, RC, lsl #3] | add PC, PC, #4 | add RB, PC, RB, lsl #2 @@ -2022,7 +2233,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = src1, RC = src2, JMP with RC = target | ldr CARG1, [BASE, RA, lsl #3] | add RC, BASE, RC, lsl #3 - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | ldr CARG3, [RC] | add PC, PC, #4 | add RB, PC, RB, lsl #2 @@ -2083,7 +2294,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = src, RC = str_const (~), JMP with RC = target | ldr CARG1, [BASE, RA, lsl #3] | mvn RC, RC - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | ldr CARG2, [KBASE, RC, lsl #3] | add PC, PC, #4 | movn TMP0, #~LJ_TSTR @@ -2111,7 +2322,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = src, RC = num_const (~), JMP with RC = target | ldr CARG1, [BASE, RA, lsl #3] | add RC, KBASE, RC, lsl #3 - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | ldr CARG3, [RC] | add PC, PC, #4 | add RB, PC, RB, lsl #2 @@ -2171,7 +2382,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) vk = op == BC_ISEQP; | // RA = src, RC = primitive_type (~), JMP with RC = target | ldr TMP0, [BASE, RA, lsl #3] - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | add PC, PC, #4 | add RC, RC, #1 | add RB, PC, RB, lsl #2 @@ -2196,7 +2407,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: | // RA = dst or unused, RC = src, JMP with RC = target - | ldrh RBw, [PC, #2] + | ldrh RBw, [PC, # OFS_RD] | ldr TMP0, [BASE, RC, lsl #3] | add PC, PC, #4 | mov_false TMP1 @@ -2443,7 +2654,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | str PC, SAVE_PC | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) | // Returns NULL (finished) or TValue * (metamethod). - | ldrb RBw, [PC, #-1] + | ldrb RBw, [PC, #-4+OFS_RB] | ldr BASE, L->base | cbnz CRET1, ->vmeta_binop | ldr TMP0, [BASE, RB, lsl #3] @@ -3074,7 +3285,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_callt | |5: // Tailcall to a fast function with a Lua frame below. - | ldrb RAw, [PC, #-3] + | ldrb RAw, [PC, #-4+OFS_RA] | sub CARG1, BASE, RA, lsl #3 | ldr LFUNC:CARG1, [CARG1, #-32] | and LFUNC:CARG1, CARG1, #LJ_GCVMASK @@ -3115,8 +3326,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | add RA, BASE, RA, lsl #3 | ldr TAB:RB, [RA, #-16] - | ldrh TMP3w, [PC, #2] - | ldr CARG1w, [RA, #-8] // Get index from control var. + | ldrh TMP3w, [PC, # OFS_RD] + | ldr CARG1w, [RA, #-8+LO] // Get index from control var. | add PC, PC, #4 | add TMP3, PC, TMP3, lsl #2 | and TAB:RB, RB, #LJ_GCVMASK @@ -3135,7 +3346,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | stp CARG1, TMP0, [RA] | add CARG1, CARG1, #1 |3: - | str CARG1w, [RA, #-8] // Update control var. + | str CARG1w, [RA, #-8+LO] // Update control var. | mov PC, TMP3 |4: | ins_next @@ -3181,8 +3392,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |5: // Despecialize bytecode if any of the checks fail. | mov TMP0, #BC_JMP | mov TMP1, #BC_ITERC - | strb TMP0w, [PC, #-4] - | strb TMP1w, [RC] + | strb TMP0w, [PC, #-4+OFS_OP] + | strb TMP1w, [RC, # OFS_OP] | b <1 break; @@ -3387,7 +3598,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) if (op == BC_FORI) { | csel PC, RC, PC, gt } else if (op == BC_JFORI) { - | ldrh RCw, [RC, #-2] + | mov PC, RC + | ldrh RCw, [RC, #-4+OFS_RD] } else if (op == BC_IFORL) { | csel PC, RC, PC, le } @@ -3428,7 +3640,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) if (op == BC_FORI) { | csel PC, RC, PC, hi } else if (op == BC_JFORI) { - | ldrh RCw, [RC, #-2] + | ldrh RCw, [RC, #-4+OFS_RD] | bls =>BC_JLOOP } else if (op == BC_IFORL) { | csel PC, RC, PC, ls @@ -3488,7 +3700,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_JLOOP: |.if JIT - | NYI + | // RA = base (ignored), RC = traceno + | ldr CARG1, [GL, #GL_J(trace)] + | mov CARG2w, #0 // Traces on ARM64 don't store the trace #, so use 0. + | ldr TRACE:RC, [CARG1, RC, lsl #3] + | st_vmstate CARG2w + | ldr RA, TRACE:RC->mcode + | str BASE, GL->jit_base + | str L, GL->tmpbuf.L + | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace. + | br RA |.endif break; @@ -3546,10 +3767,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_IFUNCV: | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 | ldr CARG1, L->maxstack + | movn TMP0, #~LJ_TFUNC | add TMP2, BASE, RC + | add LFUNC:CARG3, CARG3, TMP0, lsl #47 | add RA, RA, RC | add TMP0, RC, #16+FRAME_VARG - | str LFUNC:CARG3, [TMP2], #8 // Store (untagged) copy of LFUNC. + | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC. | ldr KBASE, [PC, #-4+PC2PROTO(k)] | cmp RA, CARG1 | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG. @@ -3736,8 +3959,8 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.uleb128 0x1\n" "\t.sleb128 -8\n" "\t.byte 30\n" /* Return address is in lr. */ - "\t.uleb128 1\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.uleb128 1\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ "\t.align 3\n" ".LECIE2:\n\n"); @@ -3748,7 +3971,7 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long .LASFDE3-.Lframe2\n" "\t.long lj_vm_ffi_call-.\n" "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ + "\t.uleb128 0\n" /* augmentation length */ "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ diff --git a/luajit-2.1/src/vm_mips.dasc b/luajit-2.1/src/vm_mips.dasc index 134ed56..1afd611 100644 --- a/luajit-2.1/src/vm_mips.dasc +++ b/luajit-2.1/src/vm_mips.dasc @@ -1,6 +1,9 @@ |// Low-level VM code for MIPS CPUs. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +|// +|// MIPS soft-float support contributed by Djordje Kovacevic and +|// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc. | |.arch mips |.section code_op, code_sub @@ -18,6 +21,12 @@ |// Fixed register assignments for the interpreter. |// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra | +|.macro .FPU, a, b +|.if FPU +| a, b +|.endif +|.endmacro +| |// The following must be C callee-save (but BASE is often refetched). |.define BASE, r16 // Base of current Lua stack frame. |.define KBASE, r17 // Constants of current Lua function. @@ -25,13 +34,15 @@ |.define DISPATCH, r19 // Opcode dispatch table. |.define LREG, r20 // Register holding lua_State (also in SAVE_L). |.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. -|// NYI: r22 currently unused. | |.define JGL, r30 // On-trace: global_State + 32768. | |// Constants for type-comparisons, stores and conversions. C callee-save. +|.define TISNUM, r22 |.define TISNIL, r30 +|.if FPU |.define TOBIT, f30 // 2^52 + 2^51. +|.endif | |// The following temporaries are not saved across C calls, except for RA. |.define RA, r23 // Callee-save. @@ -46,7 +57,7 @@ |.define TMP2, r14 |.define TMP3, r15 | -|// Calling conventions. +|// MIPS o32 calling convention. |.define CFUNCADDR, r25 |.define CARG1, r4 |.define CARG2, r5 @@ -56,13 +67,33 @@ |.define CRET1, r2 |.define CRET2, r3 | +|.if ENDIAN_LE +|.define SFRETLO, CRET1 +|.define SFRETHI, CRET2 +|.define SFARG1LO, CARG1 +|.define SFARG1HI, CARG2 +|.define SFARG2LO, CARG3 +|.define SFARG2HI, CARG4 +|.else +|.define SFRETLO, CRET2 +|.define SFRETHI, CRET1 +|.define SFARG1LO, CARG2 +|.define SFARG1HI, CARG1 +|.define SFARG2LO, CARG4 +|.define SFARG2HI, CARG3 +|.endif +| +|.if FPU |.define FARG1, f12 |.define FARG2, f14 | |.define FRET1, f0 |.define FRET2, f2 +|.endif | |// Stack layout while in interpreter. Must match with lj_frame.h. +|.if FPU // MIPS32 hard-float. +| |.define CFRAME_SPACE, 112 // Delta for sp. | |.define SAVE_ERRF, 124(sp) // 32 bit C frame info. @@ -72,6 +103,20 @@ |//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. |.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves. |.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves. +| +|.else // MIPS32 soft-float +| +|.define CFRAME_SPACE, 64 // Delta for sp. +| +|.define SAVE_ERRF, 76(sp) // 32 bit C frame info. +|.define SAVE_NRES, 72(sp) +|.define SAVE_CFRAME, 68(sp) +|.define SAVE_L, 64(sp) +|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. +|.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves. +| +|.endif +| |.define SAVE_PC, 20(sp) |.define ARG5, 16(sp) |.define CSAVE_4, 12(sp) @@ -83,43 +128,45 @@ |.define ARG5_OFS, 16 |.define SAVE_MULTRES, ARG5 | +|//----------------------------------------------------------------------- +| |.macro saveregs | addiu sp, sp, -CFRAME_SPACE | sw ra, SAVE_GPR_+9*4(sp) | sw r30, SAVE_GPR_+8*4(sp) -| sdc1 f30, SAVE_FPR_+5*8(sp) +| .FPU sdc1 f30, SAVE_FPR_+5*8(sp) | sw r23, SAVE_GPR_+7*4(sp) | sw r22, SAVE_GPR_+6*4(sp) -| sdc1 f28, SAVE_FPR_+4*8(sp) +| .FPU sdc1 f28, SAVE_FPR_+4*8(sp) | sw r21, SAVE_GPR_+5*4(sp) | sw r20, SAVE_GPR_+4*4(sp) -| sdc1 f26, SAVE_FPR_+3*8(sp) +| .FPU sdc1 f26, SAVE_FPR_+3*8(sp) | sw r19, SAVE_GPR_+3*4(sp) | sw r18, SAVE_GPR_+2*4(sp) -| sdc1 f24, SAVE_FPR_+2*8(sp) +| .FPU sdc1 f24, SAVE_FPR_+2*8(sp) | sw r17, SAVE_GPR_+1*4(sp) | sw r16, SAVE_GPR_+0*4(sp) -| sdc1 f22, SAVE_FPR_+1*8(sp) -| sdc1 f20, SAVE_FPR_+0*8(sp) +| .FPU sdc1 f22, SAVE_FPR_+1*8(sp) +| .FPU sdc1 f20, SAVE_FPR_+0*8(sp) |.endmacro | |.macro restoreregs_ret | lw ra, SAVE_GPR_+9*4(sp) | lw r30, SAVE_GPR_+8*4(sp) -| ldc1 f30, SAVE_FPR_+5*8(sp) +| .FPU ldc1 f30, SAVE_FPR_+5*8(sp) | lw r23, SAVE_GPR_+7*4(sp) | lw r22, SAVE_GPR_+6*4(sp) -| ldc1 f28, SAVE_FPR_+4*8(sp) +| .FPU ldc1 f28, SAVE_FPR_+4*8(sp) | lw r21, SAVE_GPR_+5*4(sp) | lw r20, SAVE_GPR_+4*4(sp) -| ldc1 f26, SAVE_FPR_+3*8(sp) +| .FPU ldc1 f26, SAVE_FPR_+3*8(sp) | lw r19, SAVE_GPR_+3*4(sp) | lw r18, SAVE_GPR_+2*4(sp) -| ldc1 f24, SAVE_FPR_+2*8(sp) +| .FPU ldc1 f24, SAVE_FPR_+2*8(sp) | lw r17, SAVE_GPR_+1*4(sp) | lw r16, SAVE_GPR_+0*4(sp) -| ldc1 f22, SAVE_FPR_+1*8(sp) -| ldc1 f20, SAVE_FPR_+0*8(sp) +| .FPU ldc1 f22, SAVE_FPR_+1*8(sp) +| .FPU ldc1 f20, SAVE_FPR_+0*8(sp) | jr ra | addiu sp, sp, CFRAME_SPACE |.endmacro @@ -153,13 +200,23 @@ |//----------------------------------------------------------------------- | |// Endian-specific defines. -|.define FRAME_PC, LJ_ENDIAN_SELECT(-4,-8) -|.define FRAME_FUNC, LJ_ENDIAN_SELECT(-8,-4) -|.define HI, LJ_ENDIAN_SELECT(4,0) -|.define LO, LJ_ENDIAN_SELECT(0,4) -|.define OFS_RD, LJ_ENDIAN_SELECT(2,0) -|.define OFS_RA, LJ_ENDIAN_SELECT(1,2) -|.define OFS_OP, LJ_ENDIAN_SELECT(0,3) +|.if ENDIAN_LE +|.define FRAME_PC, -4 +|.define FRAME_FUNC, -8 +|.define HI, 4 +|.define LO, 0 +|.define OFS_RD, 2 +|.define OFS_RA, 1 +|.define OFS_OP, 0 +|.else +|.define FRAME_PC, -8 +|.define FRAME_FUNC, -4 +|.define HI, 0 +|.define LO, 4 +|.define OFS_RD, 0 +|.define OFS_RA, 2 +|.define OFS_OP, 3 +|.endif | |// Instruction decode. |.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro @@ -354,9 +411,11 @@ static void build_subroutines(BuildCtx *ctx) |. sll TMP2, TMP2, 3 |1: | addiu TMP1, TMP1, -8 - | ldc1 f0, 0(RA) + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) | addiu RA, RA, 8 - | sdc1 f0, 0(BASE) + | sw SFRETHI, HI(BASE) + | sw SFRETLO, LO(BASE) | bnez TMP1, <1 |. addiu BASE, BASE, 8 | @@ -425,15 +484,16 @@ static void build_subroutines(BuildCtx *ctx) | and sp, CARG1, AT |->vm_unwind_ff_eh: // Landing pad for external unwinder. | lw L, SAVE_L - | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | li TISNUM, LJ_TISNUM // Setup type comparison constants. | li TISNIL, LJ_TNIL | lw BASE, L->base | lw DISPATCH, L->glref // Setup pointer to dispatch table. - | mtc1 TMP3, TOBIT + | .FPU mtc1 TMP3, TOBIT | li TMP1, LJ_TFALSE | li_vmstate INTERP | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. - | cvt.d.s TOBIT, TOBIT + | .FPU cvt.d.s TOBIT, TOBIT | addiu RA, BASE, -8 // Results start at BASE-8. | addiu DISPATCH, DISPATCH, GG_G2DISP | sw TMP1, HI(RA) // Prepend false to error message. @@ -496,13 +556,14 @@ static void build_subroutines(BuildCtx *ctx) | sw L, DISPATCH_GL(cur_L)(DISPATCH) | move RA, BASE | lw BASE, L->base + | li TISNUM, LJ_TISNUM // Setup type comparison constants. | lw TMP1, L->top | lw PC, FRAME_PC(BASE) - | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | subu RD, TMP1, BASE - | mtc1 TMP3, TOBIT + | .FPU mtc1 TMP3, TOBIT | sb r0, L->status - | cvt.d.s TOBIT, TOBIT + | .FPU cvt.d.s TOBIT, TOBIT | li_vmstate INTERP | addiu RD, RD, 8 | st_vmstate @@ -540,13 +601,14 @@ static void build_subroutines(BuildCtx *ctx) |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | sw L, DISPATCH_GL(cur_L)(DISPATCH) | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). - | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | li TISNUM, LJ_TISNUM // Setup type comparison constants. + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | lw TMP1, L->top - | mtc1 TMP3, TOBIT + | .FPU mtc1 TMP3, TOBIT | addu PC, PC, BASE | subu NARGS8:RC, TMP1, BASE | subu PC, PC, TMP2 // PC = frame delta + frame type - | cvt.d.s TOBIT, TOBIT + | .FPU cvt.d.s TOBIT, TOBIT | li_vmstate INTERP | li TISNIL, LJ_TNIL | st_vmstate @@ -628,7 +690,8 @@ static void build_subroutines(BuildCtx *ctx) |->cont_cat: // RA = resultptr, RB = meta base | lw INS, -4(PC) | addiu CARG2, RB, -16 - | ldc1 f0, 0(RA) + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) | decode_RB8a MULTRES, INS | decode_RA8a RA, INS | decode_RB8b MULTRES @@ -636,11 +699,13 @@ static void build_subroutines(BuildCtx *ctx) | addu TMP1, BASE, MULTRES | sw BASE, L->base | subu CARG3, CARG2, TMP1 + | sw SFRETHI, HI(CARG2) | bne TMP1, CARG2, ->BC_CAT_Z - |. sdc1 f0, 0(CARG2) + |. sw SFRETLO, LO(CARG2) | addu RA, BASE, RA + | sw SFRETHI, HI(RA) | b ->cont_nop - |. sdc1 f0, 0(RA) + |. sw SFRETLO, LO(RA) | |//-- Table indexing metamethods ----------------------------------------- | @@ -663,10 +728,9 @@ static void build_subroutines(BuildCtx *ctx) |. sw TMP1, HI(CARG3) | |->vmeta_tgetb: // TMP0 = index - | mtc1 TMP0, f0 - | cvt.d.w f0, f0 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | sdc1 f0, 0(CARG3) + | sw TMP0, LO(CARG3) + | sw TISNUM, HI(CARG3) | |->vmeta_tgetv: |1: @@ -678,9 +742,11 @@ static void build_subroutines(BuildCtx *ctx) | // Returns TValue * (finished) or NULL (metamethod). | beqz CRET1, >3 |. addiu TMP1, BASE, -FRAME_CONT - | ldc1 f0, 0(CRET1) + | lw SFARG1HI, HI(CRET1) + | lw SFARG2HI, LO(CRET1) | ins_next1 - | sdc1 f0, 0(RA) + | sw SFARG1HI, HI(RA) + | sw SFARG2HI, LO(RA) | ins_next2 | |3: // Call __index metamethod. @@ -697,10 +763,11 @@ static void build_subroutines(BuildCtx *ctx) | call_intern lj_tab_getinth // (GCtab *t, int32_t key) |. nop | // Returns cTValue * or NULL. - | beqz CRET1, >1 - |. nop + | beqz CRET1, ->BC_TGETR_Z + |. move SFARG2HI, TISNIL + | lw SFARG2HI, HI(CRET1) | b ->BC_TGETR_Z - |. ldc1 f0, 0(CRET1) + |. lw SFARG2LO, LO(CRET1) | |//----------------------------------------------------------------------- | @@ -723,10 +790,9 @@ static void build_subroutines(BuildCtx *ctx) |. sw TMP1, HI(CARG3) | |->vmeta_tsetb: // TMP0 = index - | mtc1 TMP0, f0 - | cvt.d.w f0, f0 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | sdc1 f0, 0(CARG3) + | sw TMP0, LO(CARG3) + | sw TISNUM, HI(CARG3) | |->vmeta_tsetv: |1: @@ -736,11 +802,13 @@ static void build_subroutines(BuildCtx *ctx) | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) |. move CARG1, L | // Returns TValue * (finished) or NULL (metamethod). + | lw SFARG1HI, HI(RA) | beqz CRET1, >3 - |. ldc1 f0, 0(RA) + |. lw SFARG1LO, LO(RA) | // NOBARRIER: lj_meta_tset ensures the table is not black. | ins_next1 - | sdc1 f0, 0(CRET1) + | sw SFARG1HI, HI(CRET1) + | sw SFARG1LO, LO(CRET1) | ins_next2 | |3: // Call __newindex metamethod. @@ -750,7 +818,8 @@ static void build_subroutines(BuildCtx *ctx) | sw PC, -16+HI(BASE) // [cont|PC] | subu PC, BASE, TMP1 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | sdc1 f0, 16(BASE) // Copy value to third argument. + | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument. + | sw SFARG1LO, 16+LO(BASE) | b ->vm_call_dispatch_f |. li NARGS8:RC, 24 // 3 args for func(t, k, v) | @@ -767,7 +836,9 @@ static void build_subroutines(BuildCtx *ctx) |//-- Comparison metamethods --------------------------------------------- | |->vmeta_comp: - | // CARG2, CARG3 are already set by BC_ISLT/BC_ISGE/BC_ISLE/BC_ISGT. + | // RA/RD point to o1/o2. + | move CARG2, RA + | move CARG3, RD | load_got lj_meta_comp | addiu PC, PC, -4 | sw BASE, L->base @@ -793,11 +864,13 @@ static void build_subroutines(BuildCtx *ctx) | |->cont_ra: // RA = resultptr | lbu TMP1, -4+OFS_RA(PC) - | ldc1 f0, 0(RA) + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) | sll TMP1, TMP1, 3 | addu TMP1, BASE, TMP1 + | sw SFRETHI, HI(TMP1) | b ->cont_nop - |. sdc1 f0, 0(TMP1) + |. sw SFRETLO, LO(TMP1) | |->cont_condt: // RA = resultptr | lw TMP0, HI(RA) @@ -812,8 +885,11 @@ static void build_subroutines(BuildCtx *ctx) |. addiu TMP2, AT, -1 // Branch if result is false. | |->vmeta_equal: - | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. + | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1. | load_got lj_meta_equal + | move CARG2, SFARG1LO + | move CARG3, SFARG2LO + | move CARG4, TMP0 | addiu PC, PC, -4 | sw BASE, L->base | sw PC, SAVE_PC @@ -852,14 +928,16 @@ static void build_subroutines(BuildCtx *ctx) |//-- Arithmetic metamethods --------------------------------------------- | |->vmeta_unm: - | move CARG4, CARG3 + | move RC, RB | |->vmeta_arith: | load_got lj_meta_arith | decode_OP1 TMP0, INS | sw BASE, L->base - | sw PC, SAVE_PC | move CARG2, RA + | sw PC, SAVE_PC + | move CARG3, RB + | move CARG4, RC | sw TMP0, ARG5 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) |. move CARG1, L @@ -967,40 +1045,52 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc_1, name |->ff_ .. name: + | lw SFARG1HI, HI(BASE) | beqz NARGS8:RC, ->fff_fallback - |. lw CARG3, HI(BASE) - | lw CARG1, LO(BASE) + |. lw SFARG1LO, LO(BASE) |.endmacro | |.macro .ffunc_2, name |->ff_ .. name: | sltiu AT, NARGS8:RC, 16 - | lw CARG3, HI(BASE) + | lw SFARG1HI, HI(BASE) | bnez AT, ->fff_fallback - |. lw CARG4, 8+HI(BASE) - | lw CARG1, LO(BASE) - | lw CARG2, 8+LO(BASE) + |. lw SFARG2HI, 8+HI(BASE) + | lw SFARG1LO, LO(BASE) + | lw SFARG2LO, 8+LO(BASE) |.endmacro | |.macro .ffunc_n, name // Caveat: has delay slot! |->ff_ .. name: - | lw CARG3, HI(BASE) + | lw SFARG1HI, HI(BASE) + |.if FPU + | ldc1 FARG1, 0(BASE) + |.else + | lw SFARG1LO, LO(BASE) + |.endif | beqz NARGS8:RC, ->fff_fallback - |. ldc1 FARG1, 0(BASE) - | sltiu AT, CARG3, LJ_TISNUM + |. sltiu AT, SFARG1HI, LJ_TISNUM | beqz AT, ->fff_fallback |.endmacro | |.macro .ffunc_nn, name // Caveat: has delay slot! |->ff_ .. name: | sltiu AT, NARGS8:RC, 16 - | lw CARG3, HI(BASE) + | lw SFARG1HI, HI(BASE) | bnez AT, ->fff_fallback - |. lw CARG4, 8+HI(BASE) - | ldc1 FARG1, 0(BASE) - | ldc1 FARG2, 8(BASE) - | sltiu TMP0, CARG3, LJ_TISNUM - | sltiu TMP1, CARG4, LJ_TISNUM + |. lw SFARG2HI, 8+HI(BASE) + | sltiu TMP0, SFARG1HI, LJ_TISNUM + |.if FPU + | ldc1 FARG1, 0(BASE) + |.else + | lw SFARG1LO, LO(BASE) + |.endif + | sltiu TMP1, SFARG2HI, LJ_TISNUM + |.if FPU + | ldc1 FARG2, 8(BASE) + |.else + | lw SFARG2LO, 8+LO(BASE) + |.endif | and TMP0, TMP0, TMP1 | beqz TMP0, ->fff_fallback |.endmacro @@ -1016,52 +1106,54 @@ static void build_subroutines(BuildCtx *ctx) |//-- Base library: checks ----------------------------------------------- | |.ffunc_1 assert - | sltiu AT, CARG3, LJ_TISTRUECOND + | sltiu AT, SFARG1HI, LJ_TISTRUECOND | beqz AT, ->fff_fallback |. addiu RA, BASE, -8 | lw PC, FRAME_PC(BASE) | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. | addu TMP2, RA, NARGS8:RC - | sw CARG3, HI(RA) + | sw SFARG1HI, HI(RA) | addiu TMP1, BASE, 8 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. - |. sw CARG1, LO(RA) + |. sw SFARG1LO, LO(RA) |1: - | ldc1 f0, 0(TMP1) - | sdc1 f0, -8(TMP1) + | lw SFRETHI, HI(TMP1) + | lw SFRETLO, LO(TMP1) + | sw SFRETHI, -8+HI(TMP1) + | sw SFRETLO, -8+LO(TMP1) | bne TMP1, TMP2, <1 |. addiu TMP1, TMP1, 8 | b ->fff_res |. nop | |.ffunc type - | lw CARG3, HI(BASE) - | li TMP1, LJ_TISNUM + | lw SFARG1HI, HI(BASE) | beqz NARGS8:RC, ->fff_fallback - |. sltiu TMP0, CARG3, LJ_TISNUM - | movz TMP1, CARG3, TMP0 - | not TMP1, TMP1 + |. sltiu TMP0, SFARG1HI, LJ_TISNUM + | movn SFARG1HI, TISNUM, TMP0 + | not TMP1, SFARG1HI | sll TMP1, TMP1, 3 | addu TMP1, CFUNC:RB, TMP1 - | b ->fff_resn - |. ldc1 FRET1, CFUNC:TMP1->upvalue + | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi + | b ->fff_restv + |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo | |//-- Base library: getters and setters --------------------------------- | |.ffunc_1 getmetatable | li AT, LJ_TTAB - | bne CARG3, AT, >6 + | bne SFARG1HI, AT, >6 |. li AT, LJ_TUDATA |1: // Field metatable must be at same offset for GCtab and GCudata! - | lw TAB:CARG1, TAB:CARG1->metatable + | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable |2: | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) - | beqz TAB:CARG1, ->fff_restv - |. li CARG3, LJ_TNIL - | lw TMP0, TAB:CARG1->hmask - | li CARG3, LJ_TTAB // Use metatable as default result. + | beqz TAB:SFARG1LO, ->fff_restv + |. li SFARG1HI, LJ_TNIL + | lw TMP0, TAB:SFARG1LO->hmask + | li SFARG1HI, LJ_TTAB // Use metatable as default result. | lw TMP1, STR:RC->hash - | lw NODE:TMP2, TAB:CARG1->node + | lw NODE:TMP2, TAB:SFARG1LO->node | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask | sll TMP0, TMP1, 5 | sll TMP1, TMP1, 3 @@ -1073,7 +1165,7 @@ static void build_subroutines(BuildCtx *ctx) | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) | lw NODE:TMP3, NODE:TMP2->next | bne CARG4, AT, >4 - |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) + |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2) | beq TMP0, STR:RC, >5 |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2) |4: @@ -1082,36 +1174,35 @@ static void build_subroutines(BuildCtx *ctx) | b <3 |. nop |5: - | beq CARG2, TISNIL, ->fff_restv // Ditto for nil value. + | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value. |. nop - | move CARG3, CARG2 // Return value of mt.__metatable. + | move SFARG1HI, CARG3 // Return value of mt.__metatable. | b ->fff_restv - |. move CARG1, TMP1 + |. move SFARG1LO, TMP1 | |6: - | beq CARG3, AT, <1 - |. sltiu TMP0, CARG3, LJ_TISNUM - | li TMP1, LJ_TISNUM - | movz TMP1, CARG3, TMP0 - | not TMP1, TMP1 + | beq SFARG1HI, AT, <1 + |. sltu AT, TISNUM, SFARG1HI + | movz SFARG1HI, TISNUM, AT + | not TMP1, SFARG1HI | sll TMP1, TMP1, 2 | addu TMP1, DISPATCH, TMP1 | b <2 - |. lw TAB:CARG1, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) + |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) | |.ffunc_2 setmetatable | // Fast path: no mt for table yet and not clearing the mt. | li AT, LJ_TTAB - | bne CARG3, AT, ->fff_fallback - |. addiu CARG4, CARG4, -LJ_TTAB - | lw TAB:TMP1, TAB:CARG1->metatable - | lbu TMP3, TAB:CARG1->marked - | or AT, CARG4, TAB:TMP1 + | bne SFARG1HI, AT, ->fff_fallback + |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB + | lw TAB:TMP1, TAB:SFARG1LO->metatable + | lbu TMP3, TAB:SFARG1LO->marked + | or AT, SFARG2HI, TAB:TMP1 | bnez AT, ->fff_fallback |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) | beqz AT, ->fff_restv - |. sw TAB:CARG2, TAB:CARG1->metatable - | barrierback TAB:CARG1, TMP3, TMP0, ->fff_restv + |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable + | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv | |.ffunc rawget | lw CARG4, HI(BASE) @@ -1125,44 +1216,44 @@ static void build_subroutines(BuildCtx *ctx) | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) |. move CARG1, L | // Returns cTValue *. - | b ->fff_resn - |. ldc1 FRET1, 0(CRET1) + | lw SFARG1HI, HI(CRET1) + | b ->fff_restv + |. lw SFARG1LO, LO(CRET1) | |//-- Base library: conversions ------------------------------------------ | |.ffunc tonumber | // Only handles the number case inline (without a base argument). | lw CARG1, HI(BASE) - | xori AT, NARGS8:RC, 8 - | sltiu CARG1, CARG1, LJ_TISNUM - | movn CARG1, r0, AT - | beqz CARG1, ->fff_fallback // Exactly one number argument. - |. ldc1 FRET1, 0(BASE) - | b ->fff_resn - |. nop + | xori AT, NARGS8:RC, 8 // Exactly one number argument. + | sltu TMP0, TISNUM, CARG1 + | or AT, AT, TMP0 + | bnez AT, ->fff_fallback + |. lw SFARG1HI, HI(BASE) + | b ->fff_restv + |. lw SFARG1LO, LO(BASE) | |.ffunc_1 tostring | // Only handles the string or number case inline. | li AT, LJ_TSTR | // A __tostring method in the string base metatable is ignored. - | beq CARG3, AT, ->fff_restv // String key? + | beq SFARG1HI, AT, ->fff_restv // String key? | // Handle numbers inline, unless a number base metatable is present. |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) - | sltiu TMP0, CARG3, LJ_TISNUM - | sltiu TMP1, TMP1, 1 - | and TMP0, TMP0, TMP1 - | beqz TMP0, ->fff_fallback + | sltu TMP0, TISNUM, SFARG1HI + | or TMP0, TMP0, TMP1 + | bnez TMP0, ->fff_fallback |. sw BASE, L->base // Add frame since C call can throw. | ffgccheck |. sw PC, SAVE_PC // Redundant (but a defined value). - | load_got lj_strfmt_num + | load_got lj_strfmt_number | move CARG1, L - | call_intern lj_strfmt_num // (lua_State *L, lua_Number *np) + | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) |. move CARG2, BASE | // Returns GCstr *. - | li CARG3, LJ_TSTR + | li SFARG1HI, LJ_TSTR | b ->fff_restv - |. move CARG1, CRET1 + |. move SFARG1LO, CRET1 | |//-- Base library: iterators ------------------------------------------- | @@ -1184,31 +1275,38 @@ static void build_subroutines(BuildCtx *ctx) |. move CARG1, L | // Returns 0 at end of traversal. | beqz CRET1, ->fff_restv // End of traversal: return nil. - |. li CARG3, LJ_TNIL - | ldc1 f0, 8(BASE) // Copy key and value to results. + |. li SFARG1HI, LJ_TNIL + | lw TMP0, 8+HI(BASE) + | lw TMP1, 8+LO(BASE) | addiu RA, BASE, -8 - | ldc1 f2, 16(BASE) - | li RD, (2+1)*8 - | sdc1 f0, 0(RA) + | lw TMP2, 16+HI(BASE) + | lw TMP3, 16+LO(BASE) + | sw TMP0, HI(RA) + | sw TMP1, LO(RA) + | sw TMP2, 8+HI(RA) + | sw TMP3, 8+LO(RA) | b ->fff_res - |. sdc1 f2, 8(RA) + |. li RD, (2+1)*8 | |.ffunc_1 pairs | li AT, LJ_TTAB - | bne CARG3, AT, ->fff_fallback + | bne SFARG1HI, AT, ->fff_fallback |. lw PC, FRAME_PC(BASE) #if LJ_52 - | lw TAB:TMP2, TAB:CARG1->metatable - | ldc1 f0, CFUNC:RB->upvalue[0] + | lw TAB:TMP2, TAB:SFARG1LO->metatable + | lw TMP0, CFUNC:RB->upvalue[0].u32.hi + | lw TMP1, CFUNC:RB->upvalue[0].u32.lo | bnez TAB:TMP2, ->fff_fallback #else - | ldc1 f0, CFUNC:RB->upvalue[0] + | lw TMP0, CFUNC:RB->upvalue[0].u32.hi + | lw TMP1, CFUNC:RB->upvalue[0].u32.lo #endif |. addiu RA, BASE, -8 | sw TISNIL, 8+HI(BASE) - | li RD, (3+1)*8 + | sw TMP0, HI(RA) + | sw TMP1, LO(RA) | b ->fff_res - |. sdc1 f0, 0(RA) + |. li RD, (3+1)*8 | |.ffunc ipairs_aux | sltiu AT, NARGS8:RC, 16 @@ -1216,35 +1314,32 @@ static void build_subroutines(BuildCtx *ctx) | lw TAB:CARG1, LO(BASE) | lw CARG4, 8+HI(BASE) | bnez AT, ->fff_fallback - |. ldc1 FARG2, 8(BASE) - | addiu CARG3, CARG3, -LJ_TTAB - | sltiu AT, CARG4, LJ_TISNUM - | li TMP0, 1 - | movn AT, r0, CARG3 - | mtc1 TMP0, FARG1 - | beqz AT, ->fff_fallback + |. addiu CARG3, CARG3, -LJ_TTAB + | xor CARG4, CARG4, TISNUM + | and AT, CARG3, CARG4 + | bnez AT, ->fff_fallback |. lw PC, FRAME_PC(BASE) - | trunc.w.d FRET1, FARG2 - | cvt.d.w FARG1, FARG1 + | lw TMP2, 8+LO(BASE) | lw TMP0, TAB:CARG1->asize | lw TMP1, TAB:CARG1->array - | mfc1 TMP2, FRET1 - | addiu RA, BASE, -8 - | add.d FARG2, FARG2, FARG1 | addiu TMP2, TMP2, 1 + | sw TISNUM, -8+HI(BASE) | sltu AT, TMP2, TMP0 + | sw TMP2, -8+LO(BASE) + | beqz AT, >2 // Not in array part? + |. addiu RA, BASE, -8 | sll TMP3, TMP2, 3 | addu TMP3, TMP1, TMP3 - | beqz AT, >2 // Not in array part? - |. sdc1 FARG2, 0(RA) - | lw TMP2, HI(TMP3) - | ldc1 f0, 0(TMP3) + | lw TMP1, HI(TMP3) + | lw TMP2, LO(TMP3) |1: - | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results. + | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. |. li RD, (0+1)*8 - | li RD, (2+1)*8 + | sw TMP1, 8+HI(RA) + | sw TMP2, 8+LO(RA) | b ->fff_res - |. sdc1 f0, 8(RA) + |. li RD, (2+1)*8 + | |2: // Check for empty hash part first. Otherwise call C function. | lw TMP0, TAB:CARG1->hmask | load_got lj_tab_getinth @@ -1255,27 +1350,30 @@ static void build_subroutines(BuildCtx *ctx) | // Returns cTValue * or NULL. | beqz CRET1, ->fff_res |. li RD, (0+1)*8 - | lw TMP2, HI(CRET1) + | lw TMP1, HI(CRET1) | b <1 - |. ldc1 f0, 0(CRET1) + |. lw TMP2, LO(CRET1) | |.ffunc_1 ipairs | li AT, LJ_TTAB - | bne CARG3, AT, ->fff_fallback + | bne SFARG1HI, AT, ->fff_fallback |. lw PC, FRAME_PC(BASE) #if LJ_52 - | lw TAB:TMP2, TAB:CARG1->metatable - | ldc1 f0, CFUNC:RB->upvalue[0] + | lw TAB:TMP2, TAB:SFARG1LO->metatable + | lw TMP0, CFUNC:RB->upvalue[0].u32.hi + | lw TMP1, CFUNC:RB->upvalue[0].u32.lo | bnez TAB:TMP2, ->fff_fallback #else - | ldc1 f0, CFUNC:RB->upvalue[0] + | lw TMP0, CFUNC:RB->upvalue[0].u32.hi + | lw TMP1, CFUNC:RB->upvalue[0].u32.lo #endif |. addiu RA, BASE, -8 - | sw r0, 8+HI(BASE) + | sw TISNUM, 8+HI(BASE) | sw r0, 8+LO(BASE) - | li RD, (3+1)*8 + | sw TMP0, HI(RA) + | sw TMP1, LO(RA) | b ->fff_res - |. sdc1 f0, 0(RA) + |. li RD, (3+1)*8 | |//-- Base library: catch errors ---------------------------------------- | @@ -1295,8 +1393,9 @@ static void build_subroutines(BuildCtx *ctx) | sltiu AT, NARGS8:RC, 16 | lw CARG4, 8+HI(BASE) | bnez AT, ->fff_fallback - |. ldc1 FARG2, 8(BASE) - | ldc1 FARG1, 0(BASE) + |. lw CARG3, 8+LO(BASE) + | lw CARG1, LO(BASE) + | lw CARG2, HI(BASE) | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) | li AT, LJ_TFUNC | move TMP2, BASE @@ -1304,9 +1403,11 @@ static void build_subroutines(BuildCtx *ctx) | addiu BASE, BASE, 16 | // Remember active hook before pcall. | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT - | sdc1 FARG2, 0(TMP2) // Swap function and traceback. + | sw CARG3, LO(TMP2) // Swap function and traceback. + | sw CARG4, HI(TMP2) | andi TMP3, TMP3, 1 - | sdc1 FARG1, 8(TMP2) + | sw CARG1, 8+LO(TMP2) + | sw CARG2, 8+HI(TMP2) | addiu PC, TMP3, 16+FRAME_PCALL | b ->vm_call_dispatch |. addiu NARGS8:RC, NARGS8:RC, -16 @@ -1315,7 +1416,10 @@ static void build_subroutines(BuildCtx *ctx) | |.macro coroutine_resume_wrap, resume |.if resume - |.ffunc_1 coroutine_resume + |.ffunc coroutine_resume + | lw CARG3, HI(BASE) + | beqz NARGS8:RC, ->fff_fallback + |. lw CARG1, LO(BASE) | li AT, LJ_TTHREAD | bne CARG3, AT, ->fff_fallback |.else @@ -1350,11 +1454,13 @@ static void build_subroutines(BuildCtx *ctx) | move CARG3, CARG2 | sw BASE, L->top |2: // Move args to coroutine. - | ldc1 f0, 0(BASE) + | lw SFRETHI, HI(BASE) + | lw SFRETLO, LO(BASE) | sltu AT, BASE, TMP1 | beqz AT, >3 |. addiu BASE, BASE, 8 - | sdc1 f0, 0(CARG3) + | sw SFRETHI, HI(CARG3) + | sw SFRETLO, LO(CARG3) | b <2 |. addiu CARG3, CARG3, 8 |3: @@ -1380,10 +1486,12 @@ static void build_subroutines(BuildCtx *ctx) | sw TMP2, L:RA->top // Clear coroutine stack. | move TMP1, BASE |5: // Move results from coroutine. - | ldc1 f0, 0(TMP2) + | lw SFRETHI, HI(TMP2) + | lw SFRETLO, LO(TMP2) | addiu TMP2, TMP2, 8 | sltu AT, TMP2, TMP3 - | sdc1 f0, 0(TMP1) + | sw SFRETHI, HI(TMP1) + | sw SFRETLO, LO(TMP1) | bnez AT, <5 |. addiu TMP1, TMP1, 8 |6: @@ -1408,12 +1516,14 @@ static void build_subroutines(BuildCtx *ctx) |.if resume | addiu TMP3, TMP3, -8 | li TMP1, LJ_TFALSE - | ldc1 f0, 0(TMP3) + | lw SFRETHI, HI(TMP3) + | lw SFRETLO, LO(TMP3) | sw TMP3, L:RA->top // Remove error from coroutine stack. | li RD, (2+1)*8 | sw TMP1, -8+HI(BASE) // Prepend false to results. | addiu RA, BASE, -8 - | sdc1 f0, 0(BASE) // Copy error message. + | sw SFRETHI, HI(BASE) // Copy error message. + | sw SFRETLO, LO(BASE) | b <7 |. andi TMP0, PC, FRAME_TYPE |.else @@ -1449,20 +1559,29 @@ static void build_subroutines(BuildCtx *ctx) | |//-- Math library ------------------------------------------------------- | - |.ffunc_n math_abs - |. abs.d FRET1, FARG1 - |->fff_resn: - | lw PC, FRAME_PC(BASE) - | addiu RA, BASE, -8 - | b ->fff_res1 - |. sdc1 FRET1, -8(BASE) + |.ffunc_1 math_abs + | bne SFARG1HI, TISNUM, >1 + |. sra TMP0, SFARG1LO, 31 + | xor TMP1, SFARG1LO, TMP0 + | subu SFARG1LO, TMP1, TMP0 + | bgez SFARG1LO, ->fff_restv + |. nop + | lui SFARG1HI, 0x41e0 // 2^31 as a double. + | b ->fff_restv + |. li SFARG1LO, 0 + |1: + | sltiu AT, SFARG1HI, LJ_TISNUM + | beqz AT, ->fff_fallback + |. sll SFARG1HI, SFARG1HI, 1 + | srl SFARG1HI, SFARG1HI, 1 + |// fallthrough | |->fff_restv: - | // CARG3/CARG1 = TValue result. + | // SFARG1LO/SFARG1HI = TValue result. | lw PC, FRAME_PC(BASE) - | sw CARG3, -8+HI(BASE) + | sw SFARG1HI, -8+HI(BASE) | addiu RA, BASE, -8 - | sw CARG1, -8+LO(BASE) + | sw SFARG1LO, -8+LO(BASE) |->fff_res1: | // RA = results, PC = return. | li RD, (1+1)*8 @@ -1491,15 +1610,19 @@ static void build_subroutines(BuildCtx *ctx) |. sw TISNIL, -8+HI(TMP1) | |.macro math_extern, func - |->ff_math_ .. func: - | lw CARG3, HI(BASE) + | .ffunc math_ .. func + | lw SFARG1HI, HI(BASE) | beqz NARGS8:RC, ->fff_fallback |. load_got func - | sltiu AT, CARG3, LJ_TISNUM + | sltiu AT, SFARG1HI, LJ_TISNUM | beqz AT, ->fff_fallback - |. nop - | call_extern + |.if FPU |. ldc1 FARG1, 0(BASE) + |.else + |. lw SFARG1LO, LO(BASE) + |.endif + | call_extern + |. nop | b ->fff_resn |. nop |.endmacro @@ -1513,10 +1636,22 @@ static void build_subroutines(BuildCtx *ctx) |. nop |.endmacro | + |// TODO: Return integer type if result is integer (own sf implementation). |.macro math_round, func - | .ffunc_n math_ .. func - |. nop + |->ff_math_ .. func: + | lw SFARG1HI, HI(BASE) + | beqz NARGS8:RC, ->fff_fallback + |. lw SFARG1LO, LO(BASE) + | beq SFARG1HI, TISNUM, ->fff_restv + |. sltu AT, SFARG1HI, TISNUM + | beqz AT, ->fff_fallback + |.if FPU + |. ldc1 FARG1, 0(BASE) | bal ->vm_ .. func + |.else + |. load_got func + | call_extern + |.endif |. nop | b ->fff_resn |. nop @@ -1526,15 +1661,19 @@ static void build_subroutines(BuildCtx *ctx) | math_round ceil | |.ffunc math_log - | lw CARG3, HI(BASE) | li AT, 8 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. - |. load_got log - | sltiu AT, CARG3, LJ_TISNUM + |. lw SFARG1HI, HI(BASE) + | sltiu AT, SFARG1HI, LJ_TISNUM | beqz AT, ->fff_fallback - |. nop + |. load_got log + |.if FPU | call_extern |. ldc1 FARG1, 0(BASE) + |.else + | call_extern + |. lw SFARG1LO, LO(BASE) + |.endif | b ->fff_resn |. nop | @@ -1553,17 +1692,43 @@ static void build_subroutines(BuildCtx *ctx) | math_extern2 atan2 | math_extern2 fmod | + |.if FPU |.ffunc_n math_sqrt |. sqrt.d FRET1, FARG1 - | b ->fff_resn - |. nop + |// fallthrough to ->fff_resn + |.else + | math_extern sqrt + |.endif + | + |->fff_resn: + | lw PC, FRAME_PC(BASE) + | addiu RA, BASE, -8 + |.if FPU + | b ->fff_res1 + |. sdc1 FRET1, -8(BASE) + |.else + | sw SFRETHI, -8+HI(BASE) + | b ->fff_res1 + |. sw SFRETLO, -8+LO(BASE) + |.endif + | | - |.ffunc_nn math_ldexp - | trunc.w.d FARG2, FARG2 + |.ffunc math_ldexp + | sltiu AT, NARGS8:RC, 16 + | lw SFARG1HI, HI(BASE) + | bnez AT, ->fff_fallback + |. lw CARG4, 8+HI(BASE) + | bne CARG4, TISNUM, ->fff_fallback | load_got ldexp - | mfc1 CARG3, FARG2 + |. sltu AT, SFARG1HI, TISNUM + | beqz AT, ->fff_fallback + |.if FPU + |. ldc1 FARG1, 0(BASE) + |.else + |. lw SFARG1LO, LO(BASE) + |.endif | call_extern - |. nop + |. lw CARG3, 8+LO(BASE) | b ->fff_resn |. nop | @@ -1574,10 +1739,17 @@ static void build_subroutines(BuildCtx *ctx) |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) | addiu RA, BASE, -8 + |.if FPU | mtc1 TMP1, FARG2 | sdc1 FRET1, 0(RA) | cvt.d.w FARG2, FARG2 | sdc1 FARG2, 8(RA) + |.else + | sw SFRETLO, LO(RA) + | sw SFRETHI, HI(RA) + | sw TMP1, 8+LO(RA) + | sw TISNUM, 8+HI(RA) + |.endif | b ->fff_res |. li RD, (2+1)*8 | @@ -1587,39 +1759,98 @@ static void build_subroutines(BuildCtx *ctx) | call_extern |. addiu CARG3, BASE, -8 | addiu RA, BASE, -8 + |.if FPU | sdc1 FRET1, 0(BASE) + |.else + | sw SFRETLO, LO(BASE) + | sw SFRETHI, HI(BASE) + |.endif | b ->fff_res |. li RD, (2+1)*8 | - |.macro math_minmax, name, ismax - |->ff_ .. name: - | lw CARG3, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. ldc1 FRET1, 0(BASE) - | sltiu AT, CARG3, LJ_TISNUM + |.macro math_minmax, name, intins, fpins + | .ffunc_1 name + | addu TMP3, BASE, NARGS8:RC + | bne SFARG1HI, TISNUM, >5 + |. addiu TMP2, BASE, 8 + |1: // Handle integers. + |. lw SFARG2HI, HI(TMP2) + | beq TMP2, TMP3, ->fff_restv + |. lw SFARG2LO, LO(TMP2) + | bne SFARG2HI, TISNUM, >3 + |. slt AT, SFARG1LO, SFARG2LO + | intins SFARG1LO, SFARG2LO, AT + | b <1 + |. addiu TMP2, TMP2, 8 + | + |3: // Convert intermediate result to number and continue with number loop. + | sltiu AT, SFARG2HI, LJ_TISNUM | beqz AT, ->fff_fallback - |. addu TMP2, BASE, NARGS8:RC - | addiu TMP1, BASE, 8 - | beq TMP1, TMP2, ->fff_resn - |1: - |. lw CARG3, HI(TMP1) - | ldc1 FARG1, 0(TMP1) - | addiu TMP1, TMP1, 8 - | sltiu AT, CARG3, LJ_TISNUM + |.if FPU + |. mtc1 SFARG1LO, FRET1 + | cvt.d.w FRET1, FRET1 + | b >7 + |. ldc1 FARG1, 0(TMP2) + |.else + |. nop + | bal ->vm_sfi2d_1 + |. nop + | b >7 + |. nop + |.endif + | + |5: + |. sltiu AT, SFARG1HI, LJ_TISNUM | beqz AT, ->fff_fallback - |.if ismax - |. c.olt.d FARG1, FRET1 + |.if FPU + |. ldc1 FRET1, 0(BASE) + |.endif + | + |6: // Handle numbers. + |. lw SFARG2HI, HI(TMP2) + |.if FPU + | beq TMP2, TMP3, ->fff_resn |.else - |. c.olt.d FRET1, FARG1 + | beq TMP2, TMP3, ->fff_restv |.endif - | bne TMP1, TMP2, <1 - |. movf.d FRET1, FARG1 - | b ->fff_resn + |. sltiu AT, SFARG2HI, LJ_TISNUM + | beqz AT, >8 + |.if FPU + |. ldc1 FARG1, 0(TMP2) + |.else + |. lw SFARG2LO, LO(TMP2) + |.endif + |7: + |.if FPU + | c.olt.d FRET1, FARG1 + | fpins FRET1, FARG1 + |.else + | bal ->vm_sfcmpolt |. nop + | intins SFARG1LO, SFARG2LO, CRET1 + | intins SFARG1HI, SFARG2HI, CRET1 + |.endif + | b <6 + |. addiu TMP2, TMP2, 8 + | + |8: // Convert integer to number and continue with number loop. + | bne SFARG2HI, TISNUM, ->fff_fallback + |.if FPU + |. lwc1 FARG1, LO(TMP2) + | b <7 + |. cvt.d.w FARG1, FARG1 + |.else + |. nop + | bal ->vm_sfi2d_2 + |. nop + | b <7 + |. nop + |.endif + | |.endmacro | - | math_minmax math_min, 0 - | math_minmax math_max, 1 + | math_minmax math_min, movz, movf.d + | math_minmax math_max, movn, movt.d | |//-- String library ----------------------------------------------------- | @@ -1632,32 +1863,31 @@ static void build_subroutines(BuildCtx *ctx) | bnez AT, ->fff_fallback // Need exactly 1 string argument. |. nop | lw TMP0, STR:CARG1->len - | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). | addiu RA, BASE, -8 + | lw PC, FRAME_PC(BASE) | sltu RD, r0, TMP0 - | mtc1 TMP1, f0 + | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). | addiu RD, RD, 1 - | cvt.d.w f0, f0 - | lw PC, FRAME_PC(BASE) | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 + | sw TISNUM, HI(RA) | b ->fff_res - |. sdc1 f0, 0(RA) + |. sw TMP1, LO(RA) | |.ffunc string_char // Only handle the 1-arg case here. | ffgccheck + |. nop | lw CARG3, HI(BASE) - | ldc1 FARG1, 0(BASE) - | li AT, 8 - | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. - |. sltiu AT, CARG3, LJ_TISNUM - | beqz AT, ->fff_fallback + | lw CARG1, LO(BASE) + | li TMP1, 255 + | xori AT, NARGS8:RC, 8 // Exactly 1 argument. + | xor TMP0, CARG3, TISNUM // Integer. + | sltu TMP1, TMP1, CARG1 // !(255 < n). + | or AT, AT, TMP0 + | or AT, AT, TMP1 + | bnez AT, ->fff_fallback |. li CARG3, 1 - | trunc.w.d FARG1, FARG1 | addiu CARG2, sp, ARG5_OFS - | sltiu AT, TMP0, 256 - | mfc1 TMP0, FARG1 - | beqz AT, ->fff_fallback - |. sw TMP0, ARG5 + | sb CARG1, ARG5 |->fff_newstr: | load_got lj_str_new | sw BASE, L->base @@ -1667,34 +1897,29 @@ static void build_subroutines(BuildCtx *ctx) | // Returns GCstr *. | lw BASE, L->base |->fff_resstr: - | move CARG1, CRET1 + | move SFARG1LO, CRET1 | b ->fff_restv - |. li CARG3, LJ_TSTR + |. li SFARG1HI, LJ_TSTR | |.ffunc string_sub | ffgccheck + |. nop | addiu AT, NARGS8:RC, -16 | lw CARG3, 16+HI(BASE) - | ldc1 f0, 16(BASE) | lw TMP0, HI(BASE) | lw STR:CARG1, LO(BASE) | bltz AT, ->fff_fallback - | lw CARG2, 8+HI(BASE) - | ldc1 f2, 8(BASE) + |. lw CARG2, 8+HI(BASE) | beqz AT, >1 |. li CARG4, -1 - | trunc.w.d f0, f0 - | sltiu AT, CARG3, LJ_TISNUM - | beqz AT, ->fff_fallback - |. mfc1 CARG4, f0 + | bne CARG3, TISNUM, ->fff_fallback + |. lw CARG4, 16+LO(BASE) |1: - | sltiu AT, CARG2, LJ_TISNUM - | beqz AT, ->fff_fallback + | bne CARG2, TISNUM, ->fff_fallback |. li AT, LJ_TSTR - | trunc.w.d f2, f2 | bne TMP0, AT, ->fff_fallback - |. lw CARG2, STR:CARG1->len - | mfc1 CARG3, f2 + |. lw CARG3, 8+LO(BASE) + | lw CARG2, STR:CARG1->len | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end | slt AT, CARG4, r0 | addiu TMP0, CARG2, 1 @@ -1716,13 +1941,14 @@ static void build_subroutines(BuildCtx *ctx) | bgez CARG3, ->fff_newstr |. addiu CARG3, CARG3, 1 // len++ |->fff_emptystr: // Return empty string. - | addiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) + | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty) | b ->fff_restv - |. li CARG3, LJ_TSTR + |. li SFARG1HI, LJ_TSTR | |.macro ffstring_op, name | .ffunc string_ .. name | ffgccheck + |. nop | lw CARG3, HI(BASE) | lw STR:CARG2, LO(BASE) | beqz NARGS8:RC, ->fff_fallback @@ -1749,27 +1975,96 @@ static void build_subroutines(BuildCtx *ctx) | |//-- Bit library -------------------------------------------------------- | + |->vm_tobit_fb: + | beqz TMP1, ->fff_fallback + |.if FPU + |. ldc1 FARG1, 0(BASE) + | add.d FARG1, FARG1, TOBIT + | jr ra + |. mfc1 CRET1, FARG1 + |.else + |// FP number to bit conversion for soft-float. + |->vm_tobit: + | sll TMP0, SFARG1HI, 1 + | lui AT, 0x0020 + | addu TMP0, TMP0, AT + | slt AT, TMP0, r0 + | movz SFARG1LO, r0, AT + | beqz AT, >2 + |. li TMP1, 0x3e0 + | not TMP1, TMP1 + | sra TMP0, TMP0, 21 + | subu TMP0, TMP1, TMP0 + | slt AT, TMP0, r0 + | bnez AT, >1 + |. sll TMP1, SFARG1HI, 11 + | lui AT, 0x8000 + | or TMP1, TMP1, AT + | srl AT, SFARG1LO, 21 + | or TMP1, TMP1, AT + | slt AT, SFARG1HI, r0 + | beqz AT, >2 + |. srlv SFARG1LO, TMP1, TMP0 + | subu SFARG1LO, r0, SFARG1LO + |2: + | jr ra + |. move CRET1, SFARG1LO + |1: + | addiu TMP0, TMP0, 21 + | srlv TMP1, SFARG1LO, TMP0 + | li AT, 20 + | subu TMP0, AT, TMP0 + | sll SFARG1LO, SFARG1HI, 12 + | sllv AT, SFARG1LO, TMP0 + | or SFARG1LO, TMP1, AT + | slt AT, SFARG1HI, r0 + | beqz AT, <2 + |. nop + | jr ra + |. subu CRET1, r0, SFARG1LO + |.endif + | |.macro .ffunc_bit, name - | .ffunc_n bit_..name - |. add.d FARG1, FARG1, TOBIT - | mfc1 CRET1, FARG1 + | .ffunc_1 bit_..name + | beq SFARG1HI, TISNUM, >6 + |. move CRET1, SFARG1LO + | bal ->vm_tobit_fb + |. sltu TMP1, SFARG1HI, TISNUM + |6: |.endmacro | |.macro .ffunc_bit_op, name, ins | .ffunc_bit name - | addiu TMP1, BASE, 8 - | addu TMP2, BASE, NARGS8:RC + | addiu TMP2, BASE, 8 + | addu TMP3, BASE, NARGS8:RC |1: - | lw CARG4, HI(TMP1) - | beq TMP1, TMP2, ->fff_resi - |. ldc1 FARG1, 0(TMP1) - | sltiu AT, CARG4, LJ_TISNUM - | beqz AT, ->fff_fallback - | add.d FARG1, FARG1, TOBIT - | mfc1 CARG2, FARG1 - | ins CRET1, CRET1, CARG2 + | lw SFARG1HI, HI(TMP2) + | beq TMP2, TMP3, ->fff_resi + |. lw SFARG1LO, LO(TMP2) + |.if FPU + | bne SFARG1HI, TISNUM, >2 + |. addiu TMP2, TMP2, 8 | b <1 - |. addiu TMP1, TMP1, 8 + |. ins CRET1, CRET1, SFARG1LO + |2: + | ldc1 FARG1, -8(TMP2) + | sltu TMP1, SFARG1HI, TISNUM + | beqz TMP1, ->fff_fallback + |. add.d FARG1, FARG1, TOBIT + | mfc1 SFARG1LO, FARG1 + | b <1 + |. ins CRET1, CRET1, SFARG1LO + |.else + | beq SFARG1HI, TISNUM, >2 + |. move CRET2, CRET1 + | bal ->vm_tobit_fb + |. sltu TMP1, SFARG1HI, TISNUM + | move SFARG1LO, CRET2 + |2: + | ins CRET1, CRET1, SFARG1LO + | b <1 + |. addiu TMP2, TMP2, 8 + |.endif |.endmacro | |.ffunc_bit_op band, and @@ -1793,24 +2088,28 @@ static void build_subroutines(BuildCtx *ctx) |. not CRET1, CRET1 | |.macro .ffunc_bit_sh, name, ins, shmod - | .ffunc_nn bit_..name - |. add.d FARG1, FARG1, TOBIT - | add.d FARG2, FARG2, TOBIT - | mfc1 CARG1, FARG1 - | mfc1 CARG2, FARG2 + | .ffunc_2 bit_..name + | beq SFARG1HI, TISNUM, >1 + |. nop + | bal ->vm_tobit_fb + |. sltu TMP1, SFARG1HI, TISNUM + | move SFARG1LO, CRET1 + |1: + | bne SFARG2HI, TISNUM, ->fff_fallback + |. nop |.if shmod == 1 | li AT, 32 - | subu TMP0, AT, CARG2 - | sllv CARG2, CARG1, CARG2 - | srlv CARG1, CARG1, TMP0 + | subu TMP0, AT, SFARG2LO + | sllv SFARG2LO, SFARG1LO, SFARG2LO + | srlv SFARG1LO, SFARG1LO, TMP0 |.elif shmod == 2 | li AT, 32 - | subu TMP0, AT, CARG2 - | srlv CARG2, CARG1, CARG2 - | sllv CARG1, CARG1, TMP0 + | subu TMP0, AT, SFARG2LO + | srlv SFARG2LO, SFARG1LO, SFARG2LO + | sllv SFARG1LO, SFARG1LO, TMP0 |.endif | b ->fff_resi - |. ins CRET1, CARG1, CARG2 + |. ins CRET1, SFARG1LO, SFARG2LO |.endmacro | |.ffunc_bit_sh lshift, sllv, 0 @@ -1822,9 +2121,11 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc_bit tobit |->fff_resi: - | mtc1 CRET1, FRET1 - | b ->fff_resn - |. cvt.d.w FRET1, FRET1 + | lw PC, FRAME_PC(BASE) + | addiu RA, BASE, -8 + | sw TISNUM, -8+HI(BASE) + | b ->fff_res1 + |. sw CRET1, -8+LO(BASE) | |//----------------------------------------------------------------------- | @@ -2022,10 +2323,12 @@ static void build_subroutines(BuildCtx *ctx) | beqz AT, >2 |. addu RC, BASE, RC // Call base. |1: // Move results down. - | ldc1 f0, 0(RA) + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) | addiu AT, AT, -8 | addiu RA, RA, 8 - | sdc1 f0, 0(RC) + | sw SFRETHI, HI(RC) + | sw SFRETLO, LO(RC) | bnez AT, <1 |. addiu RC, RC, 8 |2: @@ -2082,14 +2385,23 @@ static void build_subroutines(BuildCtx *ctx) |//----------------------------------------------------------------------- | |.macro savex_, a, b + |.if FPU | sdc1 f..a, 16+a*8(sp) | sw r..a, 16+32*8+a*4(sp) | sw r..b, 16+32*8+b*4(sp) + |.else + | sw r..a, 16+a*4(sp) + | sw r..b, 16+b*4(sp) + |.endif |.endmacro | |->vm_exit_handler: |.if JIT + |.if FPU | addiu sp, sp, -(16+32*8+32*4) + |.else + | addiu sp, sp, -(16+32*4) + |.endif | savex_ 0, 1 | savex_ 2, 3 | savex_ 4, 5 @@ -2104,17 +2416,25 @@ static void build_subroutines(BuildCtx *ctx) | savex_ 22, 23 | savex_ 24, 25 | savex_ 26, 27 + |.if FPU | sdc1 f28, 16+28*8(sp) - | sw r28, 16+32*8+28*4(sp) | sdc1 f30, 16+30*8(sp) + | sw r28, 16+32*8+28*4(sp) | sw r30, 16+32*8+30*4(sp) | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. + | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp. + | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP + |.else + | sw r28, 16+28*4(sp) + | sw r30, 16+30*4(sp) + | sw r0, 16+31*4(sp) // Clear RID_TMP. + | addiu TMP2, sp, 16+32*4 // Recompute original value of sp. + | sw TMP2, 16+29*4(sp) // Store sp in RID_SP + |.endif | li_vmstate EXIT - | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp. | addiu DISPATCH, JGL, -GG_DISP2G-32768 | lw TMP1, 0(TMP2) // Load exit number. | st_vmstate - | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP. | lw L, DISPATCH_GL(cur_L)(DISPATCH) | lw BASE, DISPATCH_GL(jit_base)(DISPATCH) | load_got lj_trace_exit @@ -2144,15 +2464,16 @@ static void build_subroutines(BuildCtx *ctx) |1: | bltz CRET1, >9 // Check for error from exit. |. lw LFUNC:RB, FRAME_FUNC(BASE) - | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | sll MULTRES, CRET1, 3 | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM // Setup type comparison constants. | sw MULTRES, SAVE_MULTRES - | mtc1 TMP3, TOBIT + | .FPU mtc1 TMP3, TOBIT | lw TMP1, LFUNC:RB->pc | sw r0, DISPATCH_GL(jit_base)(DISPATCH) | lw KBASE, PC2PROTO(k)(TMP1) - | cvt.d.s TOBIT, TOBIT + | .FPU cvt.d.s TOBIT, TOBIT | // Modified copy of ins_next which handles function header dispatch, too. | lw INS, 0(PC) | addiu PC, PC, 4 @@ -2160,7 +2481,7 @@ static void build_subroutines(BuildCtx *ctx) | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) | decode_OP4a TMP1, INS | decode_OP4b TMP1 - | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header? + | sltiu TMP2, TMP1, BC_FUNCF*4 | addu TMP0, DISPATCH, TMP1 | decode_RD8a RD, INS | lw AT, 0(TMP0) @@ -2201,8 +2522,9 @@ static void build_subroutines(BuildCtx *ctx) |//-- Math helper functions ---------------------------------------------- |//----------------------------------------------------------------------- | + |// Hard-float round to integer. |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. - |.macro vm_round, func + |.macro vm_round_hf, func | lui TMP0, 0x4330 // Hiword of 2^52 (double). | mtc1 r0, f4 | mtc1 TMP0, f5 @@ -2244,6 +2566,12 @@ static void build_subroutines(BuildCtx *ctx) |. mov.d FRET1, FARG1 |.endmacro | + |.macro vm_round, func + |.if FPU + | vm_round_hf, func + |.endif + |.endmacro + | |->vm_floor: | vm_round floor |->vm_ceil: @@ -2253,6 +2581,178 @@ static void build_subroutines(BuildCtx *ctx) | vm_round trunc |.endif | + |// Soft-float integer to number conversion. + |.macro sfi2d, AHI, ALO + |.if not FPU + | beqz ALO, >9 // Handle zero first. + |. sra TMP0, ALO, 31 + | xor TMP1, ALO, TMP0 + | subu TMP1, TMP1, TMP0 // Absolute value in TMP1. + | clz AHI, TMP1 + | andi TMP0, TMP0, 0x800 // Mask sign bit. + | li AT, 0x3ff+31-1 + | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1. + | subu AHI, AT, AHI // Exponent - 1 in AHI. + | sll ALO, TMP1, 21 + | or AHI, AHI, TMP0 // Sign | Exponent. + | srl TMP1, TMP1, 11 + | sll AHI, AHI, 20 // Align left. + | jr ra + |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent. + |9: + | jr ra + |. li AHI, 0 + |.endif + |.endmacro + | + |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1. + |->vm_sfi2d_1: + | sfi2d SFARG1HI, SFARG1LO + | + |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1. + |->vm_sfi2d_2: + | sfi2d SFARG2HI, SFARG2LO + | + |// Soft-float comparison. Equivalent to c.eq.d. + |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. + |->vm_sfcmpeq: + |.if not FPU + | sll AT, SFARG1HI, 1 + | sll TMP0, SFARG2HI, 1 + | or CRET1, SFARG1LO, SFARG2LO + | or TMP1, AT, TMP0 + | or TMP1, TMP1, CRET1 + | beqz TMP1, >8 // Both args +-0: return 1. + |. sltu CRET1, r0, SFARG1LO + | lui TMP1, 0xffe0 + | addu AT, AT, CRET1 + | sltu CRET1, r0, SFARG2LO + | sltu AT, TMP1, AT + | addu TMP0, TMP0, CRET1 + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0; + |. xor TMP0, SFARG1HI, SFARG2HI + | xor TMP1, SFARG1LO, SFARG2LO + | or AT, TMP0, TMP1 + | jr ra + |. sltiu CRET1, AT, 1 // Same values: return 1. + |8: + | jr ra + |. li CRET1, 1 + |9: + | jr ra + |. li CRET1, 0 + |.endif + | + |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. + |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. + |->vm_sfcmpult: + |.if not FPU + | b >1 + |. li CRET2, 1 + |.endif + | + |->vm_sfcmpolt: + |.if not FPU + | li CRET2, 0 + |1: + | sll AT, SFARG1HI, 1 + | sll TMP0, SFARG2HI, 1 + | or CRET1, SFARG1LO, SFARG2LO + | or TMP1, AT, TMP0 + | or TMP1, TMP1, CRET1 + | beqz TMP1, >8 // Both args +-0: return 0. + |. sltu CRET1, r0, SFARG1LO + | lui TMP1, 0xffe0 + | addu AT, AT, CRET1 + | sltu CRET1, r0, SFARG2LO + | sltu AT, TMP1, AT + | addu TMP0, TMP0, CRET1 + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; + |. and AT, SFARG1HI, SFARG2HI + | bltz AT, >5 // Both args negative? + |. nop + | beq SFARG1HI, SFARG2HI, >8 + |. sltu CRET1, SFARG1LO, SFARG2LO + | jr ra + |. slt CRET1, SFARG1HI, SFARG2HI + |5: // Swap conditions if both operands are negative. + | beq SFARG1HI, SFARG2HI, >8 + |. sltu CRET1, SFARG2LO, SFARG1LO + | jr ra + |. slt CRET1, SFARG2HI, SFARG1HI + |8: + | jr ra + |. nop + |9: + | jr ra + |. move CRET1, CRET2 + |.endif + | + |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. + |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. + |->vm_sfcmpolex: + |.if not FPU + | sll AT, SFARG1HI, 1 + | sll TMP0, SFARG2HI, 1 + | or CRET1, SFARG1LO, SFARG2LO + | or TMP1, AT, TMP0 + | or TMP1, TMP1, CRET1 + | beqz TMP1, >8 // Both args +-0: return 1. + |. sltu CRET1, r0, SFARG1LO + | lui TMP1, 0xffe0 + | addu AT, AT, CRET1 + | sltu CRET1, r0, SFARG2LO + | sltu AT, TMP1, AT + | addu TMP0, TMP0, CRET1 + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0; + |. and AT, SFARG1HI, SFARG2HI + | xor AT, AT, TMP3 + | bltz AT, >5 // Both args negative? + |. nop + | beq SFARG1HI, SFARG2HI, >6 + |. sltu CRET1, SFARG2LO, SFARG1LO + | jr ra + |. slt CRET1, SFARG2HI, SFARG1HI + |5: // Swap conditions if both operands are negative. + | beq SFARG1HI, SFARG2HI, >6 + |. sltu CRET1, SFARG1LO, SFARG2LO + | slt CRET1, SFARG1HI, SFARG2HI + |6: + | jr ra + |. nop + |8: + | jr ra + |. li CRET1, 1 + |9: + | jr ra + |. li CRET1, 0 + |.endif + | + |.macro sfmin_max, name, intins + |->vm_sf .. name: + |.if JIT and not FPU + | move TMP2, ra + | bal ->vm_sfcmpolt + |. nop + | move TMP0, CRET1 + | move SFRETHI, SFARG1HI + | move SFRETLO, SFARG1LO + | move ra, TMP2 + | intins SFRETHI, SFARG2HI, TMP0 + | jr ra + |. intins SFRETLO, SFARG2LO, TMP0 + |.endif + |.endmacro + | + | sfmin_max min, movz + | sfmin_max max, movn + | |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- @@ -2272,10 +2772,10 @@ static void build_subroutines(BuildCtx *ctx) | sw r1, CTSTATE->cb.slot | sw CARG1, CTSTATE->cb.gpr[0] | sw CARG2, CTSTATE->cb.gpr[1] - | sdc1 FARG1, CTSTATE->cb.fpr[0] + | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] | sw CARG3, CTSTATE->cb.gpr[2] | sw CARG4, CTSTATE->cb.gpr[3] - | sdc1 FARG2, CTSTATE->cb.fpr[1] + | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] | addiu TMP0, sp, CFRAME_SPACE+16 | sw TMP0, CTSTATE->cb.stack | sw r0, SAVE_PC // Any value outside of bytecode is ok. @@ -2285,15 +2785,16 @@ static void build_subroutines(BuildCtx *ctx) | // Returns lua_State *. | lw BASE, L:CRET1->base | lw RC, L:CRET1->top + | li TISNUM, LJ_TISNUM // Setup type comparison constants. | move L, CRET1 - | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | lw LFUNC:RB, FRAME_FUNC(BASE) - | mtc1 TMP3, TOBIT + | .FPU mtc1 TMP3, TOBIT | li_vmstate INTERP | li TISNIL, LJ_TNIL | subu RC, RC, BASE | st_vmstate - | cvt.d.s TOBIT, TOBIT + | .FPU cvt.d.s TOBIT, TOBIT | ins_callt |.endif | @@ -2307,11 +2808,11 @@ static void build_subroutines(BuildCtx *ctx) | move CARG2, RA | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) |. move CARG1, CTSTATE + | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] | lw CRET1, CTSTATE->cb.gpr[0] - | ldc1 FRET1, CTSTATE->cb.fpr[0] - | lw CRET2, CTSTATE->cb.gpr[1] + | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] | b ->vm_leave_unw - |. ldc1 FRET2, CTSTATE->cb.fpr[1] + |. lw CRET2, CTSTATE->cb.gpr[1] |.endif | |->vm_ffi_call: // Call C function via FFI. @@ -2343,8 +2844,8 @@ static void build_subroutines(BuildCtx *ctx) | lw CARG2, CCSTATE->gpr[1] | lw CARG3, CCSTATE->gpr[2] | lw CARG4, CCSTATE->gpr[3] - | ldc1 FARG1, CCSTATE->fpr[0] - | ldc1 FARG2, CCSTATE->fpr[1] + | .FPU ldc1 FARG1, CCSTATE->fpr[0] + | .FPU ldc1 FARG2, CCSTATE->fpr[1] | jalr CFUNCADDR |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. | lw CCSTATE:TMP1, -12(r16) @@ -2352,8 +2853,13 @@ static void build_subroutines(BuildCtx *ctx) | lw ra, -4(r16) | sw CRET1, CCSTATE:TMP1->gpr[0] | sw CRET2, CCSTATE:TMP1->gpr[1] + |.if FPU | sdc1 FRET1, CCSTATE:TMP1->fpr[0] | sdc1 FRET2, CCSTATE:TMP1->fpr[1] + |.else + | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part. + | sw CARG2, CCSTATE:TMP1->gpr[3] + |.endif | move sp, r16 | jr ra |. move r16, TMP2 @@ -2377,82 +2883,143 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | // RA = src1*8, RD = src2*8, JMP with RD = target - | addu CARG2, BASE, RA - | addu CARG3, BASE, RD - | lw TMP0, HI(CARG2) - | lw TMP1, HI(CARG3) - | ldc1 f0, 0(CARG2) - | ldc1 f2, 0(CARG3) - | sltiu TMP0, TMP0, LJ_TISNUM - | sltiu TMP1, TMP1, LJ_TISNUM + |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp + | addu RA, BASE, RA + | addu RD, BASE, RD + | lw RAHI, HI(RA) + | lw RDHI, HI(RD) | lhu TMP2, OFS_RD(PC) - | and TMP0, TMP0, TMP1 | addiu PC, PC, 4 - | beqz TMP0, ->vmeta_comp - |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) - | decode_RD4b TMP2 - | addu TMP2, TMP2, TMP1 - if (op == BC_ISLT || op == BC_ISGE) { - | c.olt.d f0, f2 - } else { - | c.ole.d f0, f2 - } - if (op == BC_ISLT || op == BC_ISLE) { - | movf TMP2, r0 - } else { - | movt TMP2, r0 - } - | addu PC, PC, TMP2 + | bne RAHI, TISNUM, >2 + |. lw RALO, LO(RA) + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | lw RDLO, LO(RD) + | bne RDHI, TISNUM, >5 + |. decode_RD4b TMP2 + | slt AT, SFARG1LO, SFARG2LO + | addu TMP2, TMP2, TMP3 + | movop TMP2, r0, AT |1: + | addu PC, PC, TMP2 | ins_next + | + |2: // RA is not an integer. + | sltiu AT, RAHI, LJ_TISNUM + | beqz AT, ->vmeta_comp + |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | sltiu AT, RDHI, LJ_TISNUM + |.if FPU + | ldc1 FRA, 0(RA) + | ldc1 FRD, 0(RD) + |.else + | lw RDLO, LO(RD) + |.endif + | beqz AT, >4 + |. decode_RD4b TMP2 + |3: // RA and RD are both numbers. + |.if FPU + | fcomp f20, f22 + | addu TMP2, TMP2, TMP3 + | b <1 + |. fmovop TMP2, r0 + |.else + | bal sfcomp + |. addu TMP2, TMP2, TMP3 + | b <1 + |. movop TMP2, r0, CRET1 + |.endif + | + |4: // RA is a number, RD is not a number. + | bne RDHI, TISNUM, ->vmeta_comp + | // RA is a number, RD is an integer. Convert RD to a number. + |.if FPU + |. lwc1 FRD, LO(RD) + | b <3 + |. cvt.d.w FRD, FRD + |.else + |. nop + |.if "RDHI" == "SFARG1HI" + | bal ->vm_sfi2d_1 + |.else + | bal ->vm_sfi2d_2 + |.endif + |. nop + | b <3 + |. nop + |.endif + | + |5: // RA is an integer, RD is not an integer + | sltiu AT, RDHI, LJ_TISNUM + | beqz AT, ->vmeta_comp + | // RA is an integer, RD is a number. Convert RA to a number. + |.if FPU + |. mtc1 RALO, FRA + | ldc1 FRD, 0(RD) + | b <3 + | cvt.d.w FRA, FRA + |.else + |. nop + |.if "RAHI" == "SFARG1HI" + | bal ->vm_sfi2d_1 + |.else + | bal ->vm_sfi2d_2 + |.endif + |. nop + | b <3 + |. nop + |.endif + |.endmacro + | + if (op == BC_ISLT) { + | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt + } else if (op == BC_ISGE) { + | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt + } else if (op == BC_ISLE) { + | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult + } else { + | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult + } break; case BC_ISEQV: case BC_ISNEV: vk = op == BC_ISEQV; | // RA = src1*8, RD = src2*8, JMP with RD = target | addu RA, BASE, RA - | addiu PC, PC, 4 - | lw TMP0, HI(RA) - | ldc1 f0, 0(RA) + | addiu PC, PC, 4 | addu RD, BASE, RD + | lw SFARG1HI, HI(RA) | lhu TMP2, -4+OFS_RD(PC) - | lw TMP1, HI(RD) - | ldc1 f2, 0(RD) + | lw SFARG2HI, HI(RD) | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sltiu AT, TMP0, LJ_TISNUM - | sltiu CARG1, TMP1, LJ_TISNUM - | decode_RD4b TMP2 - | and AT, AT, CARG1 - | beqz AT, >5 - |. addu TMP2, TMP2, TMP3 - | c.eq.d f0, f2 + | sltu AT, TISNUM, SFARG1HI + | sltu TMP0, TISNUM, SFARG2HI + | or AT, AT, TMP0 if (vk) { - | movf TMP2, r0 + | beqz AT, ->BC_ISEQN_Z } else { - | movt TMP2, r0 + | beqz AT, ->BC_ISNEN_Z } - |1: - | addu PC, PC, TMP2 - | ins_next - |5: // Either or both types are not numbers. - | lw CARG2, LO(RA) - | lw CARG3, LO(RD) + |. decode_RD4b TMP2 + | // Either or both types are not numbers. + | lw SFARG1LO, LO(RA) + | lw SFARG2LO, LO(RD) + | addu TMP2, TMP2, TMP3 |.if FFI | li TMP3, LJ_TCDATA - | beq TMP0, TMP3, ->vmeta_equal_cd + | beq SFARG1HI, TMP3, ->vmeta_equal_cd |.endif - |. sltiu AT, TMP0, LJ_TISPRI // Not a primitive? + |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive? |.if FFI - | beq TMP1, TMP3, ->vmeta_equal_cd + | beq SFARG2HI, TMP3, ->vmeta_equal_cd |.endif - |. xor TMP3, CARG2, CARG3 // Same tv? - | xor TMP1, TMP1, TMP0 // Same type? - | sltiu CARG1, TMP0, LJ_TISTABUD+1 // Table or userdata? + |. xor TMP3, SFARG1LO, SFARG2LO // Same tv? + | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type? + | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata? | movz TMP3, r0, AT // Ignore tv if primitive. - | movn CARG1, r0, TMP1 // Tab/ud and same type? - | or AT, TMP1, TMP3 // Same type && (pri||same tv). - | movz CARG1, r0, AT - | beqz CARG1, <1 // Done if not tab/ud or not same type or same tv. + | movn TMP0, r0, SFARG2HI // Tab/ud and same type? + | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv). + | movz TMP0, r0, AT + | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv. if (vk) { |. movn TMP2, r0, AT } else { @@ -2460,15 +3027,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } | // Different tables or userdatas. Need to check __eq metamethod. | // Field metatable must be at same offset for GCtab and GCudata! - | lw TAB:TMP1, TAB:CARG2->metatable - | beqz TAB:TMP1, <1 // No metatable? + | lw TAB:TMP1, TAB:SFARG1LO->metatable + | beqz TAB:TMP1, >1 // No metatable? |. nop | lbu TMP1, TAB:TMP1->nomm | andi TMP1, TMP1, 1<<MM_eq - | bnez TMP1, <1 // Or 'no __eq' flag set? + | bnez TMP1, >1 // Or 'no __eq' flag set? |. nop | b ->vmeta_equal // Handle __eq metamethod. - |. li CARG4, 1-vk // ne = 0 or 1. + |. li TMP0, 1-vk // ne = 0 or 1. + |1: + | addu PC, PC, TMP2 + | ins_next break; case BC_ISEQS: case BC_ISNES: @@ -2505,38 +3075,124 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) vk = op == BC_ISEQN; | // RA = src*8, RD = num_const*8, JMP with RD = target | addu RA, BASE, RA - | addiu PC, PC, 4 - | lw TMP0, HI(RA) - | ldc1 f0, 0(RA) - | addu RD, KBASE, RD - | lhu TMP2, -4+OFS_RD(PC) - | ldc1 f2, 0(RD) + | addu RD, KBASE, RD + | lw SFARG1HI, HI(RA) + | lw SFARG2HI, HI(RD) + | lhu TMP2, OFS_RD(PC) + | addiu PC, PC, 4 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sltiu AT, TMP0, LJ_TISNUM | decode_RD4b TMP2 - |.if FFI - | beqz AT, >5 - |.else - | beqz AT, >1 - |.endif - |. addu TMP2, TMP2, TMP3 - | c.eq.d f0, f2 if (vk) { - | movf TMP2, r0 - | addu PC, PC, TMP2 + |->BC_ISEQN_Z: + } else { + |->BC_ISNEN_Z: + } + | bne SFARG1HI, TISNUM, >3 + |. lw SFARG1LO, LO(RA) + | lw SFARG2LO, LO(RD) + | addu TMP2, TMP2, TMP3 + | bne SFARG2HI, TISNUM, >6 + |. xor AT, SFARG1LO, SFARG2LO + if (vk) { + | movn TMP2, r0, AT |1: + | addu PC, PC, TMP2 + |2: } else { - | movt TMP2, r0 + | movz TMP2, r0, AT |1: + |2: | addu PC, PC, TMP2 } | ins_next + | + |3: // RA is not an integer. + | sltiu AT, SFARG1HI, LJ_TISNUM |.if FFI - |5: - | li AT, LJ_TCDATA - | beq TMP0, AT, ->vmeta_equal_cd + | beqz AT, >8 + |.else + | beqz AT, <2 + |.endif + |. addu TMP2, TMP2, TMP3 + | sltiu AT, SFARG2HI, LJ_TISNUM + |.if FPU + | ldc1 f20, 0(RA) + | ldc1 f22, 0(RD) + |.endif + | beqz AT, >5 + |. lw SFARG2LO, LO(RD) + |4: // RA and RD are both numbers. + |.if FPU + | c.eq.d f20, f22 + | b <1 + if (vk) { + |. movf TMP2, r0 + } else { + |. movt TMP2, r0 + } + |.else + | bal ->vm_sfcmpeq |. nop | b <1 + if (vk) { + |. movz TMP2, r0, CRET1 + } else { + |. movn TMP2, r0, CRET1 + } + |.endif + | + |5: // RA is a number, RD is not a number. + |.if FFI + | bne SFARG2HI, TISNUM, >9 + |.else + | bne SFARG2HI, TISNUM, <2 + |.endif + | // RA is a number, RD is an integer. Convert RD to a number. + |.if FPU + |. lwc1 f22, LO(RD) + | b <4 + |. cvt.d.w f22, f22 + |.else + |. nop + | bal ->vm_sfi2d_2 + |. nop + | b <4 + |. nop + |.endif + | + |6: // RA is an integer, RD is not an integer + | sltiu AT, SFARG2HI, LJ_TISNUM + |.if FFI + | beqz AT, >9 + |.else + | beqz AT, <2 + |.endif + | // RA is an integer, RD is a number. Convert RA to a number. + |.if FPU + |. mtc1 SFARG1LO, f20 + | ldc1 f22, 0(RD) + | b <4 + | cvt.d.w f20, f20 + |.else + |. nop + | bal ->vm_sfi2d_1 + |. nop + | b <4 + |. nop + |.endif + | + |.if FFI + |8: + | li AT, LJ_TCDATA + | bne SFARG1HI, AT, <2 + |. nop + | b ->vmeta_equal_cd + |. nop + |9: + | li AT, LJ_TCDATA + | bne SFARG2HI, AT, <2 + |. nop + | b ->vmeta_equal_cd |. nop |.endif break; @@ -2588,7 +3244,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | addu PC, PC, TMP2 } else { | sltiu TMP0, TMP0, LJ_TISTRUECOND - | ldc1 f0, 0(RD) + | lw SFRETHI, HI(RD) + | lw SFRETLO, LO(RD) if (op == BC_ISTC) { | beqz TMP0, >1 } else { @@ -2598,7 +3255,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | decode_RD4b TMP2 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | addu TMP2, TMP2, TMP3 - | sdc1 f0, 0(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | addu PC, PC, TMP2 |1: } @@ -2630,10 +3288,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_MOV: | // RA = dst*8, RD = src*8 | addu RD, BASE, RD - | addu RA, BASE, RA - | ldc1 f0, 0(RD) + | addu RA, BASE, RA + | lw SFRETHI, HI(RD) + | lw SFRETLO, LO(RD) | ins_next1 - | sdc1 f0, 0(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | ins_next2 break; case BC_NOT: @@ -2650,16 +3310,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_UNM: | // RA = dst*8, RD = src*8 - | addu CARG3, BASE, RD + | addu RB, BASE, RD + | lw SFARG1HI, HI(RB) | addu RA, BASE, RA - | lw TMP0, HI(CARG3) - | ldc1 f0, 0(CARG3) - | sltiu AT, TMP0, LJ_TISNUM - | beqz AT, ->vmeta_unm - |. neg.d f0, f0 + | bne SFARG1HI, TISNUM, >2 + |. lw SFARG1LO, LO(RB) + | lui TMP1, 0x8000 + | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31. + |. negu SFARG1LO, SFARG1LO + |1: | ins_next1 - | sdc1 f0, 0(RA) + | sw SFARG1HI, HI(RA) + | sw SFARG1LO, LO(RA) | ins_next2 + |2: + | sltiu AT, SFARG1HI, LJ_TISNUM + | beqz AT, ->vmeta_unm + |. lui TMP1, 0x8000 + | b <1 + |. xor SFARG1HI, SFARG1HI, TMP1 break; case BC_LEN: | // RA = dst*8, RD = src*8 @@ -2670,12 +3339,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li AT, LJ_TSTR | bne TMP0, AT, >2 |. li AT, LJ_TTAB - | lw CRET1, STR:CARG1->len + | lw CRET1, STR:CARG1->len |1: - | mtc1 CRET1, f0 - | cvt.d.w f0, f0 | ins_next1 - | sdc1 f0, 0(RA) + | sw TISNUM, HI(RA) + | sw CRET1, LO(RA) | ins_next2 |2: | bne TMP0, AT, ->vmeta_len @@ -2706,104 +3374,232 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) /* -- Binary ops -------------------------------------------------------- */ - |.macro ins_arithpre + |.macro fpmod, a, b, c + | bal ->vm_floor // floor(b/c) + |. div.d FARG1, b, c + | mul.d a, FRET1, c + | sub.d a, b, a // b - floor(b/c)*c + |.endmacro + + |.macro sfpmod + | addiu sp, sp, -16 + | + | load_got __divdf3 + | sw SFARG1HI, HI(sp) + | sw SFARG1LO, LO(sp) + | sw SFARG2HI, 8+HI(sp) + | call_extern + |. sw SFARG2LO, 8+LO(sp) + | + | load_got floor + | move SFARG1HI, SFRETHI + | call_extern + |. move SFARG1LO, SFRETLO + | + | load_got __muldf3 + | move SFARG1HI, SFRETHI + | move SFARG1LO, SFRETLO + | lw SFARG2HI, 8+HI(sp) + | call_extern + |. lw SFARG2LO, 8+LO(sp) + | + | load_got __subdf3 + | lw SFARG1HI, HI(sp) + | lw SFARG1LO, LO(sp) + | move SFARG2HI, SFRETHI + | call_extern + |. move SFARG2LO, SFRETLO + | + | addiu sp, sp, 16 + |.endmacro + + |.macro ins_arithpre, label ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 ||switch (vk) { ||case 0: - | addu CARG3, BASE, RB - | addu CARG4, KBASE, RC - | lw TMP1, HI(CARG3) - | ldc1 f20, 0(CARG3) - | ldc1 f22, 0(CARG4) - | sltiu AT, TMP1, LJ_TISNUM + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | // RA = dst*8, RB = src1*8, RC = num_const*8 + | addu RB, BASE, RB + |.if "label" ~= "none" + | b label + |.endif + |. addu RC, KBASE, RC || break; ||case 1: - | addu CARG4, BASE, RB - | addu CARG3, KBASE, RC - | lw TMP1, HI(CARG4) - | ldc1 f22, 0(CARG4) - | ldc1 f20, 0(CARG3) - | sltiu AT, TMP1, LJ_TISNUM + | decode_RB8a RC, INS + | decode_RB8b RC + | decode_RDtoRC8 RB, RD + | // RA = dst*8, RB = num_const*8, RC = src1*8 + | addu RC, BASE, RC + |.if "label" ~= "none" + | b label + |.endif + |. addu RB, KBASE, RB || break; ||default: - | addu CARG3, BASE, RB - | addu CARG4, BASE, RC - | lw TMP1, HI(CARG3) - | lw TMP2, HI(CARG4) - | ldc1 f20, 0(CARG3) - | ldc1 f22, 0(CARG4) - | sltiu AT, TMP1, LJ_TISNUM - | sltiu TMP0, TMP2, LJ_TISNUM - | and AT, AT, TMP0 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | // RA = dst*8, RB = src1*8, RC = src2*8 + | addu RB, BASE, RB + |.if "label" ~= "none" + | b label + |.endif + |. addu RC, BASE, RC || break; ||} - | beqz AT, ->vmeta_arith - |. addu RA, BASE, RA |.endmacro | - |.macro fpmod, a, b, c - |->BC_MODVN_Z: - | bal ->vm_floor // floor(b/c) - |. div.d FARG1, b, c - | mul.d a, FRET1, c - | sub.d a, b, a // b - floor(b/c)*c - |.endmacro + |.macro ins_arith, intins, fpins, fpcall, label + | ins_arithpre none | - |.macro ins_arith, ins - | ins_arithpre - |.if "ins" == "fpmod_" - | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - |. nop + |.if "label" ~= "none" + |label: + |.endif + | + | lw SFARG1HI, HI(RB) + | lw SFARG2HI, HI(RC) + | + |.if "intins" ~= "div" + | + | // Check for two integers. + | lw SFARG1LO, LO(RB) + | bne SFARG1HI, TISNUM, >5 + |. lw SFARG2LO, LO(RC) + | bne SFARG2HI, TISNUM, >5 + | + |.if "intins" == "addu" + |. intins CRET1, SFARG1LO, SFARG2LO + | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow. + | xor TMP2, CRET1, SFARG2LO + | and TMP1, TMP1, TMP2 + | bltz TMP1, ->vmeta_arith + |. addu RA, BASE, RA + |.elif "intins" == "subu" + |. intins CRET1, SFARG1LO, SFARG2LO + | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow. + | xor TMP2, SFARG1LO, SFARG2LO + | and TMP1, TMP1, TMP2 + | bltz TMP1, ->vmeta_arith + |. addu RA, BASE, RA + |.elif "intins" == "mult" + |. intins SFARG1LO, SFARG2LO + | mflo CRET1 + | mfhi TMP2 + | sra TMP1, CRET1, 31 + | bne TMP1, TMP2, ->vmeta_arith + |. addu RA, BASE, RA + |.else + |. load_got lj_vm_modi + | beqz SFARG2LO, ->vmeta_arith + |. addu RA, BASE, RA + |.if ENDIAN_BE + | move CARG1, SFARG1LO + |.endif + | call_extern + |. move CARG2, SFARG2LO + |.endif + | + | ins_next1 + | sw TISNUM, HI(RA) + | sw CRET1, LO(RA) + |3: + | ins_next2 + | + |.elif not FPU + | + | lw SFARG1LO, LO(RB) + | lw SFARG2LO, LO(RC) + | + |.endif + | + |5: // Check for two numbers. + | .FPU ldc1 f20, 0(RB) + | sltiu AT, SFARG1HI, LJ_TISNUM + | sltiu TMP0, SFARG2HI, LJ_TISNUM + | .FPU ldc1 f22, 0(RC) + | and AT, AT, TMP0 + | beqz AT, ->vmeta_arith + |. addu RA, BASE, RA + | + |.if FPU + | fpins FRET1, f20, f22 + |.elif "fpcall" == "sfpmod" + | sfpmod |.else - | ins f0, f20, f22 + | load_got fpcall + | call_extern + |. nop + |.endif + | | ins_next1 - | sdc1 f0, 0(RA) + |.if not FPU + | sw SFRETHI, HI(RA) + |.endif + |.if "intins" ~= "div" + | b <3 + |.endif + |.if FPU + |. sdc1 FRET1, 0(RA) + |.else + |. sw SFRETLO, LO(RA) + |.endif + |.if "intins" == "div" | ins_next2 |.endif + | |.endmacro case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arith add.d + | ins_arith addu, add.d, __adddf3, none break; case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arith sub.d + | ins_arith subu, sub.d, __subdf3, none break; case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith mul.d + | ins_arith mult, mul.d, __muldf3, none break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arith div.d + case BC_DIVVN: + | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z + break; + case BC_DIVNV: case BC_DIVVV: + | ins_arithpre ->BC_DIVVN_Z break; case BC_MODVN: - | ins_arith fpmod + | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z break; case BC_MODNV: case BC_MODVV: - | ins_arith fpmod_ + | ins_arithpre ->BC_MODVN_Z break; case BC_POW: - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | addu CARG3, BASE, RB - | addu CARG4, BASE, RC - | lw TMP1, HI(CARG3) - | lw TMP2, HI(CARG4) - | ldc1 FARG1, 0(CARG3) - | ldc1 FARG2, 0(CARG4) - | sltiu AT, TMP1, LJ_TISNUM - | sltiu TMP0, TMP2, LJ_TISNUM + | ins_arithpre none + | lw SFARG1HI, HI(RB) + | lw SFARG2HI, HI(RC) + | sltiu AT, SFARG1HI, LJ_TISNUM + | sltiu TMP0, SFARG2HI, LJ_TISNUM | and AT, AT, TMP0 | load_got pow | beqz AT, ->vmeta_arith |. addu RA, BASE, RA + |.if FPU + | ldc1 FARG1, 0(RB) + | ldc1 FARG2, 0(RC) + |.else + | lw SFARG1LO, LO(RB) + | lw SFARG2LO, LO(RC) + |.endif | call_extern |. nop | ins_next1 + |.if FPU | sdc1 FRET1, 0(RA) + |.else + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) + |.endif | ins_next2 break; @@ -2826,10 +3622,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bnez CRET1, ->vmeta_binop |. lw BASE, L->base | addu RB, BASE, MULTRES - | ldc1 f0, 0(RB) + | lw SFRETHI, HI(RB) + | lw SFRETLO, LO(RB) | addu RA, BASE, RA | ins_next1 - | sdc1 f0, 0(RA) // Copy result from RB to RA. + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | ins_next2 break; @@ -2864,20 +3662,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_KSHORT: | // RA = dst*8, RD = int16_literal*8 | sra RD, INS, 16 - | mtc1 RD, f0 | addu RA, BASE, RA - | cvt.d.w f0, f0 | ins_next1 - | sdc1 f0, 0(RA) + | sw TISNUM, HI(RA) + | sw RD, LO(RA) | ins_next2 break; case BC_KNUM: | // RA = dst*8, RD = num_const*8 | addu RD, KBASE, RD | addu RA, BASE, RA - | ldc1 f0, 0(RD) + | lw SFRETHI, HI(RD) + | lw SFRETLO, LO(RD) | ins_next1 - | sdc1 f0, 0(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | ins_next2 break; case BC_KPRI: @@ -2913,9 +3712,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lw UPVAL:RB, LFUNC:RD->uvptr | ins_next1 | lw TMP1, UPVAL:RB->v - | ldc1 f0, 0(TMP1) + | lw SFRETHI, HI(TMP1) + | lw SFRETLO, LO(TMP1) | addu RA, BASE, RA - | sdc1 f0, 0(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | ins_next2 break; case BC_USETV: @@ -2924,26 +3725,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | srl RA, RA, 1 | addu RD, BASE, RD | addu RA, RA, LFUNC:RB - | ldc1 f0, 0(RD) | lw UPVAL:RB, LFUNC:RA->uvptr + | lw SFRETHI, HI(RD) + | lw SFRETLO, LO(RD) | lbu TMP3, UPVAL:RB->marked | lw CARG2, UPVAL:RB->v | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | lbu TMP0, UPVAL:RB->closed - | lw TMP2, HI(RD) - | sdc1 f0, 0(CARG2) + | sw SFRETHI, HI(CARG2) + | sw SFRETLO, LO(CARG2) | li AT, LJ_GC_BLACK|1 | or TMP3, TMP3, TMP0 | beq TMP3, AT, >2 // Upvalue is closed and black? - |. addiu TMP2, TMP2, -(LJ_TNUMX+1) + |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1) |1: | ins_next | |2: // Check if new value is collectable. | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) | beqz AT, <1 // tvisgcv(v) - |. lw TMP1, LO(RD) - | lbu TMP3, GCOBJ:TMP1->gch.marked + |. nop + | lbu TMP3, GCOBJ:SFRETLO->gch.marked | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) | beqz TMP3, <1 |. load_got lj_gc_barrieruv @@ -2991,11 +3793,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | srl RA, RA, 1 | addu RD, KBASE, RD | addu RA, RA, LFUNC:RB - | ldc1 f0, 0(RD) - | lw UPVAL:RB, LFUNC:RA->uvptr + | lw UPVAL:RB, LFUNC:RA->uvptr + | lw SFRETHI, HI(RD) + | lw SFRETLO, LO(RD) + | lw TMP1, UPVAL:RB->v | ins_next1 - | lw TMP1, UPVAL:RB->v - | sdc1 f0, 0(TMP1) + | sw SFRETHI, HI(TMP1) + | sw SFRETLO, LO(TMP1) | ins_next2 break; case BC_USETP: @@ -3005,10 +3809,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | srl TMP0, RD, 3 | addu RA, RA, LFUNC:RB | not TMP0, TMP0 - | lw UPVAL:RB, LFUNC:RA->uvptr + | lw UPVAL:RB, LFUNC:RA->uvptr | ins_next1 - | lw TMP1, UPVAL:RB->v - | sw TMP0, HI(TMP1) + | lw TMP1, UPVAL:RB->v + | sw TMP0, HI(TMP1) | ins_next2 break; @@ -3044,8 +3848,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP0, LJ_TFUNC | ins_next1 | addu RA, BASE, RA - | sw TMP0, HI(RA) | sw LFUNC:CRET1, LO(RA) + | sw TMP0, HI(RA) | ins_next2 break; @@ -3126,31 +3930,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lw TMP2, HI(CARG3) | lw TAB:RB, LO(CARG2) | li AT, LJ_TTAB - | ldc1 f0, 0(CARG3) | bne TMP1, AT, ->vmeta_tgetv |. addu RA, BASE, RA - | sltiu AT, TMP2, LJ_TISNUM - | beqz AT, >5 - |. li AT, LJ_TSTR - | - | // Convert number key to integer, check for integerness and range. - | cvt.w.d f2, f0 - | lw TMP0, TAB:RB->asize - | mfc1 TMP2, f2 - | cvt.d.w f4, f2 + | bne TMP2, TISNUM, >5 + |. lw RC, LO(CARG3) + | lw TMP0, TAB:RB->asize | lw TMP1, TAB:RB->array - | c.eq.d f0, f4 - | sltu AT, TMP2, TMP0 - | movf AT, r0 - | sll TMP2, TMP2, 3 + | sltu AT, RC, TMP0 + | sll TMP2, RC, 3 | beqz AT, ->vmeta_tgetv // Integer key and in array part? |. addu TMP2, TMP1, TMP2 - | lw TMP0, HI(TMP2) - | beq TMP0, TISNIL, >2 - |. ldc1 f0, 0(TMP2) + | lw SFRETHI, HI(TMP2) + | beq SFRETHI, TISNIL, >2 + |. lw SFRETLO, LO(TMP2) |1: | ins_next1 - | sdc1 f0, 0(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | ins_next2 | |2: // Check for __index if table value is nil. @@ -3165,8 +3961,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |. nop | |5: + | li AT, LJ_TSTR | bne TMP2, AT, ->vmeta_tgetv - |. lw STR:RC, LO(CARG3) + |. nop | b ->BC_TGETS_Z // String key? |. nop break; @@ -3198,18 +3995,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) | lw NODE:TMP1, NODE:TMP2->next - | lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) + | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2) | addiu CARG1, CARG1, -LJ_TSTR | xor TMP0, TMP0, STR:RC | or AT, CARG1, TMP0 | bnez AT, >4 |. lw TAB:TMP3, TAB:RB->metatable - | beq CARG2, TISNIL, >5 // Key found, but nil value? - |. lw CARG1, offsetof(Node, val)+LO(NODE:TMP2) + | beq SFRETHI, TISNIL, >5 // Key found, but nil value? + |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2) |3: | ins_next1 - | sw CARG2, HI(RA) - | sw CARG1, LO(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | ins_next2 | |4: // Follow hash chain. @@ -3219,7 +4016,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |5: // Check for __index if table value is nil. | beqz TAB:TMP3, <3 // No metatable: done. - |. li CARG2, LJ_TNIL + |. li SFRETHI, LJ_TNIL | lbu TMP0, TAB:TMP3->nomm | andi TMP0, TMP0, 1<<MM_index | bnez TMP0, <3 // 'no __index' flag set: done. @@ -3244,12 +4041,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | sltu AT, TMP0, TMP1 | beqz AT, ->vmeta_tgetb |. addu RC, TMP2, RC - | lw TMP1, HI(RC) - | beq TMP1, TISNIL, >5 - |. ldc1 f0, 0(RC) + | lw SFRETHI, HI(RC) + | beq SFRETHI, TISNIL, >5 + |. lw SFRETLO, LO(RC) |1: | ins_next1 - | sdc1 f0, 0(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | ins_next2 | |5: // Check for __index if table value is nil. @@ -3260,7 +4058,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | andi TMP1, TMP1, 1<<MM_index | bnez TMP1, <1 // 'no __index' flag set: done. |. nop - | b ->vmeta_tgetb // Caveat: preserve TMP0! + | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2! |. nop break; case BC_TGETR: @@ -3268,23 +4066,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | decode_RB8a RB, INS | decode_RB8b RB | decode_RDtoRC8 RC, RD - | addu CARG2, BASE, RB - | addu CARG3, BASE, RC - | lw TAB:CARG1, LO(CARG2) - | ldc1 f0, 0(CARG3) - | trunc.w.d f2, f0 - | lw TMP0, TAB:CARG1->asize - | mfc1 CARG2, f2 + | addu RB, BASE, RB + | addu RC, BASE, RC + | lw TAB:CARG1, LO(RB) + | lw CARG2, LO(RC) + | addu RA, BASE, RA + | lw TMP0, TAB:CARG1->asize | lw TMP1, TAB:CARG1->array | sltu AT, CARG2, TMP0 | sll TMP2, CARG2, 3 | beqz AT, ->vmeta_tgetr // In array part? - |. addu TMP2, TMP1, TMP2 - | ldc1 f0, 0(TMP2) + |. addu CRET1, TMP1, TMP2 + | lw SFARG2HI, HI(CRET1) + | lw SFARG2LO, LO(CRET1) |->BC_TGETR_Z: - | addu RA, BASE, RA | ins_next1 - | sdc1 f0, 0(RA) + | sw SFARG2HI, HI(RA) + | sw SFARG2LO, LO(RA) | ins_next2 break; @@ -3299,33 +4097,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lw TMP2, HI(CARG3) | lw TAB:RB, LO(CARG2) | li AT, LJ_TTAB - | ldc1 f0, 0(CARG3) | bne TMP1, AT, ->vmeta_tsetv |. addu RA, BASE, RA - | sltiu AT, TMP2, LJ_TISNUM - | beqz AT, >5 - |. li AT, LJ_TSTR - | - | // Convert number key to integer, check for integerness and range. - | cvt.w.d f2, f0 - | lw TMP0, TAB:RB->asize - | mfc1 TMP2, f2 - | cvt.d.w f4, f2 + | bne TMP2, TISNUM, >5 + |. lw RC, LO(CARG3) + | lw TMP0, TAB:RB->asize | lw TMP1, TAB:RB->array - | c.eq.d f0, f4 - | sltu AT, TMP2, TMP0 - | movf AT, r0 - | sll TMP2, TMP2, 3 + | sltu AT, RC, TMP0 + | sll TMP2, RC, 3 | beqz AT, ->vmeta_tsetv // Integer key and in array part? |. addu TMP1, TMP1, TMP2 - | lbu TMP3, TAB:RB->marked | lw TMP0, HI(TMP1) + | lbu TMP3, TAB:RB->marked + | lw SFRETHI, HI(RA) | beq TMP0, TISNIL, >3 - |. ldc1 f0, 0(RA) + |. lw SFRETLO, LO(RA) |1: - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |. sdc1 f0, 0(TMP1) + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | sw SFRETHI, HI(TMP1) + | bnez AT, >7 + |. sw SFRETLO, LO(TMP1) |2: | ins_next | @@ -3341,8 +4132,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |. nop | |5: + | li AT, LJ_TSTR | bne TMP2, AT, ->vmeta_tsetv - |. lw STR:RC, LO(CARG3) + |. nop | b ->BC_TSETS_Z // String key? |. nop | @@ -3374,7 +4166,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | sll TMP1, TMP1, 3 | subu TMP1, TMP0, TMP1 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + |.if FPU | ldc1 f20, 0(RA) + |.else + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) + |.endif |1: | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) @@ -3388,8 +4185,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |. lw TAB:TMP0, TAB:RB->metatable |2: | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + |.if FPU | bnez AT, >7 |. sdc1 f20, NODE:TMP2->val + |.else + | sw SFRETHI, NODE:TMP2->val.u32.hi + | bnez AT, >7 + |. sw SFRETLO, NODE:TMP2->val.u32.lo + |.endif |3: | ins_next | @@ -3427,8 +4230,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |. move CARG1, L | // Returns TValue *. | lw BASE, L->base + |.if FPU | b <3 // No 2nd write barrier needed. |. sdc1 f20, 0(CRET1) + |.else + | lw SFARG1HI, HI(RA) + | lw SFARG1LO, LO(RA) + | sw SFARG1HI, HI(CRET1) + | b <3 // No 2nd write barrier needed. + |. sw SFARG1LO, LO(CRET1) + |.endif | |7: // Possible table write barrier for the value. Skip valiswhite check. | barrierback TAB:RB, TMP3, TMP0, <3 @@ -3453,11 +4264,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lw TMP1, HI(RC) | lbu TMP3, TAB:RB->marked | beq TMP1, TISNIL, >5 - |. ldc1 f0, 0(RA) |1: + |. lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | sw SFRETHI, HI(RC) | bnez AT, >7 - |. sdc1 f0, 0(RC) + |. sw SFRETLO, LO(RC) |2: | ins_next | @@ -3469,7 +4282,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | andi TMP1, TMP1, 1<<MM_newindex | bnez TMP1, <1 // 'no __newindex' flag set: done. |. nop - | b ->vmeta_tsetb // Caveat: preserve TMP0! + | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2! |. nop | |7: // Possible table write barrier for the value. Skip valiswhite check. @@ -3482,13 +4295,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | decode_RDtoRC8 RC, RD | addu CARG1, BASE, RB | addu CARG3, BASE, RC - | lw TAB:CARG2, LO(CARG1) - | ldc1 f0, 0(CARG3) - | trunc.w.d f2, f0 - | lbu TMP3, TAB:CARG2->marked + | lw TAB:CARG2, LO(CARG1) + | lw CARG3, LO(CARG3) + | lbu TMP3, TAB:CARG2->marked | lw TMP0, TAB:CARG2->asize - | mfc1 CARG3, f2 - | lw TMP1, TAB:CARG2->array + | lw TMP1, TAB:CARG2->array | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | bnez AT, >7 |. addu RA, BASE, RA @@ -3496,18 +4307,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | sltu AT, CARG3, TMP0 | sll TMP2, CARG3, 3 | beqz AT, ->vmeta_tsetr // In array part? - |. ldc1 f20, 0(RA) - | addu CRET1, TMP1, TMP2 + |. addu CRET1, TMP1, TMP2 |->BC_TSETR_Z: + | lw SFARG1HI, HI(RA) + | lw SFARG1LO, LO(RA) | ins_next1 - | sdc1 f20, 0(CRET1) + | sw SFARG1HI, HI(CRET1) + | sw SFARG1LO, LO(CRET1) | ins_next2 | |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0, <2 + | barrierback TAB:CARG2, TMP3, TMP0, <2 break; - case BC_TSETM: | // RA = base*8 (table at base-1), RD = num_const*8 (start index) | addu RA, BASE, RA @@ -3529,10 +4341,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | addu TMP1, TMP1, CARG1 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) |3: // Copy result slots to table. - | ldc1 f0, 0(RA) + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) | addiu RA, RA, 8 | sltu AT, RA, TMP2 - | sdc1 f0, 0(TMP1) + | sw SFRETHI, HI(TMP1) + | sw SFRETLO, LO(TMP1) | bnez AT, <3 |. addiu TMP1, TMP1, 8 | bnez TMP0, >7 @@ -3607,10 +4421,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | beqz NARGS8:RC, >3 |. move TMP3, NARGS8:RC |2: - | ldc1 f0, 0(RA) + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) | addiu RA, RA, 8 | addiu TMP3, TMP3, -8 - | sdc1 f0, 0(TMP2) + | sw SFRETHI, HI(TMP2) + | sw SFRETLO, LO(TMP2) | bnez TMP3, <2 |. addiu TMP2, TMP2, 8 |3: @@ -3647,12 +4463,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li AT, LJ_TFUNC | lw TMP1, -24+HI(BASE) | lw LFUNC:RB, -24+LO(BASE) - | ldc1 f2, -8(BASE) - | ldc1 f0, -16(BASE) + | lw SFARG1HI, -16+HI(BASE) + | lw SFARG1LO, -16+LO(BASE) + | lw SFARG2HI, -8+HI(BASE) + | lw SFARG2LO, -8+LO(BASE) | sw TMP1, HI(BASE) // Copy callable. | sw LFUNC:RB, LO(BASE) - | sdc1 f2, 16(BASE) // Copy control var. - | sdc1 f0, 8(BASE) // Copy state. + | sw SFARG1HI, 8+HI(BASE) // Copy state. + | sw SFARG1LO, 8+LO(BASE) + | sw SFARG2HI, 16+HI(BASE) // Copy control var. + | sw SFARG2LO, 16+LO(BASE) | addiu BASE, BASE, 8 | bne TMP1, AT, ->vmeta_call |. li NARGS8:RC, 16 // Iterators get 2 arguments. @@ -3675,20 +4495,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | beqz AT, >5 // Index points after array part? |. sll TMP3, RC, 3 | addu TMP3, TMP1, TMP3 - | lw TMP2, HI(TMP3) - | ldc1 f0, 0(TMP3) - | mtc1 RC, f2 + | lw SFARG1HI, HI(TMP3) + | lw SFARG1LO, LO(TMP3) | lhu RD, -4+OFS_RD(PC) - | beq TMP2, TISNIL, <1 // Skip holes in array part. + | sw TISNUM, HI(RA) + | sw RC, LO(RA) + | beq SFARG1HI, TISNIL, <1 // Skip holes in array part. |. addiu RC, RC, 1 - | cvt.d.w f2, f2 + | sw SFARG1HI, 8+HI(RA) + | sw SFARG1LO, 8+LO(RA) | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sdc1 f0, 8(RA) | decode_RD4b RD | addu RD, RD, TMP3 | sw RC, -8+LO(RA) // Update control var. | addu PC, PC, RD - | sdc1 f2, 0(RA) |3: | ins_next | @@ -3703,18 +4523,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | sll RB, RC, 3 | subu TMP3, TMP3, RB | addu NODE:TMP3, TMP3, TMP2 - | lw RB, HI(NODE:TMP3) - | ldc1 f0, 0(NODE:TMP3) + | lw SFARG1HI, NODE:TMP3->val.u32.hi + | lw SFARG1LO, NODE:TMP3->val.u32.lo | lhu RD, -4+OFS_RD(PC) - | beq RB, TISNIL, <6 // Skip holes in hash part. + | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part. |. addiu RC, RC, 1 - | ldc1 f2, NODE:TMP3->key + | lw SFARG2HI, NODE:TMP3->key.u32.hi + | lw SFARG2LO, NODE:TMP3->key.u32.lo | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sdc1 f0, 8(RA) + | sw SFARG1HI, 8+HI(RA) + | sw SFARG1LO, 8+LO(RA) | addu RC, RC, TMP0 | decode_RD4b RD | addu RD, RD, TMP3 - | sdc1 f2, 0(RA) + | sw SFARG2HI, HI(RA) + | sw SFARG2LO, LO(RA) | addu PC, PC, RD | b <3 |. sw RC, -8+LO(RA) // Update control var. @@ -3723,24 +4546,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISNEXT: | // RA = base*8, RD = target (points to ITERN) | addu RA, BASE, RA - | lw TMP0, -24+HI(RA) - | lw CFUNC:TMP1, -24+LO(RA) - | lw TMP2, -16+HI(RA) - | lw TMP3, -8+HI(RA) + | srl TMP0, RD, 1 + | lw CARG1, -24+HI(RA) + | lw CFUNC:CARG2, -24+LO(RA) + | addu TMP0, PC, TMP0 + | lw CARG3, -16+HI(RA) + | lw CARG4, -8+HI(RA) | li AT, LJ_TFUNC - | bne TMP0, AT, >5 - |. addiu TMP2, TMP2, -LJ_TTAB - | lbu TMP1, CFUNC:TMP1->ffid - | addiu TMP3, TMP3, -LJ_TNIL - | srl TMP0, RD, 1 - | or TMP2, TMP2, TMP3 - | addiu TMP1, TMP1, -FF_next_N - | addu TMP0, PC, TMP0 - | or TMP1, TMP1, TMP2 - | bnez TMP1, >5 - |. lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) + | bne CARG1, AT, >5 + |. lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) + | lbu CARG2, CFUNC:CARG2->ffid + | addiu CARG3, CARG3, -LJ_TTAB + | addiu CARG4, CARG4, -LJ_TNIL + | or CARG3, CARG3, CARG4 + | addiu CARG2, CARG2, -FF_next_N + | or CARG2, CARG2, CARG3 + | bnez CARG2, >5 + |. lui TMP1, 0xfffe | addu PC, TMP0, TMP2 - | lui TMP1, 0xfffe | ori TMP1, TMP1, 0x7fff | sw r0, -8+LO(RA) // Initialize control var. | sw TMP1, -8+HI(RA) @@ -3750,7 +4573,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP3, BC_JMP | li TMP1, BC_ITERC | sb TMP3, -4+OFS_OP(PC) - | addu PC, TMP0, TMP2 + | addu PC, TMP0, TMP2 | b <1 |. sb TMP1, OFS_OP(PC) break; @@ -3794,9 +4617,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bnez AT, >7 |. addiu MULTRES, TMP1, 8 |6: - | ldc1 f0, 0(RC) + | lw SFRETHI, HI(RC) + | lw SFRETLO, LO(RC) | addiu RC, RC, 8 - | sdc1 f0, 0(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | sltu AT, RC, TMP3 | bnez AT, <6 // More vararg slots? |. addiu RA, RA, 8 @@ -3852,10 +4677,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | beqz RC, >3 |. subu BASE, TMP2, TMP0 |2: - | ldc1 f0, 0(RA) + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) | addiu RA, RA, 8 | addiu RC, RC, -8 - | sdc1 f0, 0(TMP2) + | sw SFRETHI, HI(TMP2) + | sw SFRETLO, LO(TMP2) | bnez RC, <2 |. addiu TMP2, TMP2, 8 |3: @@ -3896,14 +4723,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lw INS, -4(PC) | addiu TMP2, BASE, -8 if (op == BC_RET1) { - | ldc1 f0, 0(RA) + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) } | decode_RB8a RB, INS | decode_RA8a RA, INS | decode_RB8b RB | decode_RA8b RA if (op == BC_RET1) { - | sdc1 f0, 0(TMP2) + | sw SFRETHI, HI(TMP2) + | sw SFRETLO, LO(TMP2) } | subu BASE, TMP2, RA |5: @@ -3945,69 +4774,147 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = base*8, RD = target (after end of loop or start of loop) vk = (op == BC_IFORL || op == BC_JFORL); | addu RA, BASE, RA - if (vk) { - | ldc1 f0, FORL_IDX*8(RA) - | ldc1 f4, FORL_STEP*8(RA) - | ldc1 f2, FORL_STOP*8(RA) - | lw TMP3, FORL_STEP*8+HI(RA) - | add.d f0, f0, f4 - | sdc1 f0, FORL_IDX*8(RA) - } else { - | lw TMP1, FORL_IDX*8+HI(RA) - | lw TMP3, FORL_STEP*8+HI(RA) - | lw TMP2, FORL_STOP*8+HI(RA) - | sltiu TMP1, TMP1, LJ_TISNUM - | sltiu TMP0, TMP3, LJ_TISNUM - | sltiu TMP2, TMP2, LJ_TISNUM - | and TMP1, TMP1, TMP0 - | and TMP1, TMP1, TMP2 - | ldc1 f0, FORL_IDX*8(RA) - | beqz TMP1, ->vmeta_for - |. ldc1 f2, FORL_STOP*8(RA) - } + | lw SFARG1HI, FORL_IDX*8+HI(RA) + | lw SFARG1LO, FORL_IDX*8+LO(RA) if (op != BC_JFORL) { | srl RD, RD, 1 - | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535) + | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) + | addu TMP2, RD, TMP2 } - | c.le.d 0, f0, f2 - | c.le.d 1, f2, f0 - | sdc1 f0, FORL_EXT*8(RA) + if (!vk) { + | lw SFARG2HI, FORL_STOP*8+HI(RA) + | lw SFARG2LO, FORL_STOP*8+LO(RA) + | bne SFARG1HI, TISNUM, >5 + |. lw SFRETHI, FORL_STEP*8+HI(RA) + | xor AT, SFARG2HI, TISNUM + | lw SFRETLO, FORL_STEP*8+LO(RA) + | xor TMP0, SFRETHI, TISNUM + | or AT, AT, TMP0 + | bnez AT, ->vmeta_for + |. slt AT, SFRETLO, r0 + | slt CRET1, SFARG2LO, SFARG1LO + | slt TMP1, SFARG1LO, SFARG2LO + | movn CRET1, TMP1, AT + } else { + | bne SFARG1HI, TISNUM, >5 + |. lw SFARG2LO, FORL_STEP*8+LO(RA) + | lw SFRETLO, FORL_STOP*8+LO(RA) + | move TMP3, SFARG1LO + | addu SFARG1LO, SFARG1LO, SFARG2LO + | xor TMP0, SFARG1LO, TMP3 + | xor TMP1, SFARG1LO, SFARG2LO + | and TMP0, TMP0, TMP1 + | slt TMP1, SFARG1LO, SFRETLO + | slt CRET1, SFRETLO, SFARG1LO + | slt AT, SFARG2LO, r0 + | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. + | movn CRET1, TMP1, AT + | or CRET1, CRET1, TMP0 + } + |1: + if (op == BC_FORI) { + | movz TMP2, r0, CRET1 + | addu PC, PC, TMP2 + } else if (op == BC_JFORI) { + | addu PC, PC, TMP2 + | lhu RD, -4+OFS_RD(PC) + } else if (op == BC_IFORL) { + | movn TMP2, r0, CRET1 + | addu PC, PC, TMP2 + } + if (vk) { + | sw SFARG1HI, FORL_IDX*8+HI(RA) + | sw SFARG1LO, FORL_IDX*8+LO(RA) + } + | ins_next1 + | sw SFARG1HI, FORL_EXT*8+HI(RA) + | sw SFARG1LO, FORL_EXT*8+LO(RA) + |2: if (op == BC_JFORI) { - | li TMP1, 1 - | li TMP2, 1 - | addu TMP0, RD, TMP0 - | slt TMP3, TMP3, r0 - | movf TMP1, r0, 0 - | addu PC, PC, TMP0 - | movf TMP2, r0, 1 - | lhu RD, -4+OFS_RD(PC) - | movn TMP1, TMP2, TMP3 - | bnez TMP1, =>BC_JLOOP + | beqz CRET1, =>BC_JLOOP |. decode_RD8b RD } else if (op == BC_JFORL) { - | li TMP1, 1 - | li TMP2, 1 - | slt TMP3, TMP3, r0 - | movf TMP1, r0, 0 - | movf TMP2, r0, 1 - | movn TMP1, TMP2, TMP3 - | bnez TMP1, =>BC_JLOOP + | beqz CRET1, =>BC_JLOOP + } + | ins_next2 + | + |5: // FP loop. + |.if FPU + if (!vk) { + | ldc1 f0, FORL_IDX*8(RA) + | ldc1 f2, FORL_STOP*8(RA) + | sltiu TMP0, SFARG1HI, LJ_TISNUM + | sltiu TMP1, SFARG2HI, LJ_TISNUM + | sltiu AT, SFRETHI, LJ_TISNUM + | and TMP0, TMP0, TMP1 + | and AT, AT, TMP0 + | beqz AT, ->vmeta_for + |. slt TMP3, SFRETHI, r0 + | c.ole.d 0, f0, f2 + | c.ole.d 1, f2, f0 + | li CRET1, 1 + | movt CRET1, r0, 0 + | movt AT, r0, 1 + | b <1 + |. movn CRET1, AT, TMP3 + } else { + | ldc1 f0, FORL_IDX*8(RA) + | ldc1 f4, FORL_STEP*8(RA) + | ldc1 f2, FORL_STOP*8(RA) + | lw SFARG2HI, FORL_STEP*8+HI(RA) + | add.d f0, f0, f4 + | c.ole.d 0, f0, f2 + | c.ole.d 1, f2, f0 + | slt TMP3, SFARG2HI, r0 + | li CRET1, 1 + | li AT, 1 + | movt CRET1, r0, 0 + | movt AT, r0, 1 + | movn CRET1, AT, TMP3 + if (op == BC_IFORL) { + | movn TMP2, r0, CRET1 + | addu PC, PC, TMP2 + } + | sdc1 f0, FORL_IDX*8(RA) + | ins_next1 + | b <2 + |. sdc1 f0, FORL_EXT*8(RA) + } + |.else + if (!vk) { + | sltiu TMP0, SFARG1HI, LJ_TISNUM + | sltiu TMP1, SFARG2HI, LJ_TISNUM + | sltiu AT, SFRETHI, LJ_TISNUM + | and TMP0, TMP0, TMP1 + | and AT, AT, TMP0 + | beqz AT, ->vmeta_for + |. nop + | bal ->vm_sfcmpolex + |. move TMP3, SFRETHI + | b <1 |. nop } else { - | addu TMP1, RD, TMP0 - | slt TMP3, TMP3, r0 - | move TMP2, TMP1 - if (op == BC_FORI) { - | movt TMP1, r0, 0 - | movt TMP2, r0, 1 + | lw SFARG2HI, FORL_STEP*8+HI(RA) + | load_got __adddf3 + | call_extern + |. sw TMP2, ARG5 + | lw SFARG2HI, FORL_STOP*8+HI(RA) + | lw SFARG2LO, FORL_STOP*8+LO(RA) + | move SFARG1HI, SFRETHI + | move SFARG1LO, SFRETLO + | bal ->vm_sfcmpolex + |. lw TMP3, FORL_STEP*8+HI(RA) + if ( op == BC_JFORL ) { + | lhu RD, -4+OFS_RD(PC) + | lw TMP2, ARG5 + | b <1 + |. decode_RD8b RD } else { - | movf TMP1, r0, 0 - | movf TMP2, r0, 1 + | b <1 + |. lw TMP2, ARG5 } - | movn TMP1, TMP2, TMP3 - | addu PC, PC, TMP1 } - | ins_next + |.endif break; case BC_ITERL: @@ -4256,8 +5163,10 @@ static void emit_asm_debug(BuildCtx *ctx) fcofs, CFRAME_SIZE); for (i = 23; i >= 16; i--) fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); +#if !LJ_SOFTFP for (i = 30; i >= 20; i -= 2) fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); +#endif fprintf(ctx->fp, "\t.align 2\n" ".LEFDE0:\n\n"); @@ -4275,6 +5184,7 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.align 2\n" ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); #endif +#if !LJ_NO_UNWIND fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n"); fprintf(ctx->fp, "\t.globl lj_err_unwind_dwarf\n" @@ -4308,8 +5218,10 @@ static void emit_asm_debug(BuildCtx *ctx) fcofs, CFRAME_SIZE); for (i = 23; i >= 16; i--) fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); +#if !LJ_SOFTFP for (i = 30; i >= 20; i -= 2) fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); +#endif fprintf(ctx->fp, "\t.align 2\n" ".LEFDE2:\n\n"); @@ -4343,6 +5255,7 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.align 2\n" ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); #endif +#endif break; default: break; diff --git a/luajit-2.1/src/vm_mips64.dasc b/luajit-2.1/src/vm_mips64.dasc new file mode 100644 index 0000000..c06270a --- /dev/null +++ b/luajit-2.1/src/vm_mips64.dasc @@ -0,0 +1,5062 @@ +|// Low-level VM code for MIPS64 CPUs. +|// Bytecode interpreter, fast functions and helper functions. +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +|// +|// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +|// Sponsored by Cisco Systems, Inc. +| +|.arch mips64 +|.section code_op, code_sub +| +|.actionlist build_actionlist +|.globals GLOB_ +|.globalnames globnames +|.externnames extnames +| +|// Note: The ragged indentation of the instructions is intentional. +|// The starting columns indicate data dependencies. +| +|//----------------------------------------------------------------------- +| +|// Fixed register assignments for the interpreter. +|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra +| +|.macro .FPU, a, b +|.if FPU +| a, b +|.endif +|.endmacro +| +|// The following must be C callee-save (but BASE is often refetched). +|.define BASE, r16 // Base of current Lua stack frame. +|.define KBASE, r17 // Constants of current Lua function. +|.define PC, r18 // Next PC. +|.define DISPATCH, r19 // Opcode dispatch table. +|.define LREG, r20 // Register holding lua_State (also in SAVE_L). +|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. +| +|.define JGL, r30 // On-trace: global_State + 32768. +| +|// Constants for type-comparisons, stores and conversions. C callee-save. +|.define TISNIL, r30 +|.define TISNUM, r22 +|.if FPU +|.define TOBIT, f30 // 2^52 + 2^51. +|.endif +| +|// The following temporaries are not saved across C calls, except for RA. +|.define RA, r23 // Callee-save. +|.define RB, r8 +|.define RC, r9 +|.define RD, r10 +|.define INS, r11 +| +|.define AT, r1 // Assembler temporary. +|.define TMP0, r12 +|.define TMP1, r13 +|.define TMP2, r14 +|.define TMP3, r15 +| +|// MIPS n64 calling convention. +|.define CFUNCADDR, r25 +|.define CARG1, r4 +|.define CARG2, r5 +|.define CARG3, r6 +|.define CARG4, r7 +|.define CARG5, r8 +|.define CARG6, r9 +|.define CARG7, r10 +|.define CARG8, r11 +| +|.define CRET1, r2 +|.define CRET2, r3 +| +|.if FPU +|.define FARG1, f12 +|.define FARG2, f13 +|.define FARG3, f14 +|.define FARG4, f15 +|.define FARG5, f16 +|.define FARG6, f17 +|.define FARG7, f18 +|.define FARG8, f19 +| +|.define FRET1, f0 +|.define FRET2, f2 +|.endif +| +|// Stack layout while in interpreter. Must match with lj_frame.h. +|.if FPU // MIPS64 hard-float. +| +|.define CFRAME_SPACE, 192 // Delta for sp. +| +|//----- 16 byte aligned, <-- sp entering interpreter +|.define SAVE_ERRF, 188(sp) // 32 bit values. +|.define SAVE_NRES, 184(sp) +|.define SAVE_CFRAME, 176(sp) // 64 bit values. +|.define SAVE_L, 168(sp) +|.define SAVE_PC, 160(sp) +|//----- 16 byte aligned +|.define SAVE_GPR_, 80 // .. 80+10*8: 64 bit GPR saves. +|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves. +| +|.else // MIPS64 soft-float +| +|.define CFRAME_SPACE, 128 // Delta for sp. +| +|//----- 16 byte aligned, <-- sp entering interpreter +|.define SAVE_ERRF, 124(sp) // 32 bit values. +|.define SAVE_NRES, 120(sp) +|.define SAVE_CFRAME, 112(sp) // 64 bit values. +|.define SAVE_L, 104(sp) +|.define SAVE_PC, 96(sp) +|//----- 16 byte aligned +|.define SAVE_GPR_, 16 // .. 16+10*8: 64 bit GPR saves. +| +|.endif +| +|.define TMPX, 8(sp) // Unused by interpreter, temp for JIT code. +|.define TMPD, 0(sp) +|//----- 16 byte aligned +| +|.define TMPD_OFS, 0 +| +|.define SAVE_MULTRES, TMPD +| +|//----------------------------------------------------------------------- +| +|.macro saveregs +| daddiu sp, sp, -CFRAME_SPACE +| sd ra, SAVE_GPR_+9*8(sp) +| sd r30, SAVE_GPR_+8*8(sp) +| .FPU sdc1 f31, SAVE_FPR_+7*8(sp) +| sd r23, SAVE_GPR_+7*8(sp) +| .FPU sdc1 f30, SAVE_FPR_+6*8(sp) +| sd r22, SAVE_GPR_+6*8(sp) +| .FPU sdc1 f29, SAVE_FPR_+5*8(sp) +| sd r21, SAVE_GPR_+5*8(sp) +| .FPU sdc1 f28, SAVE_FPR_+4*8(sp) +| sd r20, SAVE_GPR_+4*8(sp) +| .FPU sdc1 f27, SAVE_FPR_+3*8(sp) +| sd r19, SAVE_GPR_+3*8(sp) +| .FPU sdc1 f26, SAVE_FPR_+2*8(sp) +| sd r18, SAVE_GPR_+2*8(sp) +| .FPU sdc1 f25, SAVE_FPR_+1*8(sp) +| sd r17, SAVE_GPR_+1*8(sp) +| .FPU sdc1 f24, SAVE_FPR_+0*8(sp) +| sd r16, SAVE_GPR_+0*8(sp) +|.endmacro +| +|.macro restoreregs_ret +| ld ra, SAVE_GPR_+9*8(sp) +| ld r30, SAVE_GPR_+8*8(sp) +| ld r23, SAVE_GPR_+7*8(sp) +| .FPU ldc1 f31, SAVE_FPR_+7*8(sp) +| ld r22, SAVE_GPR_+6*8(sp) +| .FPU ldc1 f30, SAVE_FPR_+6*8(sp) +| ld r21, SAVE_GPR_+5*8(sp) +| .FPU ldc1 f29, SAVE_FPR_+5*8(sp) +| ld r20, SAVE_GPR_+4*8(sp) +| .FPU ldc1 f28, SAVE_FPR_+4*8(sp) +| ld r19, SAVE_GPR_+3*8(sp) +| .FPU ldc1 f27, SAVE_FPR_+3*8(sp) +| ld r18, SAVE_GPR_+2*8(sp) +| .FPU ldc1 f26, SAVE_FPR_+2*8(sp) +| ld r17, SAVE_GPR_+1*8(sp) +| .FPU ldc1 f25, SAVE_FPR_+1*8(sp) +| ld r16, SAVE_GPR_+0*8(sp) +| .FPU ldc1 f24, SAVE_FPR_+0*8(sp) +| jr ra +| daddiu sp, sp, CFRAME_SPACE +|.endmacro +| +|// Type definitions. Some of these are only used for documentation. +|.type L, lua_State, LREG +|.type GL, global_State +|.type TVALUE, TValue +|.type GCOBJ, GCobj +|.type STR, GCstr +|.type TAB, GCtab +|.type LFUNC, GCfuncL +|.type CFUNC, GCfuncC +|.type PROTO, GCproto +|.type UPVAL, GCupval +|.type NODE, Node +|.type NARGS8, int +|.type TRACE, GCtrace +|.type SBUF, SBuf +| +|//----------------------------------------------------------------------- +| +|// Trap for not-yet-implemented parts. +|.macro NYI; .long 0xf0f0f0f0; .endmacro +| +|// Macros to mark delay slots. +|.macro ., a; a; .endmacro +|.macro ., a,b; a,b; .endmacro +|.macro ., a,b,c; a,b,c; .endmacro +|.macro ., a,b,c,d; a,b,c,d; .endmacro +| +|.define FRAME_PC, -8 +|.define FRAME_FUNC, -16 +| +|//----------------------------------------------------------------------- +| +|// Endian-specific defines. +|.if ENDIAN_LE +|.define HI, 4 +|.define LO, 0 +|.define OFS_RD, 2 +|.define OFS_RA, 1 +|.define OFS_OP, 0 +|.else +|.define HI, 0 +|.define LO, 4 +|.define OFS_RD, 0 +|.define OFS_RA, 2 +|.define OFS_OP, 3 +|.endif +| +|// Instruction decode. +|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro +|.macro decode_OP8a, dst, ins; andi dst, ins, 0xff; .endmacro +|.macro decode_OP8b, dst; sll dst, dst, 3; .endmacro +|.macro decode_RC8a, dst, ins; srl dst, ins, 13; .endmacro +|.macro decode_RC8b, dst; andi dst, dst, 0x7f8; .endmacro +|.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro +|.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro +|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro +|.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro +|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro +|.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro +|.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro +|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro +| +|// Instruction fetch. +|.macro ins_NEXT1 +| lw INS, 0(PC) +| daddiu PC, PC, 4 +|.endmacro +|// Instruction decode+dispatch. +|.macro ins_NEXT2 +| decode_OP8a TMP1, INS +| decode_OP8b TMP1 +| daddu TMP0, DISPATCH, TMP1 +| decode_RD8a RD, INS +| ld AT, 0(TMP0) +| decode_RA8a RA, INS +| decode_RD8b RD +| jr AT +| decode_RA8b RA +|.endmacro +|.macro ins_NEXT +| ins_NEXT1 +| ins_NEXT2 +|.endmacro +| +|// Instruction footer. +|.if 1 +| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. +| .define ins_next, ins_NEXT +| .define ins_next_, ins_NEXT +| .define ins_next1, ins_NEXT1 +| .define ins_next2, ins_NEXT2 +|.else +| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. +| // Affects only certain kinds of benchmarks (and only with -j off). +| .macro ins_next +| b ->ins_next +| .endmacro +| .macro ins_next1 +| .endmacro +| .macro ins_next2 +| b ->ins_next +| .endmacro +| .macro ins_next_ +| ->ins_next: +| ins_NEXT +| .endmacro +|.endif +| +|// Call decode and dispatch. +|.macro ins_callt +| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC +| ld PC, LFUNC:RB->pc +| lw INS, 0(PC) +| daddiu PC, PC, 4 +| decode_OP8a TMP1, INS +| decode_RA8a RA, INS +| decode_OP8b TMP1 +| decode_RA8b RA +| daddu TMP0, DISPATCH, TMP1 +| ld TMP0, 0(TMP0) +| jr TMP0 +| daddu RA, RA, BASE +|.endmacro +| +|.macro ins_call +| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC +| sd PC, FRAME_PC(BASE) +| ins_callt +|.endmacro +| +|//----------------------------------------------------------------------- +| +|.macro branch_RD +| srl TMP0, RD, 1 +| lui AT, (-(BCBIAS_J*4 >> 16) & 65535) +| addu TMP0, TMP0, AT +| daddu PC, PC, TMP0 +|.endmacro +| +|// Assumes DISPATCH is relative to GL. +#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) +#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) +| +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +| +|.macro load_got, func +| ld CFUNCADDR, DISPATCH_GOT(func)(DISPATCH) +|.endmacro +|// Much faster. Sadly, there's no easy way to force the required code layout. +|// .macro call_intern, func; bal extern func; .endmacro +|.macro call_intern, func; jalr CFUNCADDR; .endmacro +|.macro call_extern; jalr CFUNCADDR; .endmacro +|.macro jmp_extern; jr CFUNCADDR; .endmacro +| +|.macro hotcheck, delta, target +| dsrl TMP1, PC, 1 +| andi TMP1, TMP1, 126 +| daddu TMP1, TMP1, DISPATCH +| lhu TMP2, GG_DISP2HOT(TMP1) +| addiu TMP2, TMP2, -delta +| bltz TMP2, target +|. sh TMP2, GG_DISP2HOT(TMP1) +|.endmacro +| +|.macro hotloop +| hotcheck HOTCOUNT_LOOP, ->vm_hotloop +|.endmacro +| +|.macro hotcall +| hotcheck HOTCOUNT_CALL, ->vm_hotcall +|.endmacro +| +|// Set current VM state. Uses TMP0. +|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro +|.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro +| +|// Move table write barrier back. Overwrites mark and tmp. +|.macro barrierback, tab, mark, tmp, target +| ld tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) +| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) +| sd tab, DISPATCH_GL(gc.grayagain)(DISPATCH) +| sb mark, tab->marked +| b target +|. sd tmp, tab->gclist +|.endmacro +| +|// Clear type tag. Isolate lowest 14+32+1=47 bits of reg. +|.macro cleartp, reg; dextm reg, reg, 0, 14; .endmacro +|.macro cleartp, dst, reg; dextm dst, reg, 0, 14; .endmacro +| +|// Set type tag: Merge 17 type bits into bits [15+32=47, 31+32+1=64) of dst. +|.macro settp, dst, tp; dinsu dst, tp, 15, 31; .endmacro +| +|// Extract (negative) type tag. +|.macro gettp, dst, src; dsra dst, src, 47; .endmacro +| +|// Macros to check the TValue type and extract the GCobj. Branch on failure. +|.macro checktp, reg, tp, target +| gettp AT, reg +| daddiu AT, AT, tp +| bnez AT, target +|. cleartp reg +|.endmacro +|.macro checktp, dst, reg, tp, target +| gettp AT, reg +| daddiu AT, AT, tp +| bnez AT, target +|. cleartp dst, reg +|.endmacro +|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro +|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro +|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro +|.macro checkint, reg, target // Caveat: has delay slot! +| gettp AT, reg +| bne AT, TISNUM, target +|.endmacro +|.macro checknum, reg, target // Caveat: has delay slot! +| gettp AT, reg +| sltiu AT, AT, LJ_TISNUM +| beqz AT, target +|.endmacro +| +|.macro mov_false, reg +| lu reg, 0x8000 +| dsll reg, reg, 32 +| not reg, reg +|.endmacro +|.macro mov_true, reg +| li reg, 0x0001 +| dsll reg, reg, 48 +| not reg, reg +|.endmacro +| +|//----------------------------------------------------------------------- + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction. */ +static void build_subroutines(BuildCtx *ctx) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Return handling ---------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_returnp: + | // See vm_return. Also: TMP2 = previous base. + | andi AT, PC, FRAME_P + | beqz AT, ->cont_dispatch + | + | // Return from pcall or xpcall fast func. + |. mov_true TMP1 + | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame. + | move BASE, TMP2 // Restore caller base. + | // Prepending may overwrite the pcall frame, so do it at the end. + | sd TMP1, -8(RA) // Prepend true to results. + | daddiu RA, RA, -8 + | + |->vm_returnc: + | addiu RD, RD, 8 // RD = (nresults+1)*8. + | andi TMP0, PC, FRAME_TYPE + | beqz RD, ->vm_unwind_c_eh + |. li CRET1, LUA_YIELD + | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. + |. move MULTRES, RD + | + |->vm_return: + | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return + | // TMP0 = PC & FRAME_TYPE + | li TMP2, -8 + | xori AT, TMP0, FRAME_C + | and TMP2, PC, TMP2 + | bnez AT, ->vm_returnp + | dsubu TMP2, BASE, TMP2 // TMP2 = previous base. + | + | addiu TMP1, RD, -8 + | sd TMP2, L->base + | li_vmstate C + | lw TMP2, SAVE_NRES + | daddiu BASE, BASE, -16 + | st_vmstate + | beqz TMP1, >2 + |. sll TMP2, TMP2, 3 + |1: + | addiu TMP1, TMP1, -8 + | ld CRET1, 0(RA) + | daddiu RA, RA, 8 + | sd CRET1, 0(BASE) + | bnez TMP1, <1 + |. daddiu BASE, BASE, 8 + | + |2: + | bne TMP2, RD, >6 + |3: + |. sd BASE, L->top // Store new top. + | + |->vm_leave_cp: + | ld TMP0, SAVE_CFRAME // Restore previous C frame. + | move CRET1, r0 // Ok return status for vm_pcall. + | sd TMP0, L->cframe + | + |->vm_leave_unw: + | restoreregs_ret + | + |6: + | ld TMP1, L->maxstack + | slt AT, TMP2, RD + | bnez AT, >7 // Less results wanted? + | // More results wanted. Check stack size and fill up results with nil. + |. slt AT, BASE, TMP1 + | beqz AT, >8 + |. nop + | sd TISNIL, 0(BASE) + | addiu RD, RD, 8 + | b <2 + |. daddiu BASE, BASE, 8 + | + |7: // Less results wanted. + | subu TMP0, RD, TMP2 + | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it. + | b <3 + |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? + | + |8: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | load_got lj_state_growstack + | move MULTRES, RD + | srl CARG2, TMP2, 3 + | call_intern lj_state_growstack // (lua_State *L, int n) + |. move CARG1, L + | lw TMP2, SAVE_NRES + | ld BASE, L->top // Need the (realloced) L->top in BASE. + | move RD, MULTRES + | b <2 + |. sll TMP2, TMP2, 3 + | + |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | // (void *cframe, int errcode) + | move sp, CARG1 + | move CRET1, CARG2 + |->vm_unwind_c_eh: // Landing pad for external unwinder. + | ld L, SAVE_L + | li TMP0, ~LJ_VMST_C + | ld GL:TMP1, L->glref + | b ->vm_leave_unw + |. sw TMP0, GL:TMP1->vmstate + | + |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | // (void *cframe) + | li AT, -4 + | and sp, CARG1, AT + |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | ld L, SAVE_L + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM + | ld BASE, L->base + | ld DISPATCH, L->glref // Setup pointer to dispatch table. + | .FPU mtc1 TMP3, TOBIT + | mov_false TMP1 + | li_vmstate INTERP + | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame. + | .FPU cvt.d.s TOBIT, TOBIT + | daddiu RA, BASE, -8 // Results start at BASE-8. + | daddiu DISPATCH, DISPATCH, GG_G2DISP + | sd TMP1, 0(RA) // Prepend false to error message. + | st_vmstate + | b ->vm_returnc + |. li RD, 16 // 2 results: false + error message. + | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_growstack_c: // Grow stack for C function. + | b >2 + |. li CARG2, LUA_MINSTACK + | + |->vm_growstack_l: // Grow stack for Lua function. + | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC + | daddu RC, BASE, RC + | dsubu RA, RA, BASE + | sd BASE, L->base + | daddiu PC, PC, 4 // Must point after first instruction. + | sd RC, L->top + | srl CARG2, RA, 3 + |2: + | // L->base = new base, L->top = top + | load_got lj_state_growstack + | sd PC, SAVE_PC + | call_intern lj_state_growstack // (lua_State *L, int n) + |. move CARG1, L + | ld BASE, L->base + | ld RC, L->top + | ld LFUNC:RB, FRAME_FUNC(BASE) + | dsubu RC, RC, BASE + | cleartp LFUNC:RB + | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC + | ins_callt // Just retry the call. + | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. + | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | move L, CARG1 + | ld DISPATCH, L->glref // Setup pointer to dispatch table. + | move BASE, CARG2 + | lbu TMP1, L->status + | sd L, SAVE_L + | li PC, FRAME_CP + | daddiu TMP0, sp, CFRAME_RESUME + | daddiu DISPATCH, DISPATCH, GG_G2DISP + | sw r0, SAVE_NRES + | sw r0, SAVE_ERRF + | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. + | sd r0, SAVE_CFRAME + | beqz TMP1, >3 + |. sd TMP0, L->cframe + | + | // Resume after yield (like a return). + | sd L, DISPATCH_GL(cur_L)(DISPATCH) + | move RA, BASE + | ld BASE, L->base + | ld TMP1, L->top + | ld PC, FRAME_PC(BASE) + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | dsubu RD, TMP1, BASE + | .FPU mtc1 TMP3, TOBIT + | sb r0, L->status + | .FPU cvt.d.s TOBIT, TOBIT + | li_vmstate INTERP + | daddiu RD, RD, 8 + | st_vmstate + | move MULTRES, RD + | andi TMP0, PC, FRAME_TYPE + | li TISNIL, LJ_TNIL + | beqz TMP0, ->BC_RET_Z + |. li TISNUM, LJ_TISNUM + | b ->vm_return + |. nop + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + | saveregs + | sw CARG4, SAVE_ERRF + | b >1 + |. li PC, FRAME_CP + | + |->vm_call: // Setup C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1) + | saveregs + | li PC, FRAME_C + | + |1: // Entry point for vm_pcall above (PC = ftype). + | ld TMP1, L:CARG1->cframe + | move L, CARG1 + | sw CARG3, SAVE_NRES + | ld DISPATCH, L->glref // Setup pointer to dispatch table. + | sd CARG1, SAVE_L + | move BASE, CARG2 + | daddiu DISPATCH, DISPATCH, GG_G2DISP + | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. + | sd TMP1, SAVE_CFRAME + | sd sp, L->cframe // Add our C frame to cframe chain. + | + |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). + | sd L, DISPATCH_GL(cur_L)(DISPATCH) + | ld TMP2, L->base // TMP2 = old base (used in vmeta_call). + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | ld TMP1, L->top + | .FPU mtc1 TMP3, TOBIT + | daddu PC, PC, BASE + | dsubu NARGS8:RC, TMP1, BASE + | li TISNUM, LJ_TISNUM + | dsubu PC, PC, TMP2 // PC = frame delta + frame type + | .FPU cvt.d.s TOBIT, TOBIT + | li_vmstate INTERP + | li TISNIL, LJ_TNIL + | st_vmstate + | + |->vm_call_dispatch: + | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC + | ld LFUNC:RB, FRAME_FUNC(BASE) + | checkfunc LFUNC:RB, ->vmeta_call + | + |->vm_call_dispatch_f: + | ins_call + | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + | saveregs + | move L, CARG1 + | ld TMP0, L:CARG1->stack + | sd CARG1, SAVE_L + | ld TMP1, L->top + | ld DISPATCH, L->glref // Setup pointer to dispatch table. + | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. + | dsubu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). + | ld TMP1, L->cframe + | daddiu DISPATCH, DISPATCH, GG_G2DISP + | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. + | sw r0, SAVE_ERRF // No error function. + | sd TMP1, SAVE_CFRAME + | sd sp, L->cframe // Add our C frame to cframe chain. + | sd L, DISPATCH_GL(cur_L)(DISPATCH) + | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) + |. move CFUNCADDR, CARG4 + | move BASE, CRET1 + | bnez CRET1, <3 // Else continue with the call. + |. li PC, FRAME_CP + | b ->vm_leave_cp // No base? Just remove C frame. + |. nop + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the + |// stack, so BASE doesn't need to be reloaded across these calls. + | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 + | ld TMP0, -32(BASE) // Continuation. + | move RB, BASE + | move BASE, TMP2 // Restore caller BASE. + | ld LFUNC:TMP1, FRAME_FUNC(TMP2) + |.if FFI + | sltiu AT, TMP0, 2 + |.endif + | ld PC, -24(RB) // Restore PC from [cont|PC]. + | cleartp LFUNC:TMP1 + | daddu TMP2, RA, RD + | ld TMP1, LFUNC:TMP1->pc + |.if FFI + | bnez AT, >1 + |.endif + |. sd TISNIL, -8(TMP2) // Ensure one valid arg. + | // BASE = base, RA = resultptr, RB = meta base + | jr TMP0 // Jump to continuation. + |. ld KBASE, PC2PROTO(k)(TMP1) + | + |.if FFI + |1: + | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. + | // cont = 0: tailcall from C function. + |. daddiu TMP1, RB, -32 + | b ->vm_call_tail + |. dsubu RC, TMP1, BASE + |.endif + | + |->cont_cat: // RA = resultptr, RB = meta base + | lw INS, -4(PC) + | daddiu CARG2, RB, -32 + | ld CRET1, 0(RA) + | decode_RB8a MULTRES, INS + | decode_RA8a RA, INS + | decode_RB8b MULTRES + | decode_RA8b RA + | daddu TMP1, BASE, MULTRES + | sd BASE, L->base + | dsubu CARG3, CARG2, TMP1 + | bne TMP1, CARG2, ->BC_CAT_Z + |. sd CRET1, 0(CARG2) + | daddu RA, BASE, RA + | b ->cont_nop + |. sd CRET1, 0(RA) + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets1: + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) + | li TMP0, LJ_TSTR + | settp STR:RC, TMP0 + | b >1 + |. sd STR:RC, 0(CARG3) + | + |->vmeta_tgets: + | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) + | li TMP0, LJ_TTAB + | li TMP1, LJ_TSTR + | settp TAB:RB, TMP0 + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) + | sd TAB:RB, 0(CARG2) + | settp STR:RC, TMP1 + | b >1 + |. sd STR:RC, 0(CARG3) + | + |->vmeta_tgetb: // TMP0 = index + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) + | settp TMP0, TISNUM + | sd TMP0, 0(CARG3) + | + |->vmeta_tgetv: + |1: + | load_got lj_meta_tget + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) + |. move CARG1, L + | // Returns TValue * (finished) or NULL (metamethod). + | beqz CRET1, >3 + |. daddiu TMP1, BASE, -FRAME_CONT + | ld CARG1, 0(CRET1) + | ins_next1 + | sd CARG1, 0(RA) + | ins_next2 + | + |3: // Call __index metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k + | ld BASE, L->top + | sd PC, -24(BASE) // [cont|PC] + | dsubu PC, BASE, TMP1 + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | cleartp LFUNC:RB + | b ->vm_call_dispatch_f + |. li NARGS8:RC, 16 // 2 args for func(t, k). + | + |->vmeta_tgetr: + | load_got lj_tab_getinth + | call_intern lj_tab_getinth // (GCtab *t, int32_t key) + |. nop + | // Returns cTValue * or NULL. + | beqz CRET1, ->BC_TGETR_Z + |. move CARG2, TISNIL + | b ->BC_TGETR_Z + |. ld CARG2, 0(CRET1) + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets1: + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) + | li TMP0, LJ_TSTR + | settp STR:RC, TMP0 + | b >1 + |. sd STR:RC, 0(CARG3) + | + |->vmeta_tsets: + | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) + | li TMP0, LJ_TTAB + | li TMP1, LJ_TSTR + | settp TAB:RB, TMP0 + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) + | sd TAB:RB, 0(CARG2) + | settp STR:RC, TMP1 + | b >1 + |. sd STR:RC, 0(CARG3) + | + |->vmeta_tsetb: // TMP0 = index + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) + | settp TMP0, TISNUM + | sd TMP0, 0(CARG3) + | + |->vmeta_tsetv: + |1: + | load_got lj_meta_tset + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + |. move CARG1, L + | // Returns TValue * (finished) or NULL (metamethod). + | beqz CRET1, >3 + |. ld CARG1, 0(RA) + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | ins_next1 + | sd CARG1, 0(CRET1) + | ins_next2 + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | daddiu TMP1, BASE, -FRAME_CONT + | ld BASE, L->top + | sd PC, -24(BASE) // [cont|PC] + | dsubu PC, BASE, TMP1 + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | cleartp LFUNC:RB + | sd CARG1, 16(BASE) // Copy value to third argument. + | b ->vm_call_dispatch_f + |. li NARGS8:RC, 24 // 3 args for func(t, k, v) + | + |->vmeta_tsetr: + | load_got lj_tab_setinth + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + |. move CARG1, L + | // Returns TValue *. + | b ->BC_TSETR_Z + |. nop + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | // RA/RD point to o1/o2. + | move CARG2, RA + | move CARG3, RD + | load_got lj_meta_comp + | daddiu PC, PC, -4 + | sd BASE, L->base + | sd PC, SAVE_PC + | decode_OP1 CARG4, INS + | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) + |. move CARG1, L + | // Returns 0/1 or TValue * (metamethod). + |3: + | sltiu AT, CRET1, 2 + | beqz AT, ->vmeta_binop + | negu TMP2, CRET1 + |4: + | lhu RD, OFS_RD(PC) + | daddiu PC, PC, 4 + | lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) + | sll RD, RD, 2 + | addu RD, RD, TMP1 + | and RD, RD, TMP2 + | daddu PC, PC, RD + |->cont_nop: + | ins_next + | + |->cont_ra: // RA = resultptr + | lbu TMP1, -4+OFS_RA(PC) + | ld CRET1, 0(RA) + | sll TMP1, TMP1, 3 + | daddu TMP1, BASE, TMP1 + | b ->cont_nop + |. sd CRET1, 0(TMP1) + | + |->cont_condt: // RA = resultptr + | ld TMP0, 0(RA) + | gettp TMP0, TMP0 + | sltiu AT, TMP0, LJ_TISTRUECOND + | b <4 + |. negu TMP2, AT // Branch if result is true. + | + |->cont_condf: // RA = resultptr + | ld TMP0, 0(RA) + | gettp TMP0, TMP0 + | sltiu AT, TMP0, LJ_TISTRUECOND + | b <4 + |. addiu TMP2, AT, -1 // Branch if result is false. + | + |->vmeta_equal: + | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. + | load_got lj_meta_equal + | cleartp LFUNC:CARG3, CARG2 + | cleartp LFUNC:CARG2, CARG1 + | move CARG4, TMP0 + | daddiu PC, PC, -4 + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) + |. move CARG1, L + | // Returns 0/1 or TValue * (metamethod). + | b <3 + |. nop + | + |->vmeta_equal_cd: + |.if FFI + | load_got lj_meta_equal_cd + | move CARG2, INS + | daddiu PC, PC, -4 + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op) + |. move CARG1, L + | // Returns 0/1 or TValue * (metamethod). + | b <3 + |. nop + |.endif + | + |->vmeta_istype: + | load_got lj_meta_istype + | daddiu PC, PC, -4 + | sd BASE, L->base + | srl CARG2, RA, 3 + | srl CARG3, RD, 3 + | sd PC, SAVE_PC + | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + |. move CARG1, L + | b ->cont_nop + |. nop + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_unm: + | move RC, RB + | + |->vmeta_arith: + | load_got lj_meta_arith + | sd BASE, L->base + | move CARG2, RA + | sd PC, SAVE_PC + | move CARG3, RB + | move CARG4, RC + | decode_OP1 CARG5, INS // CARG5 == RB. + | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + |. move CARG1, L + | // Returns NULL (finished) or TValue * (metamethod). + | beqz CRET1, ->cont_nop + |. nop + | + | // Call metamethod for binary op. + |->vmeta_binop: + | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 + | dsubu TMP1, CRET1, BASE + | sd PC, -24(CRET1) // [cont|PC] + | move TMP2, BASE + | daddiu PC, TMP1, FRAME_CONT + | move BASE, CRET1 + | b ->vm_call_dispatch + |. li NARGS8:RC, 16 // 2 args for func(o1, o2). + | + |->vmeta_len: + | // CARG2 already set by BC_LEN. +#if LJ_52 + | move MULTRES, CARG1 +#endif + | load_got lj_meta_len + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_meta_len // (lua_State *L, TValue *o) + |. move CARG1, L + | // Returns NULL (retry) or TValue * (metamethod base). +#if LJ_52 + | bnez CRET1, ->vmeta_binop // Binop call for compatibility. + |. nop + | b ->BC_LEN_Z + |. move CARG1, MULTRES +#else + | b ->vmeta_binop // Binop call for compatibility. + |. nop +#endif + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call: // Resolve and call __call metamethod. + | // TMP2 = old base, BASE = new base, RC = nargs*8 + | load_got lj_meta_call + | sd TMP2, L->base // This is the callers base! + | daddiu CARG2, BASE, -16 + | sd PC, SAVE_PC + | daddu CARG3, BASE, RC + | move MULTRES, NARGS8:RC + | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + |. move CARG1, L + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. + | cleartp LFUNC:RB + | ins_call + | + |->vmeta_callt: // Resolve __call for BC_CALLT. + | // BASE = old base, RA = new base, RC = nargs*8 + | load_got lj_meta_call + | sd BASE, L->base + | daddiu CARG2, RA, -16 + | sd PC, SAVE_PC + | daddu CARG3, RA, RC + | move MULTRES, NARGS8:RC + | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + |. move CARG1, L + | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here. + | ld TMP1, FRAME_PC(BASE) + | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. + | b ->BC_CALLT_Z + |. cleartp LFUNC:CARG3, RB + | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | load_got lj_meta_for + | sd BASE, L->base + | move CARG2, RA + | sd PC, SAVE_PC + | move MULTRES, INS + | call_intern lj_meta_for // (lua_State *L, TValue *base) + |. move CARG1, L + |.if JIT + | decode_OP1 TMP0, MULTRES + | li AT, BC_JFORI + |.endif + | decode_RA8a RA, MULTRES + | decode_RD8a RD, MULTRES + | decode_RA8b RA + |.if JIT + | beq TMP0, AT, =>BC_JFORI + |. decode_RD8b RD + | b =>BC_FORI + |. nop + |.else + | b =>BC_FORI + |. decode_RD8b RD + |.endif + | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. name: + | beqz NARGS8:RC, ->fff_fallback + |. ld CARG1, 0(BASE) + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | sltiu AT, NARGS8:RC, 16 + | ld CARG1, 0(BASE) + | bnez AT, ->fff_fallback + |. ld CARG2, 8(BASE) + |.endmacro + | + |.macro .ffunc_n, name // Caveat: has delay slot! + |->ff_ .. name: + | ld CARG1, 0(BASE) + | beqz NARGS8:RC, ->fff_fallback + | // Either ldc1 or the 1st instruction of checknum is in the delay slot. + | .FPU ldc1 FARG1, 0(BASE) + | checknum CARG1, ->fff_fallback + |.endmacro + | + |.macro .ffunc_nn, name // Caveat: has delay slot! + |->ff_ .. name: + | ld CARG1, 0(BASE) + | sltiu AT, NARGS8:RC, 16 + | ld CARG2, 8(BASE) + | bnez AT, ->fff_fallback + |. gettp TMP0, CARG1 + | gettp TMP1, CARG2 + | sltiu TMP0, TMP0, LJ_TISNUM + | sltiu TMP1, TMP1, LJ_TISNUM + | .FPU ldc1 FARG1, 0(BASE) + | and TMP0, TMP0, TMP1 + | .FPU ldc1 FARG2, 8(BASE) + | beqz TMP0, ->fff_fallback + |.endmacro + | + |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! + |.macro ffgccheck + | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) + | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) + | dsubu AT, TMP0, TMP1 + | bgezal AT, ->fff_gcstep + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- + |.ffunc_1 assert + | gettp AT, CARG1 + | sltiu AT, AT, LJ_TISTRUECOND + | beqz AT, ->fff_fallback + |. daddiu RA, BASE, -16 + | ld PC, FRAME_PC(BASE) + | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. + | daddu TMP2, RA, RD + | daddiu TMP1, BASE, 8 + | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. + |. sd CARG1, 0(RA) + |1: + | ld CRET1, 0(TMP1) + | sd CRET1, -16(TMP1) + | bne TMP1, TMP2, <1 + |. daddiu TMP1, TMP1, 8 + | b ->fff_res + |. nop + | + |.ffunc_1 type + | gettp TMP0, CARG1 + | sltu TMP1, TISNUM, TMP0 + | not TMP2, TMP0 + | li TMP3, ~LJ_TISNUM + | movz TMP2, TMP3, TMP1 + | dsll TMP2, TMP2, 3 + | daddu TMP2, CFUNC:RB, TMP2 + | b ->fff_restv + |. ld CARG1, CFUNC:TMP2->upvalue + | + |//-- Base library: getters and setters --------------------------------- + | + |.ffunc_1 getmetatable + | gettp TMP2, CARG1 + | daddiu TMP0, TMP2, -LJ_TTAB + | daddiu TMP1, TMP2, -LJ_TUDATA + | movn TMP0, TMP1, TMP0 + | bnez TMP0, >6 + |. cleartp TAB:CARG1 + |1: // Field metatable must be at same offset for GCtab and GCudata! + | ld TAB:RB, TAB:CARG1->metatable + |2: + | ld STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) + | beqz TAB:RB, ->fff_restv + |. li CARG1, LJ_TNIL + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->hash + | ld NODE:TMP2, TAB:RB->node + | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | dsll TMP0, TMP1, 5 + | dsll TMP1, TMP1, 3 + | dsubu TMP1, TMP0, TMP1 + | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | li CARG4, LJ_TSTR + | settp STR:RC, CARG4 // Tagged key to look for. + |3: // Rearranged logic, because we expect _not_ to find the key. + | ld TMP0, NODE:TMP2->key + | ld CARG1, NODE:TMP2->val + | ld NODE:TMP2, NODE:TMP2->next + | beq RC, TMP0, >5 + |. li AT, LJ_TTAB + | bnez NODE:TMP2, <3 + |. nop + |4: + | move CARG1, RB + | b ->fff_restv // Not found, keep default result. + |. settp CARG1, AT + |5: + | bne CARG1, TISNIL, ->fff_restv + |. nop + | b <4 // Ditto for nil value. + |. nop + | + |6: + | sltiu AT, TMP2, LJ_TISNUM + | movn TMP2, TISNUM, AT + | dsll TMP2, TMP2, 3 + | dsubu TMP0, DISPATCH, TMP2 + | b <2 + |. ld TAB:RB, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8(TMP0) + | + |.ffunc_2 setmetatable + | // Fast path: no mt for table yet and not clearing the mt. + | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | gettp TMP3, CARG2 + | ld TAB:TMP0, TAB:TMP1->metatable + | lbu TMP2, TAB:TMP1->marked + | daddiu AT, TMP3, -LJ_TTAB + | cleartp TAB:CARG2 + | or AT, AT, TAB:TMP0 + | bnez AT, ->fff_fallback + |. andi AT, TMP2, LJ_GC_BLACK // isblack(table) + | beqz AT, ->fff_restv + |. sd TAB:CARG2, TAB:TMP1->metatable + | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv + | + |.ffunc rawget + | ld CARG2, 0(BASE) + | sltiu AT, NARGS8:RC, 16 + | load_got lj_tab_get + | gettp TMP0, CARG2 + | cleartp CARG2 + | daddiu TMP0, TMP0, -LJ_TTAB + | or AT, AT, TMP0 + | bnez AT, ->fff_fallback + |. daddiu CARG3, BASE, 8 + | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + |. move CARG1, L + | b ->fff_restv + |. ld CARG1, 0(CRET1) + | + |//-- Base library: conversions ------------------------------------------ + | + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | ld CARG1, 0(BASE) + | xori AT, NARGS8:RC, 8 // Exactly one number argument. + | gettp TMP1, CARG1 + | sltu TMP0, TISNUM, TMP1 + | or AT, AT, TMP0 + | bnez AT, ->fff_fallback + |. nop + | b ->fff_restv + |. nop + | + |.ffunc_1 tostring + | // Only handles the string or number case inline. + | gettp TMP0, CARG1 + | daddiu AT, TMP0, -LJ_TSTR + | // A __tostring method in the string base metatable is ignored. + | beqz AT, ->fff_restv // String key? + | // Handle numbers inline, unless a number base metatable is present. + |. ld TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) + | sltu TMP0, TISNUM, TMP0 + | or TMP0, TMP0, TMP1 + | bnez TMP0, ->fff_fallback + |. sd BASE, L->base // Add frame since C call can throw. + | ffgccheck + |. sd PC, SAVE_PC // Redundant (but a defined value). + | load_got lj_strfmt_number + | move CARG1, L + | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) + |. move CARG2, BASE + | // Returns GCstr *. + | li AT, LJ_TSTR + | settp CRET1, AT + | b ->fff_restv + |. move CARG1, CRET1 + | + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next + | checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback + | daddu TMP2, BASE, NARGS8:RC + | sd TISNIL, 0(TMP2) // Set missing 2nd arg to nil. + | ld PC, FRAME_PC(BASE) + | load_got lj_tab_next + | sd BASE, L->base // Add frame since C call can throw. + | sd BASE, L->top // Dummy frame length is ok. + | daddiu CARG3, BASE, 8 + | sd PC, SAVE_PC + | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) + |. move CARG1, L + | // Returns 0 at end of traversal. + | beqz CRET1, ->fff_restv // End of traversal: return nil. + |. move CARG1, TISNIL + | ld TMP0, 8(BASE) + | daddiu RA, BASE, -16 + | ld TMP2, 16(BASE) + | sd TMP0, 0(RA) + | sd TMP2, 8(RA) + | b ->fff_res + |. li RD, (2+1)*8 + | + |.ffunc_1 pairs + | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | ld PC, FRAME_PC(BASE) +#if LJ_52 + | ld TAB:TMP2, TAB:TMP1->metatable + | ld TMP0, CFUNC:RB->upvalue[0] + | bnez TAB:TMP2, ->fff_fallback +#else + | ld TMP0, CFUNC:RB->upvalue[0] +#endif + |. daddiu RA, BASE, -16 + | sd TISNIL, 0(BASE) + | sd CARG1, -8(BASE) + | sd TMP0, 0(RA) + | b ->fff_res + |. li RD, (3+1)*8 + | + |.ffunc_2 ipairs_aux + | checktab CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback + |. lw TMP0, TAB:CARG1->asize + | ld TMP1, TAB:CARG1->array + | ld PC, FRAME_PC(BASE) + | sextw TMP2, CARG2 + | addiu TMP2, TMP2, 1 + | sltu AT, TMP2, TMP0 + | daddiu RA, BASE, -16 + | zextw TMP0, TMP2 + | settp TMP0, TISNUM + | beqz AT, >2 // Not in array part? + |. sd TMP0, 0(RA) + | dsll TMP3, TMP2, 3 + | daddu TMP3, TMP1, TMP3 + | ld TMP1, 0(TMP3) + |1: + | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. + |. li RD, (0+1)*8 + | sd TMP1, -8(BASE) + | b ->fff_res + |. li RD, (2+1)*8 + |2: // Check for empty hash part first. Otherwise call C function. + | lw TMP0, TAB:CARG1->hmask + | load_got lj_tab_getinth + | beqz TMP0, ->fff_res + |. li RD, (0+1)*8 + | call_intern lj_tab_getinth // (GCtab *t, int32_t key) + |. move CARG2, TMP2 + | // Returns cTValue * or NULL. + | beqz CRET1, ->fff_res + |. li RD, (0+1)*8 + | b <1 + |. ld TMP1, 0(CRET1) + | + |.ffunc_1 ipairs + | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | ld PC, FRAME_PC(BASE) +#if LJ_52 + | ld TAB:TMP2, TAB:TMP1->metatable + | ld CFUNC:TMP0, CFUNC:RB->upvalue[0] + | bnez TAB:TMP2, ->fff_fallback +#else + | ld TMP0, CFUNC:RB->upvalue[0] +#endif + | daddiu RA, BASE, -16 + | dsll AT, TISNUM, 47 + | sd CARG1, -8(BASE) + | sd AT, 0(BASE) + | sd CFUNC:TMP0, 0(RA) + | b ->fff_res + |. li RD, (3+1)*8 + | + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc pcall + | daddiu NARGS8:RC, NARGS8:RC, -8 + | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | bltz NARGS8:RC, ->fff_fallback + |. move TMP2, BASE + | daddiu BASE, BASE, 16 + | // Remember active hook before pcall. + | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT + | andi TMP3, TMP3, 1 + | daddiu PC, TMP3, 16+FRAME_PCALL + | beqz NARGS8:RC, ->vm_call_dispatch + |1: + |. daddu TMP0, BASE, NARGS8:RC + |2: + | ld TMP1, -16(TMP0) + | sd TMP1, -8(TMP0) + | daddiu TMP0, TMP0, -8 + | bne TMP0, BASE, <2 + |. nop + | b ->vm_call_dispatch + |. nop + | + |.ffunc xpcall + | daddiu NARGS8:RC, NARGS8:RC, -16 + | ld CARG1, 0(BASE) + | ld CARG2, 8(BASE) + | bltz NARGS8:RC, ->fff_fallback + |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) + | gettp AT, CARG2 + | daddiu AT, AT, -LJ_TFUNC + | bnez AT, ->fff_fallback // Traceback must be a function. + |. move TMP2, BASE + | daddiu BASE, BASE, 24 + | // Remember active hook before pcall. + | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT + | sd CARG2, 0(TMP2) // Swap function and traceback. + | andi TMP3, TMP3, 1 + | sd CARG1, 8(TMP2) + | beqz NARGS8:RC, ->vm_call_dispatch + |. daddiu PC, TMP3, 24+FRAME_PCALL + | b <1 + |. nop + | + |//-- Coroutine library -------------------------------------------------- + | + |.macro coroutine_resume_wrap, resume + |.if resume + |.ffunc_1 coroutine_resume + | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback + |.else + |.ffunc coroutine_wrap_aux + | ld L:CARG1, CFUNC:RB->upvalue[0].gcr + | cleartp L:CARG1 + |.endif + | lbu TMP0, L:CARG1->status + | ld TMP1, L:CARG1->cframe + | ld CARG2, L:CARG1->top + | ld TMP2, L:CARG1->base + | addiu AT, TMP0, -LUA_YIELD + | daddu CARG3, CARG2, TMP0 + | daddiu TMP3, CARG2, 8 + | bgtz AT, ->fff_fallback // st > LUA_YIELD? + |. movn CARG2, TMP3, AT + | xor TMP2, TMP2, CARG3 + | bnez TMP1, ->fff_fallback // cframe != 0? + |. or AT, TMP2, TMP0 + | ld TMP0, L:CARG1->maxstack + | beqz AT, ->fff_fallback // base == top && st == 0? + |. ld PC, FRAME_PC(BASE) + | daddu TMP2, CARG2, NARGS8:RC + | sltu AT, TMP0, TMP2 + | bnez AT, ->fff_fallback // Stack overflow? + |. sd PC, SAVE_PC + | sd BASE, L->base + |1: + |.if resume + | daddiu BASE, BASE, 8 // Keep resumed thread in stack for GC. + | daddiu NARGS8:RC, NARGS8:RC, -8 + | daddiu TMP2, TMP2, -8 + |.endif + | sd TMP2, L:CARG1->top + | daddu TMP1, BASE, NARGS8:RC + | move CARG3, CARG2 + | sd BASE, L->top + |2: // Move args to coroutine. + | ld CRET1, 0(BASE) + | sltu AT, BASE, TMP1 + | beqz AT, >3 + |. daddiu BASE, BASE, 8 + | sd CRET1, 0(CARG3) + | b <2 + |. daddiu CARG3, CARG3, 8 + |3: + | bal ->vm_resume // (lua_State *L, TValue *base, 0, 0) + |. move L:RA, L:CARG1 + | // Returns thread status. + |4: + | ld TMP2, L:RA->base + | sltiu AT, CRET1, LUA_YIELD+1 + | ld TMP3, L:RA->top + | li_vmstate INTERP + | ld BASE, L->base + | sd L, DISPATCH_GL(cur_L)(DISPATCH) + | st_vmstate + | beqz AT, >8 + |. dsubu RD, TMP3, TMP2 + | ld TMP0, L->maxstack + | beqz RD, >6 // No results? + |. daddu TMP1, BASE, RD + | sltu AT, TMP0, TMP1 + | bnez AT, >9 // Need to grow stack? + |. daddu TMP3, TMP2, RD + | sd TMP2, L:RA->top // Clear coroutine stack. + | move TMP1, BASE + |5: // Move results from coroutine. + | ld CRET1, 0(TMP2) + | daddiu TMP2, TMP2, 8 + | sltu AT, TMP2, TMP3 + | sd CRET1, 0(TMP1) + | bnez AT, <5 + |. daddiu TMP1, TMP1, 8 + |6: + | andi TMP0, PC, FRAME_TYPE + |.if resume + | mov_true TMP1 + | daddiu RA, BASE, -8 + | sd TMP1, -8(BASE) // Prepend true to results. + | daddiu RD, RD, 16 + |.else + | move RA, BASE + | daddiu RD, RD, 8 + |.endif + |7: + | sd PC, SAVE_PC + | beqz TMP0, ->BC_RET_Z + |. move MULTRES, RD + | b ->vm_return + |. nop + | + |8: // Coroutine returned with error (at co->top-1). + |.if resume + | daddiu TMP3, TMP3, -8 + | mov_false TMP1 + | ld CRET1, 0(TMP3) + | sd TMP3, L:RA->top // Remove error from coroutine stack. + | li RD, (2+1)*8 + | sd TMP1, -8(BASE) // Prepend false to results. + | daddiu RA, BASE, -8 + | sd CRET1, 0(BASE) // Copy error message. + | b <7 + |. andi TMP0, PC, FRAME_TYPE + |.else + | load_got lj_ffh_coroutine_wrap_err + | move CARG2, L:RA + | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) + |. move CARG1, L + |.endif + | + |9: // Handle stack expansion on return from yield. + | load_got lj_state_growstack + | srl CARG2, RD, 3 + | call_intern lj_state_growstack // (lua_State *L, int n) + |. move CARG1, L + | b <4 + |. li CRET1, 0 + |.endmacro + | + | coroutine_resume_wrap 1 // coroutine.resume + | coroutine_resume_wrap 0 // coroutine.wrap + | + |.ffunc coroutine_yield + | ld TMP0, L->cframe + | daddu TMP1, BASE, NARGS8:RC + | sd BASE, L->base + | andi TMP0, TMP0, CFRAME_RESUME + | sd TMP1, L->top + | beqz TMP0, ->fff_fallback + |. li CRET1, LUA_YIELD + | sd r0, L->cframe + | b ->vm_leave_unw + |. sb CRET1, L->status + | + |//-- Math library ------------------------------------------------------- + | + |.ffunc_1 math_abs + | gettp CARG2, CARG1 + | daddiu AT, CARG2, -LJ_TISNUM + | bnez AT, >1 + |. sextw TMP1, CARG1 + | sra TMP0, TMP1, 31 // Extract sign. + | xor TMP1, TMP1, TMP0 + | dsubu CARG1, TMP1, TMP0 + | dsll TMP3, CARG1, 32 + | bgez TMP3, ->fff_restv + |. settp CARG1, TISNUM + | li CARG1, 0x41e0 // 2^31 as a double. + | b ->fff_restv + |. dsll CARG1, CARG1, 48 + |1: + | sltiu AT, CARG2, LJ_TISNUM + | beqz AT, ->fff_fallback + |. dextm CARG1, CARG1, 0, 30 + |// fallthrough + | + |->fff_restv: + | // CARG1 = TValue result. + | ld PC, FRAME_PC(BASE) + | daddiu RA, BASE, -16 + | sd CARG1, -16(BASE) + |->fff_res1: + | // RA = results, PC = return. + | li RD, (1+1)*8 + |->fff_res: + | // RA = results, RD = (nresults+1)*8, PC = return. + | andi TMP0, PC, FRAME_TYPE + | bnez TMP0, ->vm_return + |. move MULTRES, RD + | lw INS, -4(PC) + | decode_RB8a RB, INS + | decode_RB8b RB + |5: + | sltu AT, RD, RB + | bnez AT, >6 // More results expected? + |. decode_RA8a TMP0, INS + | decode_RA8b TMP0 + | ins_next1 + | // Adjust BASE. KBASE is assumed to be set for the calling frame. + | dsubu BASE, RA, TMP0 + | ins_next2 + | + |6: // Fill up results with nil. + | daddu TMP1, RA, RD + | daddiu RD, RD, 8 + | b <5 + |. sd TISNIL, -8(TMP1) + | + |.macro math_extern, func + | .ffunc_n math_ .. func + | load_got func + | call_extern + |. nop + | b ->fff_resn + |. nop + |.endmacro + | + |.macro math_extern2, func + | .ffunc_nn math_ .. func + |. load_got func + | call_extern + |. nop + | b ->fff_resn + |. nop + |.endmacro + | + |// TODO: Return integer type if result is integer (own sf implementation). + |.macro math_round, func + |->ff_math_ .. func: + | ld CARG1, 0(BASE) + | beqz NARGS8:RC, ->fff_fallback + |. gettp TMP0, CARG1 + | beq TMP0, TISNUM, ->fff_restv + |. sltu AT, TMP0, TISNUM + | beqz AT, ->fff_fallback + |.if FPU + |. ldc1 FARG1, 0(BASE) + | bal ->vm_ .. func + |. nop + |.else + |. load_got func + | call_extern + |. nop + |.endif + | b ->fff_resn + |. nop + |.endmacro + | + | math_round floor + | math_round ceil + | + |.ffunc math_log + | li AT, 8 + | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. + |. ld CARG1, 0(BASE) + | checknum CARG1, ->fff_fallback + |. load_got log + |.if FPU + | call_extern + |. ldc1 FARG1, 0(BASE) + |.else + | call_extern + |. nop + |.endif + | b ->fff_resn + |. nop + | + | math_extern log10 + | math_extern exp + | math_extern sin + | math_extern cos + | math_extern tan + | math_extern asin + | math_extern acos + | math_extern atan + | math_extern sinh + | math_extern cosh + | math_extern tanh + | math_extern2 pow + | math_extern2 atan2 + | math_extern2 fmod + | + |.if FPU + |.ffunc_n math_sqrt + |. sqrt.d FRET1, FARG1 + |// fallthrough to ->fff_resn + |.else + | math_extern sqrt + |.endif + | + |->fff_resn: + | ld PC, FRAME_PC(BASE) + | daddiu RA, BASE, -16 + | b ->fff_res1 + |.if FPU + |. sdc1 FRET1, 0(RA) + |.else + |. sd CRET1, 0(RA) + |.endif + | + | + |.ffunc_2 math_ldexp + | checknum CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback + |. load_got ldexp + | .FPU ldc1 FARG1, 0(BASE) + | call_extern + |. lw CARG2, 8+LO(BASE) + | b ->fff_resn + |. nop + | + |.ffunc_n math_frexp + | load_got frexp + | ld PC, FRAME_PC(BASE) + | call_extern + |. daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) + | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) + | daddiu RA, BASE, -16 + |.if FPU + | mtc1 TMP1, FARG2 + | sdc1 FRET1, 0(RA) + | cvt.d.w FARG2, FARG2 + | sdc1 FARG2, 8(RA) + |.else + | sd CRET1, 0(RA) + | zextw TMP1, TMP1 + | settp TMP1, TISNUM + | sd TMP1, 8(RA) + |.endif + | b ->fff_res + |. li RD, (2+1)*8 + | + |.ffunc_n math_modf + | load_got modf + | ld PC, FRAME_PC(BASE) + | call_extern + |. daddiu CARG2, BASE, -16 + | daddiu RA, BASE, -16 + |.if FPU + | sdc1 FRET1, -8(BASE) + |.else + | sd CRET1, -8(BASE) + |.endif + | b ->fff_res + |. li RD, (2+1)*8 + | + |.macro math_minmax, name, intins, fpins + | .ffunc_1 name + | daddu TMP3, BASE, NARGS8:RC + | checkint CARG1, >5 + |. daddiu TMP2, BASE, 8 + |1: // Handle integers. + | beq TMP2, TMP3, ->fff_restv + |. ld CARG2, 0(TMP2) + | checkint CARG2, >3 + |. sextw CARG1, CARG1 + | lw CARG2, LO(TMP2) + |. slt AT, CARG1, CARG2 + | intins CARG1, CARG2, AT + | daddiu TMP2, TMP2, 8 + | zextw CARG1, CARG1 + | b <1 + |. settp CARG1, TISNUM + | + |3: // Convert intermediate result to number and continue with number loop. + | checknum CARG2, ->fff_fallback + |.if FPU + |. mtc1 CARG1, FRET1 + | cvt.d.w FRET1, FRET1 + | b >7 + |. ldc1 FARG1, 0(TMP2) + |.else + |. nop + | bal ->vm_sfi2d_1 + |. nop + | b >7 + |. nop + |.endif + | + |5: + | .FPU ldc1 FRET1, 0(BASE) + | checknum CARG1, ->fff_fallback + |6: // Handle numbers. + |. ld CARG2, 0(TMP2) + | beq TMP2, TMP3, ->fff_resn + |.if FPU + | ldc1 FARG1, 0(TMP2) + |.else + | move CRET1, CARG1 + |.endif + | checknum CARG2, >8 + |. nop + |7: + |.if FPU + | c.olt.d FRET1, FARG1 + | fpins FRET1, FARG1 + |.else + | bal ->vm_sfcmpolt + |. nop + | intins CARG1, CARG2, CRET1 + |.endif + | b <6 + |. daddiu TMP2, TMP2, 8 + | + |8: // Convert integer to number and continue with number loop. + | checkint CARG2, ->fff_fallback + |.if FPU + |. lwc1 FARG1, LO(TMP2) + | b <7 + |. cvt.d.w FARG1, FARG1 + |.else + |. lw CARG2, LO(TMP2) + | bal ->vm_sfi2d_2 + |. nop + | b <7 + |. nop + |.endif + | + |.endmacro + | + | math_minmax math_min, movz, movf.d + | math_minmax math_max, movn, movt.d + | + |//-- String library ----------------------------------------------------- + | + |.ffunc string_byte // Only handle the 1-arg case here. + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | xori AT, NARGS8:RC, 8 + | daddiu TMP0, TMP0, -LJ_TSTR + | or AT, AT, TMP0 + | bnez AT, ->fff_fallback // Need exactly 1 string argument. + |. cleartp STR:CARG1 + | lw TMP0, STR:CARG1->len + | daddiu RA, BASE, -16 + | ld PC, FRAME_PC(BASE) + | sltu RD, r0, TMP0 + | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). + | addiu RD, RD, 1 + | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 + | settp TMP1, TISNUM + | b ->fff_res + |. sd TMP1, 0(RA) + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + |. nop + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | xori AT, NARGS8:RC, 8 // Exactly 1 argument. + | daddiu TMP0, TMP0, -LJ_TISNUM // Integer. + | li TMP1, 255 + | sextw CARG1, CARG1 + | or AT, AT, TMP0 + | sltu TMP1, TMP1, CARG1 // !(255 < n). + | or AT, AT, TMP1 + | bnez AT, ->fff_fallback + |. li CARG3, 1 + | daddiu CARG2, sp, TMPD_OFS + | sb CARG1, TMPD + |->fff_newstr: + | load_got lj_str_new + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_str_new // (lua_State *L, char *str, size_t l) + |. move CARG1, L + | // Returns GCstr *. + | ld BASE, L->base + |->fff_resstr: + | li AT, LJ_TSTR + | settp CRET1, AT + | b ->fff_restv + |. move CARG1, CRET1 + | + |.ffunc string_sub + | ffgccheck + |. nop + | addiu AT, NARGS8:RC, -16 + | ld TMP0, 0(BASE) + | bltz AT, ->fff_fallback + |. gettp TMP3, TMP0 + | cleartp STR:CARG1, TMP0 + | ld CARG2, 8(BASE) + | beqz AT, >1 + |. li CARG4, -1 + | ld CARG3, 16(BASE) + | checkint CARG3, ->fff_fallback + |. sextw CARG4, CARG3 + |1: + | checkint CARG2, ->fff_fallback + |. li AT, LJ_TSTR + | bne TMP3, AT, ->fff_fallback + |. sextw CARG3, CARG2 + | lw CARG2, STR:CARG1->len + | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end + | slt AT, CARG4, r0 + | addiu TMP0, CARG2, 1 + | addu TMP1, CARG4, TMP0 + | slt TMP3, CARG3, r0 + | movn CARG4, TMP1, AT // if (end < 0) end += len+1 + | addu TMP1, CARG3, TMP0 + | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 + | li TMP2, 1 + | slt AT, CARG4, r0 + | slt TMP3, r0, CARG3 + | movn CARG4, r0, AT // if (end < 0) end = 0 + | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 + | slt AT, CARG2, CARG4 + | movn CARG4, CARG2, AT // if (end > len) end = len + | daddu CARG2, STR:CARG1, CARG3 + | subu CARG3, CARG4, CARG3 // len = end - start + | daddiu CARG2, CARG2, sizeof(GCstr)-1 + | bgez CARG3, ->fff_newstr + |. addiu CARG3, CARG3, 1 // len++ + |->fff_emptystr: // Return empty string. + | li AT, LJ_TSTR + | daddiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) + | b ->fff_restv + |. settp CARG1, AT + | + |.macro ffstring_op, name + | .ffunc string_ .. name + | ffgccheck + |. nop + | beqz NARGS8:RC, ->fff_fallback + |. ld CARG2, 0(BASE) + | checkstr STR:CARG2, ->fff_fallback + | daddiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) + | load_got lj_buf_putstr_ .. name + | ld TMP0, SBUF:CARG1->b + | sd L, SBUF:CARG1->L + | sd BASE, L->base + | sd TMP0, SBUF:CARG1->p + | call_intern extern lj_buf_putstr_ .. name + |. sd PC, SAVE_PC + | load_got lj_buf_tostr + | call_intern lj_buf_tostr + |. move SBUF:CARG1, SBUF:CRET1 + | b ->fff_resstr + |. ld BASE, L->base + |.endmacro + | + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper + | + |//-- Bit library -------------------------------------------------------- + | + |->vm_tobit_fb: + | beqz TMP1, ->fff_fallback + |.if FPU + |. ldc1 FARG1, 0(BASE) + | add.d FARG1, FARG1, TOBIT + | mfc1 CRET1, FARG1 + | jr ra + |. zextw CRET1, CRET1 + |.else + |// FP number to bit conversion for soft-float. + |->vm_tobit: + | dsll TMP0, CARG1, 1 + | li CARG3, 1076 + | dsrl AT, TMP0, 53 + | dsubu CARG3, CARG3, AT + | sltiu AT, CARG3, 54 + | beqz AT, >1 + |. dextm TMP0, TMP0, 0, 20 + | dinsu TMP0, AT, 21, 21 + | slt AT, CARG1, r0 + | dsrlv CRET1, TMP0, CARG3 + | dsubu TMP0, r0, CRET1 + | movn CRET1, TMP0, AT + | jr ra + |. zextw CRET1, CRET1 + |1: + | jr ra + |. move CRET1, r0 + |.endif + | + |.macro .ffunc_bit, name + | .ffunc_1 bit_..name + | gettp TMP0, CARG1 + | beq TMP0, TISNUM, >6 + |. zextw CRET1, CARG1 + | bal ->vm_tobit_fb + |. sltiu TMP1, TMP0, LJ_TISNUM + |6: + |.endmacro + | + |.macro .ffunc_bit_op, name, bins + | .ffunc_bit name + | daddiu TMP2, BASE, 8 + | daddu TMP3, BASE, NARGS8:RC + |1: + | beq TMP2, TMP3, ->fff_resi + |. ld CARG1, 0(TMP2) + | gettp TMP0, CARG1 + |.if FPU + | bne TMP0, TISNUM, >2 + |. daddiu TMP2, TMP2, 8 + | zextw CARG1, CARG1 + | b <1 + |. bins CRET1, CRET1, CARG1 + |2: + | ldc1 FARG1, -8(TMP2) + | sltiu AT, TMP0, LJ_TISNUM + | beqz AT, ->fff_fallback + |. add.d FARG1, FARG1, TOBIT + | mfc1 CARG1, FARG1 + | zextw CARG1, CARG1 + | b <1 + |. bins CRET1, CRET1, CARG1 + |.else + | beq TMP0, TISNUM, >2 + |. move CRET2, CRET1 + | bal ->vm_tobit_fb + |. sltiu TMP1, TMP0, LJ_TISNUM + | move CARG1, CRET2 + |2: + | zextw CARG1, CARG1 + | bins CRET1, CRET1, CARG1 + | b <1 + |. daddiu TMP2, TMP2, 8 + |.endif + |.endmacro + | + |.ffunc_bit_op band, and + |.ffunc_bit_op bor, or + |.ffunc_bit_op bxor, xor + | + |.ffunc_bit bswap + | dsrl TMP0, CRET1, 8 + | dsrl TMP1, CRET1, 24 + | andi TMP2, TMP0, 0xff00 + | dins TMP1, CRET1, 24, 31 + | dins TMP2, TMP0, 16, 23 + | b ->fff_resi + |. or CRET1, TMP1, TMP2 + | + |.ffunc_bit bnot + | not CRET1, CRET1 + | b ->fff_resi + |. zextw CRET1, CRET1 + | + |.macro .ffunc_bit_sh, name, shins, shmod + | .ffunc_2 bit_..name + | gettp TMP0, CARG1 + | beq TMP0, TISNUM, >1 + |. nop + | bal ->vm_tobit_fb + |. sltiu TMP1, TMP0, LJ_TISNUM + | move CARG1, CRET1 + |1: + | gettp TMP0, CARG2 + | bne TMP0, TISNUM, ->fff_fallback + |. zextw CARG2, CARG2 + | sextw CARG1, CARG1 + |.if shmod == 1 + | negu CARG2, CARG2 + |.endif + | shins CRET1, CARG1, CARG2 + | b ->fff_resi + |. zextw CRET1, CRET1 + |.endmacro + | + |.ffunc_bit_sh lshift, sllv, 0 + |.ffunc_bit_sh rshift, srlv, 0 + |.ffunc_bit_sh arshift, srav, 0 + |.ffunc_bit_sh rol, rotrv, 1 + |.ffunc_bit_sh ror, rotrv, 0 + | + |.ffunc_bit tobit + |->fff_resi: + | ld PC, FRAME_PC(BASE) + | daddiu RA, BASE, -16 + | settp CRET1, TISNUM + | b ->fff_res1 + |. sd CRET1, -16(BASE) + | + |//----------------------------------------------------------------------- + |->fff_fallback: // Call fast function fallback handler. + | // BASE = new base, RB = CFUNC, RC = nargs*8 + | ld TMP3, CFUNC:RB->f + | daddu TMP1, BASE, NARGS8:RC + | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC. + | daddiu TMP0, TMP1, 8*LUA_MINSTACK + | ld TMP2, L->maxstack + | sd PC, SAVE_PC // Redundant (but a defined value). + | sltu AT, TMP2, TMP0 + | sd BASE, L->base + | sd TMP1, L->top + | bnez AT, >5 // Need to grow stack. + |. move CFUNCADDR, TMP3 + | jalr TMP3 // (lua_State *L) + |. move CARG1, L + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | ld BASE, L->base + | sll RD, CRET1, 3 + | bgtz CRET1, ->fff_res // Returned nresults+1? + |. daddiu RA, BASE, -16 + |1: // Returned 0 or -1: retry fast path. + | ld LFUNC:RB, FRAME_FUNC(BASE) + | ld TMP0, L->top + | cleartp LFUNC:RB + | bnez CRET1, ->vm_call_tail // Returned -1? + |. dsubu NARGS8:RC, TMP0, BASE + | ins_callt // Returned 0: retry fast path. + | + |// Reconstruct previous base for vmeta_call during tailcall. + |->vm_call_tail: + | andi TMP0, PC, FRAME_TYPE + | li AT, -4 + | bnez TMP0, >3 + |. and TMP1, PC, AT + | lbu TMP1, OFS_RA(PC) + | sll TMP1, TMP1, 3 + | addiu TMP1, TMP1, 16 + |3: + | b ->vm_call_dispatch // Resolve again for tailcall. + |. dsubu TMP2, BASE, TMP1 + | + |5: // Grow stack for fallback handler. + | load_got lj_state_growstack + | li CARG2, LUA_MINSTACK + | call_intern lj_state_growstack // (lua_State *L, int n) + |. move CARG1, L + | ld BASE, L->base + | b <1 + |. li CRET1, 0 // Force retry. + | + |->fff_gcstep: // Call GC step function. + | // BASE = new base, RC = nargs*8 + | move MULTRES, ra + | load_got lj_gc_step + | sd BASE, L->base + | daddu TMP0, BASE, NARGS8:RC + | sd PC, SAVE_PC // Redundant (but a defined value). + | sd TMP0, L->top + | call_intern lj_gc_step // (lua_State *L) + |. move CARG1, L + | ld BASE, L->base + | move ra, MULTRES + | ld TMP0, L->top + | ld CFUNC:RB, FRAME_FUNC(BASE) + | cleartp CFUNC:RB + | jr ra + |. dsubu NARGS8:RC, TMP0, BASE + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. + |.if JIT + | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent. + | bnez AT, >5 + | // Decrement the hookcount for consistency, but always do the call. + |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + | andi AT, TMP3, HOOK_ACTIVE + | bnez AT, >1 + |. addiu TMP2, TMP2, -1 + | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT + | beqz AT, >1 + |. nop + | b >1 + |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + |.endif + | + |->vm_rethook: // Dispatch target for return hooks. + | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | andi AT, TMP3, HOOK_ACTIVE // Hook already active? + | beqz AT, >1 + |5: // Re-dispatch to static ins. + |. ld AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. + | jr AT + |. nop + | + |->vm_inshook: // Dispatch target for instr/line hooks. + | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + | andi AT, TMP3, HOOK_ACTIVE // Hook already active? + | bnez AT, <5 + |. andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT + | beqz AT, <5 + |. addiu TMP2, TMP2, -1 + | beqz TMP2, >1 + |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + | andi AT, TMP3, LUA_MASKLINE + | beqz AT, <5 + |1: + |. load_got lj_dispatch_ins + | sw MULTRES, SAVE_MULTRES + | move CARG2, PC + | sd BASE, L->base + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. + | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |. move CARG1, L + |3: + | ld BASE, L->base + |4: // Re-dispatch to static ins. + | lw INS, -4(PC) + | decode_OP8a TMP1, INS + | decode_OP8b TMP1 + | daddu TMP0, DISPATCH, TMP1 + | decode_RD8a RD, INS + | ld AT, GG_DISP2STATIC(TMP0) + | decode_RA8a RA, INS + | decode_RD8b RD + | jr AT + | decode_RA8b RA + | + |->cont_hook: // Continue from hook yield. + | daddiu PC, PC, 4 + | b <4 + |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins. + | + |->vm_hotloop: // Hot loop counter underflow. + |.if JIT + | ld LFUNC:TMP1, FRAME_FUNC(BASE) + | daddiu CARG1, DISPATCH, GG_DISP2J + | cleartp LFUNC:TMP1 + | sd PC, SAVE_PC + | ld TMP1, LFUNC:TMP1->pc + | move CARG2, PC + | sd L, DISPATCH_J(L)(DISPATCH) + | lbu TMP1, PC2PROTO(framesize)(TMP1) + | load_got lj_trace_hot + | sd BASE, L->base + | dsll TMP1, TMP1, 3 + | daddu TMP1, BASE, TMP1 + | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc) + |. sd TMP1, L->top + | b <3 + |. nop + |.endif + | + | + |->vm_callhook: // Dispatch target for call hooks. + |.if JIT + | b >1 + |.endif + |. move CARG2, PC + | + |->vm_hotcall: // Hot call counter underflow. + |.if JIT + | ori CARG2, PC, 1 + |1: + |.endif + | load_got lj_dispatch_call + | daddu TMP0, BASE, RC + | sd PC, SAVE_PC + | sd BASE, L->base + | dsubu RA, RA, BASE + | sd TMP0, L->top + | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc) + |. move CARG1, L + | // Returns ASMFunction. + | ld BASE, L->base + | ld TMP0, L->top + | sd r0, SAVE_PC // Invalidate for subsequent line hook. + | dsubu NARGS8:RC, TMP0, BASE + | daddu RA, BASE, RA + | ld LFUNC:RB, FRAME_FUNC(BASE) + | cleartp LFUNC:RB + | jr CRET1 + |. lw INS, -4(PC) + | + |->cont_stitch: // Trace stitching. + |.if JIT + | // RA = resultptr, RB = meta base + | lw INS, -4(PC) + | ld TRACE:TMP2, -40(RB) // Save previous trace. + | decode_RA8a RC, INS + | daddiu AT, MULTRES, -8 + | cleartp TRACE:TMP2 + | decode_RA8b RC + | beqz AT, >2 + |. daddu RC, BASE, RC // Call base. + |1: // Move results down. + | ld CARG1, 0(RA) + | daddiu AT, AT, -8 + | daddiu RA, RA, 8 + | sd CARG1, 0(RC) + | bnez AT, <1 + |. daddiu RC, RC, 8 + |2: + | decode_RA8a RA, INS + | decode_RB8a RB, INS + | decode_RA8b RA + | decode_RB8b RB + | daddu RA, RA, RB + | daddu RA, BASE, RA + |3: + | sltu AT, RC, RA + | bnez AT, >9 // More results wanted? + |. nop + | + | lhu TMP3, TRACE:TMP2->traceno + | lhu RD, TRACE:TMP2->link + | beq RD, TMP3, ->cont_nop // Blacklisted. + |. load_got lj_dispatch_stitch + | bnez RD, =>BC_JLOOP // Jump to stitched trace. + |. sll RD, RD, 3 + | + | // Stitch a new trace to the previous trace. + | sw TMP3, DISPATCH_J(exitno)(DISPATCH) + | sd L, DISPATCH_J(L)(DISPATCH) + | sd BASE, L->base + | daddiu CARG1, DISPATCH, GG_DISP2J + | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + |. move CARG2, PC + | b ->cont_nop + |. ld BASE, L->base + | + |9: + | sd TISNIL, 0(RC) + | b <3 + |. daddiu RC, RC, 8 + |.endif + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | load_got lj_dispatch_profile + | sw MULTRES, SAVE_MULTRES + | move CARG2, PC + | sd BASE, L->base + | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) + |. move CARG1, L + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | daddiu PC, PC, -4 + | b ->cont_nop + |. ld BASE, L->base +#endif + | + |//----------------------------------------------------------------------- + |//-- Trace exit handler ------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro savex_, a, b + |.if FPU + | sdc1 f..a, a*8(sp) + | sdc1 f..b, b*8(sp) + | sd r..a, 32*8+a*8(sp) + | sd r..b, 32*8+b*8(sp) + |.else + | sd r..a, a*8(sp) + | sd r..b, b*8(sp) + |.endif + |.endmacro + | + |->vm_exit_handler: + |.if JIT + |.if FPU + | daddiu sp, sp, -(32*8+32*8) + |.else + | daddiu sp, sp, -(32*8) + |.endif + | savex_ 0, 1 + | savex_ 2, 3 + | savex_ 4, 5 + | savex_ 6, 7 + | savex_ 8, 9 + | savex_ 10, 11 + | savex_ 12, 13 + | savex_ 14, 15 + | savex_ 16, 17 + | savex_ 18, 19 + | savex_ 20, 21 + | savex_ 22, 23 + | savex_ 24, 25 + | savex_ 26, 27 + | savex_ 28, 30 + |.if FPU + | sdc1 f29, 29*8(sp) + | sdc1 f31, 31*8(sp) + | sd r0, 32*8+31*8(sp) // Clear RID_TMP. + | daddiu TMP2, sp, 32*8+32*8 // Recompute original value of sp. + | sd TMP2, 32*8+29*8(sp) // Store sp in RID_SP + |.else + | sd r0, 31*8(sp) // Clear RID_TMP. + | daddiu TMP2, sp, 32*8 // Recompute original value of sp. + | sd TMP2, 29*8(sp) // Store sp in RID_SP + |.endif + | li_vmstate EXIT + | daddiu DISPATCH, JGL, -GG_DISP2G-32768 + | lw TMP1, 0(TMP2) // Load exit number. + | st_vmstate + | ld L, DISPATCH_GL(cur_L)(DISPATCH) + | ld BASE, DISPATCH_GL(jit_base)(DISPATCH) + | load_got lj_trace_exit + | sd L, DISPATCH_J(L)(DISPATCH) + | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. + | sd BASE, L->base + | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. + | daddiu CARG1, DISPATCH, GG_DISP2J + | sd r0, DISPATCH_GL(jit_base)(DISPATCH) + | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) + |. move CARG2, sp + | // Returns MULTRES (unscaled) or negated error code. + | ld TMP1, L->cframe + | li AT, -4 + | ld BASE, L->base + | and sp, TMP1, AT + | ld PC, SAVE_PC // Get SAVE_PC. + | b >1 + |. sd L, SAVE_L // Set SAVE_L (on-trace resume/yield). + |.endif + |->vm_exit_interp: + |.if JIT + | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. + | ld L, SAVE_L + | daddiu DISPATCH, JGL, -GG_DISP2G-32768 + | sd BASE, L->base + |1: + | bltz CRET1, >9 // Check for error from exit. + |. ld LFUNC:RB, FRAME_FUNC(BASE) + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | dsll MULTRES, CRET1, 3 + | cleartp LFUNC:RB + | sw MULTRES, SAVE_MULTRES + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM // Setup type comparison constants. + | .FPU mtc1 TMP3, TOBIT + | ld TMP1, LFUNC:RB->pc + | sd r0, DISPATCH_GL(jit_base)(DISPATCH) + | ld KBASE, PC2PROTO(k)(TMP1) + | .FPU cvt.d.s TOBIT, TOBIT + | // Modified copy of ins_next which handles function header dispatch, too. + | lw INS, 0(PC) + | daddiu PC, PC, 4 + | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 + | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) + | decode_OP8a TMP1, INS + | decode_OP8b TMP1 + | sltiu TMP2, TMP1, BC_FUNCF*8 + | daddu TMP0, DISPATCH, TMP1 + | decode_RD8a RD, INS + | ld AT, 0(TMP0) + | decode_RA8a RA, INS + | beqz TMP2, >2 + |. decode_RA8b RA + | jr AT + |. decode_RD8b RD + |2: + | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? + | bnez TMP2, >3 + |. ld TMP1, FRAME_PC(BASE) + | // Check frame below fast function. + | andi TMP0, TMP1, FRAME_TYPE + | bnez TMP0, >3 // Trace stitching continuation? + |. nop + | // Otherwise set KBASE for Lua function below fast function. + | lw TMP2, -4(TMP1) + | decode_RA8a TMP0, TMP2 + | decode_RA8b TMP0 + | dsubu TMP1, BASE, TMP0 + | ld LFUNC:TMP2, -32(TMP1) + | cleartp LFUNC:TMP2 + | ld TMP1, LFUNC:TMP2->pc + | ld KBASE, PC2PROTO(k)(TMP1) + |3: + | daddiu RC, MULTRES, -8 + | jr AT + |. daddu RA, RA, BASE + | + |9: // Rethrow error from the right C frame. + | load_got lj_err_throw + | negu CARG2, CRET1 + | call_intern lj_err_throw // (lua_State *L, int errcode) + |. move CARG1, L + |.endif + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Hard-float round to integer. + |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. + |.macro vm_round_hf, func + | lui TMP0, 0x4330 // Hiword of 2^52 (double). + | dsll TMP0, TMP0, 32 + | dmtc1 TMP0, f4 + | abs.d FRET2, FARG1 // |x| + | dmfc1 AT, FARG1 + | c.olt.d 0, FRET2, f4 + | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 + | bc1f 0, >1 // Truncate only if |x| < 2^52. + |. sub.d FRET1, FRET1, f4 + | slt AT, AT, r0 + |.if "func" == "ceil" + | lui TMP0, 0xbff0 // Hiword of -1 (double). Preserves -0. + |.else + | lui TMP0, 0x3ff0 // Hiword of +1 (double). + |.endif + |.if "func" == "trunc" + | dsll TMP0, TMP0, 32 + | dmtc1 TMP0, f4 + | c.olt.d 0, FRET2, FRET1 // |x| < result? + | sub.d FRET2, FRET1, f4 + | movt.d FRET1, FRET2, 0 // If yes, subtract +1. + | neg.d FRET2, FRET1 + | jr ra + |. movn.d FRET1, FRET2, AT // Merge sign bit back in. + |.else + | neg.d FRET2, FRET1 + | dsll TMP0, TMP0, 32 + | dmtc1 TMP0, f4 + | movn.d FRET1, FRET2, AT // Merge sign bit back in. + |.if "func" == "ceil" + | c.olt.d 0, FRET1, FARG1 // x > result? + |.else + | c.olt.d 0, FARG1, FRET1 // x < result? + |.endif + | sub.d FRET2, FRET1, f4 // If yes, subtract +-1. + | jr ra + |. movt.d FRET1, FRET2, 0 + |.endif + |1: + | jr ra + |. mov.d FRET1, FARG1 + |.endmacro + | + |.macro vm_round, func + |.if FPU + | vm_round_hf, func + |.endif + |.endmacro + | + |->vm_floor: + | vm_round floor + |->vm_ceil: + | vm_round ceil + |->vm_trunc: + |.if JIT + | vm_round trunc + |.endif + | + |// Soft-float integer to number conversion. + |.macro sfi2d, ARG + |.if not FPU + | beqz ARG, >9 // Handle zero first. + |. sra TMP0, ARG, 31 + | xor TMP1, ARG, TMP0 + | dsubu TMP1, TMP1, TMP0 // Absolute value in TMP1. + | dclz ARG, TMP1 + | addiu ARG, ARG, -11 + | li AT, 0x3ff+63-11-1 + | dsllv TMP1, TMP1, ARG // Align mantissa left with leading 1. + | subu ARG, AT, ARG // Exponent - 1. + | ins ARG, TMP0, 11, 11 // Sign | Exponent. + | dsll ARG, ARG, 52 // Align left. + | jr ra + |. daddu ARG, ARG, TMP1 // Add mantissa, increment exponent. + |9: + | jr ra + |. nop + |.endif + |.endmacro + | + |// Input CARG1. Output: CARG1. Temporaries: AT, TMP0, TMP1. + |->vm_sfi2d_1: + | sfi2d CARG1 + | + |// Input CARG2. Output: CARG2. Temporaries: AT, TMP0, TMP1. + |->vm_sfi2d_2: + | sfi2d CARG2 + | + |// Soft-float comparison. Equivalent to c.eq.d. + |// Input: CARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. + |->vm_sfcmpeq: + |.if not FPU + | dsll AT, CARG1, 1 + | dsll TMP0, CARG2, 1 + | or TMP1, AT, TMP0 + | beqz TMP1, >8 // Both args +-0: return 1. + |. lui TMP1, 0xffe0 + | dsll TMP1, TMP1, 32 + | sltu AT, TMP1, AT + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0; + |. xor AT, CARG1, CARG2 + | jr ra + |. sltiu CRET1, AT, 1 // Same values: return 1. + |8: + | jr ra + |. li CRET1, 1 + |9: + | jr ra + |. li CRET1, 0 + |.endif + | + |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. + |// Input: CARG1, CARG2. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. + |->vm_sfcmpult: + |.if not FPU + | b >1 + |. li CRET2, 1 + |.endif + | + |->vm_sfcmpolt: + |.if not FPU + | li CRET2, 0 + |1: + | dsll AT, CARG1, 1 + | dsll TMP0, CARG2, 1 + | or TMP1, AT, TMP0 + | beqz TMP1, >8 // Both args +-0: return 0. + |. lui TMP1, 0xffe0 + | dsll TMP1, TMP1, 32 + | sltu AT, TMP1, AT + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; + |. and AT, CARG1, CARG2 + | bltz AT, >5 // Both args negative? + |. nop + | jr ra + |. slt CRET1, CARG1, CARG2 + |5: // Swap conditions if both operands are negative. + | jr ra + |. slt CRET1, CARG2, CARG1 + |8: + | jr ra + |. nop + |9: + | jr ra + |. move CRET1, CRET2 + |.endif + | + |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. + |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. + |->vm_sfcmpolex: + |.if not FPU + | dsll AT, CARG1, 1 + | dsll TMP0, CARG2, 1 + | or TMP1, AT, TMP0 + | beqz TMP1, >8 // Both args +-0: return 1. + |. lui TMP1, 0xffe0 + | dsll TMP1, TMP1, 32 + | sltu AT, TMP1, AT + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0; + |. and AT, CARG1, CARG2 + | xor AT, AT, TMP3 + | bltz AT, >5 // Both args negative? + |. nop + | jr ra + |. slt CRET1, CARG2, CARG1 + |5: // Swap conditions if both operands are negative. + | jr ra + |. slt CRET1, CARG1, CARG2 + |8: + | jr ra + |. li CRET1, 1 + |9: + | jr ra + |. li CRET1, 0 + |.endif + | + |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Handler for callback functions. Callback slot number in r1, g in r2. + |->vm_ffi_callback: + |.if FFI + |.type CTSTATE, CTState, PC + | saveregs + | ld CTSTATE, GL:r2->ctype_state + | daddiu DISPATCH, r2, GG_G2DISP + | load_got lj_ccallback_enter + | sw r1, CTSTATE->cb.slot + | sd CARG1, CTSTATE->cb.gpr[0] + | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] + | sd CARG2, CTSTATE->cb.gpr[1] + | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] + | sd CARG3, CTSTATE->cb.gpr[2] + | .FPU sdc1 FARG3, CTSTATE->cb.fpr[2] + | sd CARG4, CTSTATE->cb.gpr[3] + | .FPU sdc1 FARG4, CTSTATE->cb.fpr[3] + | sd CARG5, CTSTATE->cb.gpr[4] + | .FPU sdc1 FARG5, CTSTATE->cb.fpr[4] + | sd CARG6, CTSTATE->cb.gpr[5] + | .FPU sdc1 FARG6, CTSTATE->cb.fpr[5] + | sd CARG7, CTSTATE->cb.gpr[6] + | .FPU sdc1 FARG7, CTSTATE->cb.fpr[6] + | sd CARG8, CTSTATE->cb.gpr[7] + | .FPU sdc1 FARG8, CTSTATE->cb.fpr[7] + | daddiu TMP0, sp, CFRAME_SPACE + | sd TMP0, CTSTATE->cb.stack + | sd r0, SAVE_PC // Any value outside of bytecode is ok. + | move CARG2, sp + | call_intern lj_ccallback_enter // (CTState *cts, void *cf) + |. move CARG1, CTSTATE + | // Returns lua_State *. + | ld BASE, L:CRET1->base + | ld RC, L:CRET1->top + | move L, CRET1 + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | ld LFUNC:RB, FRAME_FUNC(BASE) + | .FPU mtc1 TMP3, TOBIT + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM + | li_vmstate INTERP + | subu RC, RC, BASE + | cleartp LFUNC:RB + | st_vmstate + | .FPU cvt.d.s TOBIT, TOBIT + | ins_callt + |.endif + | + |->cont_ffi_callback: // Return from FFI callback. + |.if FFI + | load_got lj_ccallback_leave + | ld CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) + | sd BASE, L->base + | sd RB, L->top + | sd L, CTSTATE->L + | move CARG2, RA + | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) + |. move CARG1, CTSTATE + | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] + | ld CRET1, CTSTATE->cb.gpr[0] + | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] + | b ->vm_leave_unw + |. ld CRET2, CTSTATE->cb.gpr[1] + |.endif + | + |->vm_ffi_call: // Call C function via FFI. + | // Caveat: needs special frame unwinding, see below. + |.if FFI + | .type CCSTATE, CCallState, CARG1 + | lw TMP1, CCSTATE->spadj + | lbu CARG2, CCSTATE->nsp + | move TMP2, sp + | dsubu sp, sp, TMP1 + | sd ra, -8(TMP2) + | sll CARG2, CARG2, 3 + | sd r16, -16(TMP2) + | sd CCSTATE, -24(TMP2) + | move r16, TMP2 + | daddiu TMP1, CCSTATE, offsetof(CCallState, stack) + | move TMP2, sp + | beqz CARG2, >2 + |. daddu TMP3, TMP1, CARG2 + |1: + | ld TMP0, 0(TMP1) + | daddiu TMP1, TMP1, 8 + | sltu AT, TMP1, TMP3 + | sd TMP0, 0(TMP2) + | bnez AT, <1 + |. daddiu TMP2, TMP2, 8 + |2: + | ld CFUNCADDR, CCSTATE->func + | .FPU ldc1 FARG1, CCSTATE->gpr[0] + | ld CARG2, CCSTATE->gpr[1] + | .FPU ldc1 FARG2, CCSTATE->gpr[1] + | ld CARG3, CCSTATE->gpr[2] + | .FPU ldc1 FARG3, CCSTATE->gpr[2] + | ld CARG4, CCSTATE->gpr[3] + | .FPU ldc1 FARG4, CCSTATE->gpr[3] + | ld CARG5, CCSTATE->gpr[4] + | .FPU ldc1 FARG5, CCSTATE->gpr[4] + | ld CARG6, CCSTATE->gpr[5] + | .FPU ldc1 FARG6, CCSTATE->gpr[5] + | ld CARG7, CCSTATE->gpr[6] + | .FPU ldc1 FARG7, CCSTATE->gpr[6] + | ld CARG8, CCSTATE->gpr[7] + | .FPU ldc1 FARG8, CCSTATE->gpr[7] + | jalr CFUNCADDR + |. ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. + | ld CCSTATE:TMP1, -24(r16) + | ld TMP2, -16(r16) + | ld ra, -8(r16) + | sd CRET1, CCSTATE:TMP1->gpr[0] + | sd CRET2, CCSTATE:TMP1->gpr[1] + |.if FPU + | sdc1 FRET1, CCSTATE:TMP1->fpr[0] + | sdc1 FRET2, CCSTATE:TMP1->fpr[1] + |.else + | sd CARG1, CCSTATE:TMP1->gpr[2] // 2nd FP struct field for soft-float. + |.endif + | move sp, r16 + | jr ra + |. move r16, TMP2 + |.endif + |// Note: vm_ffi_call must be the last function in this object file! + | + |//----------------------------------------------------------------------- +} + +/* Generate the code for a single instruction. */ +static void build_ins(BuildCtx *ctx, BCOp op, int defop) +{ + int vk = 0; + |=>defop: + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. */ + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1*8, RD = src2*8, JMP with RD = target + |.macro bc_comp, FRA, FRD, ARGRA, ARGRD, movop, fmovop, fcomp, sfcomp + | daddu RA, BASE, RA + | daddu RD, BASE, RD + | ld ARGRA, 0(RA) + | ld ARGRD, 0(RD) + | lhu TMP2, OFS_RD(PC) + | gettp CARG3, ARGRA + | gettp CARG4, ARGRD + | bne CARG3, TISNUM, >2 + |. daddiu PC, PC, 4 + | bne CARG4, TISNUM, >5 + |. decode_RD4b TMP2 + | sextw ARGRA, ARGRA + | sextw ARGRD, ARGRD + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | slt AT, CARG1, CARG2 + | addu TMP2, TMP2, TMP3 + | movop TMP2, r0, AT + |1: + | daddu PC, PC, TMP2 + | ins_next + | + |2: // RA is not an integer. + | sltiu AT, CARG3, LJ_TISNUM + | beqz AT, ->vmeta_comp + |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | sltiu AT, CARG4, LJ_TISNUM + | beqz AT, >4 + |. decode_RD4b TMP2 + |.if FPU + | ldc1 FRA, 0(RA) + | ldc1 FRD, 0(RD) + |.endif + |3: // RA and RD are both numbers. + |.if FPU + | fcomp f20, f22 + | addu TMP2, TMP2, TMP3 + | b <1 + |. fmovop TMP2, r0 + |.else + | bal sfcomp + |. addu TMP2, TMP2, TMP3 + | b <1 + |. movop TMP2, r0, CRET1 + |.endif + | + |4: // RA is a number, RD is not a number. + | bne CARG4, TISNUM, ->vmeta_comp + | // RA is a number, RD is an integer. Convert RD to a number. + |.if FPU + |. lwc1 FRD, LO(RD) + | ldc1 FRA, 0(RA) + | b <3 + |. cvt.d.w FRD, FRD + |.else + |.if "ARGRD" == "CARG1" + |. sextw CARG1, CARG1 + | bal ->vm_sfi2d_1 + |. nop + |.else + |. sextw CARG2, CARG2 + | bal ->vm_sfi2d_2 + |. nop + |.endif + | b <3 + |. nop + |.endif + | + |5: // RA is an integer, RD is not an integer + | sltiu AT, CARG4, LJ_TISNUM + | beqz AT, ->vmeta_comp + |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | // RA is an integer, RD is a number. Convert RA to a number. + |.if FPU + | lwc1 FRA, LO(RA) + | ldc1 FRD, 0(RD) + | b <3 + | cvt.d.w FRA, FRA + |.else + |.if "ARGRA" == "CARG1" + | bal ->vm_sfi2d_1 + |. sextw CARG1, CARG1 + |.else + | bal ->vm_sfi2d_2 + |. sextw CARG2, CARG2 + |.endif + | b <3 + |. nop + |.endif + |.endmacro + | + if (op == BC_ISLT) { + | bc_comp f20, f22, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt + } else if (op == BC_ISGE) { + | bc_comp f20, f22, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt + } else if (op == BC_ISLE) { + | bc_comp f22, f20, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult + } else { + | bc_comp f22, f20, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult + } + break; + + case BC_ISEQV: case BC_ISNEV: + vk = op == BC_ISEQV; + | // RA = src1*8, RD = src2*8, JMP with RD = target + | daddu RA, BASE, RA + | daddiu PC, PC, 4 + | daddu RD, BASE, RD + | ld CARG1, 0(RA) + | lhu TMP2, -4+OFS_RD(PC) + | ld CARG2, 0(RD) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + | sltu AT, TISNUM, CARG3 + | sltu TMP1, TISNUM, CARG4 + | or AT, AT, TMP1 + if (vk) { + | beqz AT, ->BC_ISEQN_Z + } else { + | beqz AT, ->BC_ISNEN_Z + } + | // Either or both types are not numbers. + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + |.if FFI + |. li AT, LJ_TCDATA + | beq CARG3, AT, ->vmeta_equal_cd + |.endif + | decode_RD4b TMP2 + |.if FFI + | beq CARG4, AT, ->vmeta_equal_cd + |. nop + |.endif + | bne CARG1, CARG2, >2 + |. addu TMP2, TMP2, TMP3 + | // Tag and value are equal. + if (vk) { + |->BC_ISEQV_Z: + | daddu PC, PC, TMP2 + } + |1: + | ins_next + | + |2: // Check if the tags are the same and it's a table or userdata. + | xor AT, CARG3, CARG4 // Same type? + | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? + | movn TMP0, r0, AT + if (vk) { + | beqz TMP0, <1 + } else { + | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. + } + | // Different tables or userdatas. Need to check __eq metamethod. + | // Field metatable must be at same offset for GCtab and GCudata! + |. cleartp TAB:TMP1, CARG1 + | ld TAB:TMP3, TAB:TMP1->metatable + if (vk) { + | beqz TAB:TMP3, <1 // No metatable? + |. nop + | lbu TMP3, TAB:TMP3->nomm + | andi TMP3, TMP3, 1<<MM_eq + | bnez TMP3, >1 // Or 'no __eq' flag set? + } else { + | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable? + |. nop + | lbu TMP3, TAB:TMP3->nomm + | andi TMP3, TMP3, 1<<MM_eq + | bnez TMP3, ->BC_ISEQV_Z // Or 'no __eq' flag set? + } + |. nop + | b ->vmeta_equal // Handle __eq metamethod. + |. li TMP0, 1-vk // ne = 0 or 1. + break; + + case BC_ISEQS: case BC_ISNES: + vk = op == BC_ISEQS; + | // RA = src*8, RD = str_const*8 (~), JMP with RD = target + | daddu RA, BASE, RA + | daddiu PC, PC, 4 + | ld CARG1, 0(RA) + | dsubu RD, KBASE, RD + | lhu TMP2, -4+OFS_RD(PC) + | ld CARG2, -8(RD) // KBASE-8-str_const*8 + |.if FFI + | gettp TMP0, CARG1 + | li AT, LJ_TCDATA + |.endif + | li TMP1, LJ_TSTR + | decode_RD4b TMP2 + |.if FFI + | beq TMP0, AT, ->vmeta_equal_cd + |.endif + |. settp CARG2, TMP1 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | xor TMP1, CARG1, CARG2 + | addu TMP2, TMP2, TMP3 + if (vk) { + | movn TMP2, r0, TMP1 + } else { + | movz TMP2, r0, TMP1 + } + | daddu PC, PC, TMP2 + | ins_next + break; + + case BC_ISEQN: case BC_ISNEN: + vk = op == BC_ISEQN; + | // RA = src*8, RD = num_const*8, JMP with RD = target + | daddu RA, BASE, RA + | daddu RD, KBASE, RD + | ld CARG1, 0(RA) + | ld CARG2, 0(RD) + | lhu TMP2, OFS_RD(PC) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + | daddiu PC, PC, 4 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + if (vk) { + |->BC_ISEQN_Z: + } else { + |->BC_ISNEN_Z: + } + | bne CARG3, TISNUM, >3 + |. decode_RD4b TMP2 + | bne CARG4, TISNUM, >6 + |. addu TMP2, TMP2, TMP3 + | xor AT, CARG1, CARG2 + if (vk) { + | movn TMP2, r0, AT + |1: + | daddu PC, PC, TMP2 + |2: + } else { + | movz TMP2, r0, AT + |1: + |2: + | daddu PC, PC, TMP2 + } + | ins_next + | + |3: // RA is not an integer. + | sltu AT, CARG3, TISNUM + |.if FFI + | beqz AT, >8 + |.else + | beqz AT, <2 + |.endif + |. addu TMP2, TMP2, TMP3 + | sltu AT, CARG4, TISNUM + |.if FPU + | ldc1 f20, 0(RA) + | ldc1 f22, 0(RD) + |.endif + | beqz AT, >5 + |. nop + |4: // RA and RD are both numbers. + |.if FPU + | c.eq.d f20, f22 + | b <1 + if (vk) { + |. movf TMP2, r0 + } else { + |. movt TMP2, r0 + } + |.else + | bal ->vm_sfcmpeq + |. nop + | b <1 + if (vk) { + |. movz TMP2, r0, CRET1 + } else { + |. movn TMP2, r0, CRET1 + } + |.endif + | + |5: // RA is a number, RD is not a number. + |.if FFI + | bne CARG4, TISNUM, >9 + |.else + | bne CARG4, TISNUM, <2 + |.endif + | // RA is a number, RD is an integer. Convert RD to a number. + |.if FPU + |. lwc1 f22, LO(RD) + | b <4 + |. cvt.d.w f22, f22 + |.else + |. sextw CARG2, CARG2 + | bal ->vm_sfi2d_2 + |. nop + | b <4 + |. nop + |.endif + | + |6: // RA is an integer, RD is not an integer + | sltu AT, CARG4, TISNUM + |.if FFI + | beqz AT, >9 + |.else + | beqz AT, <2 + |.endif + | // RA is an integer, RD is a number. Convert RA to a number. + |.if FPU + |. lwc1 f20, LO(RA) + | ldc1 f22, 0(RD) + | b <4 + | cvt.d.w f20, f20 + |.else + |. sextw CARG1, CARG1 + | bal ->vm_sfi2d_1 + |. nop + | b <4 + |. nop + |.endif + | + |.if FFI + |8: + | li AT, LJ_TCDATA + | bne CARG3, AT, <2 + |. nop + | b ->vmeta_equal_cd + |. nop + |9: + | li AT, LJ_TCDATA + | bne CARG4, AT, <2 + |. nop + | b ->vmeta_equal_cd + |. nop + |.endif + break; + + case BC_ISEQP: case BC_ISNEP: + vk = op == BC_ISEQP; + | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target + | daddu RA, BASE, RA + | srl TMP1, RD, 3 + | ld TMP0, 0(RA) + | lhu TMP2, OFS_RD(PC) + | not TMP1, TMP1 + | gettp TMP0, TMP0 + | daddiu PC, PC, 4 + |.if FFI + | li AT, LJ_TCDATA + | beq TMP0, AT, ->vmeta_equal_cd + |.endif + |. xor TMP0, TMP0, TMP1 + | decode_RD4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | addu TMP2, TMP2, TMP3 + if (vk) { + | movn TMP2, r0, TMP0 + } else { + | movz TMP2, r0, TMP0 + } + | daddu PC, PC, TMP2 + | ins_next + break; + + /* -- Unary test and copy ops ------------------------------------------- */ + + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: + | // RA = dst*8 or unused, RD = src*8, JMP with RD = target + | daddu RD, BASE, RD + | lhu TMP2, OFS_RD(PC) + | ld TMP0, 0(RD) + | daddiu PC, PC, 4 + | gettp TMP0, TMP0 + | sltiu TMP0, TMP0, LJ_TISTRUECOND + if (op == BC_IST || op == BC_ISF) { + | decode_RD4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | addu TMP2, TMP2, TMP3 + if (op == BC_IST) { + | movz TMP2, r0, TMP0 + } else { + | movn TMP2, r0, TMP0 + } + | daddu PC, PC, TMP2 + } else { + | ld CRET1, 0(RD) + if (op == BC_ISTC) { + | beqz TMP0, >1 + } else { + | bnez TMP0, >1 + } + |. daddu RA, BASE, RA + | decode_RD4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | addu TMP2, TMP2, TMP3 + | sd CRET1, 0(RA) + | daddu PC, PC, TMP2 + |1: + } + | ins_next + break; + + case BC_ISTYPE: + | // RA = src*8, RD = -type*8 + | daddu TMP2, BASE, RA + | srl TMP1, RD, 3 + | ld TMP0, 0(TMP2) + | ins_next1 + | gettp TMP0, TMP0 + | daddu AT, TMP0, TMP1 + | bnez AT, ->vmeta_istype + |. ins_next2 + break; + case BC_ISNUM: + | // RA = src*8, RD = -(TISNUM-1)*8 + | daddu TMP2, BASE, RA + | ld TMP0, 0(TMP2) + | ins_next1 + | checknum TMP0, ->vmeta_istype + |. ins_next2 + break; + + /* -- Unary ops --------------------------------------------------------- */ + + case BC_MOV: + | // RA = dst*8, RD = src*8 + | daddu RD, BASE, RD + | daddu RA, BASE, RA + | ld CRET1, 0(RD) + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + break; + case BC_NOT: + | // RA = dst*8, RD = src*8 + | daddu RD, BASE, RD + | daddu RA, BASE, RA + | ld TMP0, 0(RD) + | li AT, LJ_TTRUE + | gettp TMP0, TMP0 + | sltu TMP0, AT, TMP0 + | addiu TMP0, TMP0, 1 + | dsll TMP0, TMP0, 47 + | not TMP0, TMP0 + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_UNM: + | // RA = dst*8, RD = src*8 + | daddu RB, BASE, RD + | ld CARG1, 0(RB) + | daddu RA, BASE, RA + | gettp CARG3, CARG1 + | bne CARG3, TISNUM, >2 + |. lui TMP1, 0x8000 + | sextw CARG1, CARG1 + | beq CARG1, TMP1, ->vmeta_unm // Meta handler deals with -2^31. + |. negu CARG1, CARG1 + | zextw CARG1, CARG1 + | settp CARG1, TISNUM + |1: + | ins_next1 + | sd CARG1, 0(RA) + | ins_next2 + |2: + | sltiu AT, CARG3, LJ_TISNUM + | beqz AT, ->vmeta_unm + |. dsll TMP1, TMP1, 32 + | b <1 + |. xor CARG1, CARG1, TMP1 + break; + case BC_LEN: + | // RA = dst*8, RD = src*8 + | daddu CARG2, BASE, RD + | daddu RA, BASE, RA + | ld TMP0, 0(CARG2) + | gettp TMP1, TMP0 + | daddiu AT, TMP1, -LJ_TSTR + | bnez AT, >2 + |. cleartp STR:CARG1, TMP0 + | lw CRET1, STR:CARG1->len + |1: + | settp CRET1, TISNUM + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + |2: + | daddiu AT, TMP1, -LJ_TTAB + | bnez AT, ->vmeta_len + |. nop +#if LJ_52 + | ld TAB:TMP2, TAB:CARG1->metatable + | bnez TAB:TMP2, >9 + |. nop + |3: +#endif + |->BC_LEN_Z: + | load_got lj_tab_len + | call_intern lj_tab_len // (GCtab *t) + |. nop + | // Returns uint32_t (but less than 2^31). + | b <1 + |. nop +#if LJ_52 + |9: + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<<MM_len + | bnez TMP0, <3 // 'no __len' flag set: done. + |. nop + | b ->vmeta_len + |. nop +#endif + break; + + /* -- Binary ops -------------------------------------------------------- */ + + |.macro fpmod, a, b, c + | bal ->vm_floor // floor(b/c) + |. div.d FARG1, b, c + | mul.d a, FRET1, c + | sub.d a, b, a // b - floor(b/c)*c + |.endmacro + + |.macro sfpmod + | daddiu sp, sp, -16 + | + | load_got __divdf3 + | sd CARG1, 0(sp) + | call_extern + |. sd CARG2, 8(sp) + | + | load_got floor + | call_extern + |. move CARG1, CRET1 + | + | load_got __muldf3 + | move CARG1, CRET1 + | call_extern + |. ld CARG2, 8(sp) + | + | load_got __subdf3 + | ld CARG1, 0(sp) + | call_extern + |. move CARG2, CRET1 + | + | daddiu sp, sp, 16 + |.endmacro + + |.macro ins_arithpre, label + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 + ||switch (vk) { + ||case 0: + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | // RA = dst*8, RB = src1*8, RC = num_const*8 + | daddu RB, BASE, RB + |.if "label" ~= "none" + | b label + |.endif + |. daddu RC, KBASE, RC + || break; + ||case 1: + | decode_RB8a RC, INS + | decode_RB8b RC + | decode_RDtoRC8 RB, RD + | // RA = dst*8, RB = num_const*8, RC = src1*8 + | daddu RC, BASE, RC + |.if "label" ~= "none" + | b label + |.endif + |. daddu RB, KBASE, RB + || break; + ||default: + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | // RA = dst*8, RB = src1*8, RC = src2*8 + | daddu RB, BASE, RB + |.if "label" ~= "none" + | b label + |.endif + |. daddu RC, BASE, RC + || break; + ||} + |.endmacro + | + |.macro ins_arith, intins, fpins, fpcall, label + | ins_arithpre none + | + |.if "label" ~= "none" + |label: + |.endif + | + |// Used in 5. + | ld CARG1, 0(RB) + | ld CARG2, 0(RC) + | gettp TMP0, CARG1 + | gettp TMP1, CARG2 + | + |.if "intins" ~= "div" + | + | // Check for two integers. + | sextw CARG3, CARG1 + | bne TMP0, TISNUM, >5 + |. sextw CARG4, CARG2 + | bne TMP1, TISNUM, >5 + | + |.if "intins" == "addu" + |. intins CRET1, CARG3, CARG4 + | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. + | xor TMP2, CRET1, CARG4 + | and TMP1, TMP1, TMP2 + | bltz TMP1, ->vmeta_arith + |. daddu RA, BASE, RA + |.elif "intins" == "subu" + |. intins CRET1, CARG3, CARG4 + | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. + | xor TMP2, CARG3, CARG4 + | and TMP1, TMP1, TMP2 + | bltz TMP1, ->vmeta_arith + |. daddu RA, BASE, RA + |.elif "intins" == "mult" + |. intins CARG3, CARG4 + | mflo CRET1 + | mfhi TMP2 + | sra TMP1, CRET1, 31 + | bne TMP1, TMP2, ->vmeta_arith + |. daddu RA, BASE, RA + |.else + |. load_got lj_vm_modi + | beqz CARG4, ->vmeta_arith + |. daddu RA, BASE, RA + | move CARG1, CARG3 + | call_extern + |. move CARG2, CARG4 + |.endif + | + | zextw CRET1, CRET1 + | settp CRET1, TISNUM + | ins_next1 + | sd CRET1, 0(RA) + |3: + | ins_next2 + | + |.endif + | + |5: // Check for two numbers. + | .FPU ldc1 f20, 0(RB) + | sltu AT, TMP0, TISNUM + | sltu TMP0, TMP1, TISNUM + | .FPU ldc1 f22, 0(RC) + | and AT, AT, TMP0 + | beqz AT, ->vmeta_arith + |. daddu RA, BASE, RA + | + |.if FPU + | fpins FRET1, f20, f22 + |.elif "fpcall" == "sfpmod" + | sfpmod + |.else + | load_got fpcall + | call_extern + |. nop + |.endif + | + | ins_next1 + |.if "intins" ~= "div" + | b <3 + |.endif + |.if FPU + |. sdc1 FRET1, 0(RA) + |.else + |. sd CRET1, 0(RA) + |.endif + |.if "intins" == "div" + | ins_next2 + |.endif + | + |.endmacro + + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + | ins_arith addu, add.d, __adddf3, none + break; + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: + | ins_arith subu, sub.d, __subdf3, none + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + | ins_arith mult, mul.d, __muldf3, none + break; + case BC_DIVVN: + | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z + break; + case BC_DIVNV: case BC_DIVVV: + | ins_arithpre ->BC_DIVVN_Z + break; + case BC_MODVN: + | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z + break; + case BC_MODNV: case BC_MODVV: + | ins_arithpre ->BC_MODVN_Z + break; + case BC_POW: + | ins_arithpre none + | ld CARG1, 0(RB) + | ld CARG2, 0(RC) + | gettp TMP0, CARG1 + | gettp TMP1, CARG2 + | sltiu TMP0, TMP0, LJ_TISNUM + | sltiu TMP1, TMP1, LJ_TISNUM + | and AT, TMP0, TMP1 + | load_got pow + | beqz AT, ->vmeta_arith + |. daddu RA, BASE, RA + |.if FPU + | ldc1 FARG1, 0(RB) + | ldc1 FARG2, 0(RC) + |.endif + | call_extern + |. nop + | ins_next1 + |.if FPU + | sdc1 FRET1, 0(RA) + |.else + | sd CRET1, 0(RA) + |.endif + | ins_next2 + break; + + case BC_CAT: + | // RA = dst*8, RB = src_start*8, RC = src_end*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | dsubu CARG3, RC, RB + | sd BASE, L->base + | daddu CARG2, BASE, RC + | move MULTRES, RB + |->BC_CAT_Z: + | load_got lj_meta_cat + | srl CARG3, CARG3, 3 + | sd PC, SAVE_PC + | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left) + |. move CARG1, L + | // Returns NULL (finished) or TValue * (metamethod). + | bnez CRET1, ->vmeta_binop + |. ld BASE, L->base + | daddu RB, BASE, MULTRES + | ld CRET1, 0(RB) + | daddu RA, BASE, RA + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + | // RA = dst*8, RD = str_const*8 (~) + | dsubu TMP1, KBASE, RD + | ins_next1 + | li TMP2, LJ_TSTR + | ld TMP0, -8(TMP1) // KBASE-8-str_const*8 + | daddu RA, BASE, RA + | settp TMP0, TMP2 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_KCDATA: + |.if FFI + | // RA = dst*8, RD = cdata_const*8 (~) + | dsubu TMP1, KBASE, RD + | ins_next1 + | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8 + | li TMP2, LJ_TCDATA + | daddu RA, BASE, RA + | settp TMP0, TMP2 + | sd TMP0, 0(RA) + | ins_next2 + |.endif + break; + case BC_KSHORT: + | // RA = dst*8, RD = int16_literal*8 + | sra RD, INS, 16 + | daddu RA, BASE, RA + | zextw RD, RD + | ins_next1 + | settp RD, TISNUM + | sd RD, 0(RA) + | ins_next2 + break; + case BC_KNUM: + | // RA = dst*8, RD = num_const*8 + | daddu RD, KBASE, RD + | daddu RA, BASE, RA + | ld CRET1, 0(RD) + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + break; + case BC_KPRI: + | // RA = dst*8, RD = primitive_type*8 (~) + | daddu RA, BASE, RA + | dsll TMP0, RD, 44 + | not TMP0, TMP0 + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_KNIL: + | // RA = base*8, RD = end*8 + | daddu RA, BASE, RA + | sd TISNIL, 0(RA) + | daddiu RA, RA, 8 + | daddu RD, BASE, RD + |1: + | sd TISNIL, 0(RA) + | slt AT, RA, RD + | bnez AT, <1 + |. daddiu RA, RA, 8 + | ins_next_ + break; + + /* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + | // RA = dst*8, RD = uvnum*8 + | ld LFUNC:RB, FRAME_FUNC(BASE) + | daddu RA, BASE, RA + | cleartp LFUNC:RB + | daddu RD, RD, LFUNC:RB + | ld UPVAL:RB, LFUNC:RD->uvptr + | ins_next1 + | ld TMP1, UPVAL:RB->v + | ld CRET1, 0(TMP1) + | sd CRET1, 0(RA) + | ins_next2 + break; + case BC_USETV: + | // RA = uvnum*8, RD = src*8 + | ld LFUNC:RB, FRAME_FUNC(BASE) + | daddu RD, BASE, RD + | cleartp LFUNC:RB + | daddu RA, RA, LFUNC:RB + | ld UPVAL:RB, LFUNC:RA->uvptr + | ld CRET1, 0(RD) + | lbu TMP3, UPVAL:RB->marked + | ld CARG2, UPVAL:RB->v + | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) + | lbu TMP0, UPVAL:RB->closed + | gettp TMP2, CRET1 + | sd CRET1, 0(CARG2) + | li AT, LJ_GC_BLACK|1 + | or TMP3, TMP3, TMP0 + | beq TMP3, AT, >2 // Upvalue is closed and black? + |. daddiu TMP2, TMP2, -(LJ_TNUMX+1) + |1: + | ins_next + | + |2: // Check if new value is collectable. + | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) + | beqz AT, <1 // tvisgcv(v) + |. cleartp GCOBJ:CRET1, CRET1 + | lbu TMP3, GCOBJ:CRET1->gch.marked + | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) + | beqz TMP3, <1 + |. load_got lj_gc_barrieruv + | // Crossed a write barrier. Move the barrier forward. + | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) + |. daddiu CARG1, DISPATCH, GG_DISP2G + | b <1 + |. nop + break; + case BC_USETS: + | // RA = uvnum*8, RD = str_const*8 (~) + | ld LFUNC:RB, FRAME_FUNC(BASE) + | dsubu TMP1, KBASE, RD + | cleartp LFUNC:RB + | daddu RA, RA, LFUNC:RB + | ld UPVAL:RB, LFUNC:RA->uvptr + | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 + | lbu TMP2, UPVAL:RB->marked + | ld CARG2, UPVAL:RB->v + | lbu TMP3, STR:TMP1->marked + | andi AT, TMP2, LJ_GC_BLACK // isblack(uv) + | lbu TMP2, UPVAL:RB->closed + | li TMP0, LJ_TSTR + | settp TMP1, TMP0 + | bnez AT, >2 + |. sd TMP1, 0(CARG2) + |1: + | ins_next + | + |2: // Check if string is white and ensure upvalue is closed. + | beqz TMP2, <1 + |. andi AT, TMP3, LJ_GC_WHITES // iswhite(str) + | beqz AT, <1 + |. load_got lj_gc_barrieruv + | // Crossed a write barrier. Move the barrier forward. + | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) + |. daddiu CARG1, DISPATCH, GG_DISP2G + | b <1 + |. nop + break; + case BC_USETN: + | // RA = uvnum*8, RD = num_const*8 + | ld LFUNC:RB, FRAME_FUNC(BASE) + | daddu RD, KBASE, RD + | cleartp LFUNC:RB + | daddu RA, RA, LFUNC:RB + | ld UPVAL:RB, LFUNC:RA->uvptr + | ld CRET1, 0(RD) + | ld TMP1, UPVAL:RB->v + | ins_next1 + | sd CRET1, 0(TMP1) + | ins_next2 + break; + case BC_USETP: + | // RA = uvnum*8, RD = primitive_type*8 (~) + | ld LFUNC:RB, FRAME_FUNC(BASE) + | dsll TMP0, RD, 44 + | cleartp LFUNC:RB + | daddu RA, RA, LFUNC:RB + | not TMP0, TMP0 + | ld UPVAL:RB, LFUNC:RA->uvptr + | ins_next1 + | ld TMP1, UPVAL:RB->v + | sd TMP0, 0(TMP1) + | ins_next2 + break; + + case BC_UCLO: + | // RA = level*8, RD = target + | ld TMP2, L->openupval + | branch_RD // Do this first since RD is not saved. + | load_got lj_func_closeuv + | sd BASE, L->base + | beqz TMP2, >1 + |. move CARG1, L + | call_intern lj_func_closeuv // (lua_State *L, TValue *level) + |. daddu CARG2, BASE, RA + | ld BASE, L->base + |1: + | ins_next + break; + + case BC_FNEW: + | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) + | load_got lj_func_newL_gc + | dsubu TMP1, KBASE, RD + | ld CARG3, FRAME_FUNC(BASE) + | ld CARG2, -8(TMP1) // KBASE-8-tab_const*8 + | sd BASE, L->base + | sd PC, SAVE_PC + | cleartp CARG3 + | // (lua_State *L, GCproto *pt, GCfuncL *parent) + | call_intern lj_func_newL_gc + |. move CARG1, L + | // Returns GCfuncL *. + | li TMP0, LJ_TFUNC + | ld BASE, L->base + | ins_next1 + | settp CRET1, TMP0 + | daddu RA, BASE, RA + | sd CRET1, 0(RA) + | ins_next2 + break; + + /* -- Table ops --------------------------------------------------------- */ + + case BC_TNEW: + case BC_TDUP: + | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) + | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) + | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) + | sd BASE, L->base + | sd PC, SAVE_PC + | sltu AT, TMP0, TMP1 + | beqz AT, >5 + |1: + if (op == BC_TNEW) { + | load_got lj_tab_new + | srl CARG2, RD, 3 + | andi CARG2, CARG2, 0x7ff + | li TMP0, 0x801 + | addiu AT, CARG2, -0x7ff + | srl CARG3, RD, 14 + | movz CARG2, TMP0, AT + | // (lua_State *L, int32_t asize, uint32_t hbits) + | call_intern lj_tab_new + |. move CARG1, L + | // Returns Table *. + } else { + | load_got lj_tab_dup + | dsubu TMP1, KBASE, RD + | move CARG1, L + | call_intern lj_tab_dup // (lua_State *L, Table *kt) + |. ld CARG2, -8(TMP1) // KBASE-8-str_const*8 + | // Returns Table *. + } + | li TMP0, LJ_TTAB + | ld BASE, L->base + | ins_next1 + | daddu RA, BASE, RA + | settp CRET1, TMP0 + | sd CRET1, 0(RA) + | ins_next2 + |5: + | load_got lj_gc_step_fixtop + | move MULTRES, RD + | call_intern lj_gc_step_fixtop // (lua_State *L) + |. move CARG1, L + | b <1 + |. move RD, MULTRES + break; + + case BC_GGET: + | // RA = dst*8, RD = str_const*8 (~) + case BC_GSET: + | // RA = src*8, RD = str_const*8 (~) + | ld LFUNC:TMP2, FRAME_FUNC(BASE) + | dsubu TMP1, KBASE, RD + | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8 + | cleartp LFUNC:TMP2 + | ld TAB:RB, LFUNC:TMP2->env + if (op == BC_GGET) { + | b ->BC_TGETS_Z + } else { + | b ->BC_TSETS_Z + } + |. daddu RA, BASE, RA + break; + + case BC_TGETV: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | daddu CARG2, BASE, RB + | daddu CARG3, BASE, RC + | ld TAB:RB, 0(CARG2) + | ld TMP2, 0(CARG3) + | daddu RA, BASE, RA + | checktab TAB:RB, ->vmeta_tgetv + | gettp TMP3, TMP2 + | bne TMP3, TISNUM, >5 // Integer key? + |. lw TMP0, TAB:RB->asize + | sextw TMP2, TMP2 + | ld TMP1, TAB:RB->array + | sltu AT, TMP2, TMP0 + | sll TMP2, TMP2, 3 + | beqz AT, ->vmeta_tgetv // Integer key and in array part? + |. daddu TMP2, TMP1, TMP2 + | ld AT, 0(TMP2) + | beq AT, TISNIL, >2 + |. ld CRET1, 0(TMP2) + |1: + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + | + |2: // Check for __index if table value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + |. nop + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<<MM_index + | bnez TMP0, <1 // 'no __index' flag set: done. + |. nop + | b ->vmeta_tgetv + |. nop + | + |5: + | li AT, LJ_TSTR + | bne TMP3, AT, ->vmeta_tgetv + |. cleartp RC, TMP2 + | b ->BC_TGETS_Z // String key? + |. nop + break; + case BC_TGETS: + | // RA = dst*8, RB = table*8, RC = str_const*8 (~) + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RC8a RC, INS + | daddu CARG2, BASE, RB + | decode_RC8b RC + | ld TAB:RB, 0(CARG2) + | dsubu CARG3, KBASE, RC + | daddu RA, BASE, RA + | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8 + | checktab TAB:RB, ->vmeta_tgets1 + |->BC_TGETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->hash + | ld NODE:TMP2, TAB:RB->node + | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | sll TMP0, TMP1, 5 + | sll TMP1, TMP1, 3 + | subu TMP1, TMP0, TMP1 + | li TMP3, LJ_TSTR + | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | settp STR:RC, TMP3 // Tagged key to look for. + |1: + | ld CARG1, NODE:TMP2->key + | ld CRET1, NODE:TMP2->val + | ld NODE:TMP1, NODE:TMP2->next + | bne CARG1, RC, >4 + |. ld TAB:TMP3, TAB:RB->metatable + | beq CRET1, TISNIL, >5 // Key found, but nil value? + |. nop + |3: + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + | + |4: // Follow hash chain. + | bnez NODE:TMP1, <1 + |. move NODE:TMP2, NODE:TMP1 + | // End of hash chain: key not found, nil result. + | + |5: // Check for __index if table value is nil. + | beqz TAB:TMP3, <3 // No metatable: done. + |. move CRET1, TISNIL + | lbu TMP0, TAB:TMP3->nomm + | andi TMP0, TMP0, 1<<MM_index + | bnez TMP0, <3 // 'no __index' flag set: done. + |. nop + | b ->vmeta_tgets + |. nop + break; + case BC_TGETB: + | // RA = dst*8, RB = table*8, RC = index*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | daddu CARG2, BASE, RB + | decode_RDtoRC8 RC, RD + | ld TAB:RB, 0(CARG2) + | daddu RA, BASE, RA + | srl TMP0, RC, 3 + | checktab TAB:RB, ->vmeta_tgetb + | lw TMP1, TAB:RB->asize + | ld TMP2, TAB:RB->array + | sltu AT, TMP0, TMP1 + | beqz AT, ->vmeta_tgetb + |. daddu RC, TMP2, RC + | ld AT, 0(RC) + | beq AT, TISNIL, >5 + |. ld CRET1, 0(RC) + |1: + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + | + |5: // Check for __index if table value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + |. nop + | lbu TMP1, TAB:TMP2->nomm + | andi TMP1, TMP1, 1<<MM_index + | bnez TMP1, <1 // 'no __index' flag set: done. + |. nop + | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2! + |. nop + break; + case BC_TGETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | daddu RB, BASE, RB + | daddu RC, BASE, RC + | ld TAB:CARG1, 0(RB) + | lw CARG2, LO(RC) + | daddu RA, BASE, RA + | cleartp TAB:CARG1 + | lw TMP0, TAB:CARG1->asize + | ld TMP1, TAB:CARG1->array + | sltu AT, CARG2, TMP0 + | sll TMP2, CARG2, 3 + | beqz AT, ->vmeta_tgetr // In array part? + |. daddu CRET1, TMP1, TMP2 + | ld CARG2, 0(CRET1) + |->BC_TGETR_Z: + | ins_next1 + | sd CARG2, 0(RA) + | ins_next2 + break; + + case BC_TSETV: + | // RA = src*8, RB = table*8, RC = key*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | daddu CARG2, BASE, RB + | daddu CARG3, BASE, RC + | ld RB, 0(CARG2) + | ld TMP2, 0(CARG3) + | daddu RA, BASE, RA + | checktab RB, ->vmeta_tsetv + | checkint TMP2, >5 + |. sextw RC, TMP2 + | lw TMP0, TAB:RB->asize + | ld TMP1, TAB:RB->array + | sltu AT, RC, TMP0 + | sll TMP2, RC, 3 + | beqz AT, ->vmeta_tsetv // Integer key and in array part? + |. daddu TMP1, TMP1, TMP2 + | ld TMP0, 0(TMP1) + | lbu TMP3, TAB:RB->marked + | beq TMP0, TISNIL, >3 + |. ld CRET1, 0(RA) + |1: + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | bnez AT, >7 + |. sd CRET1, 0(TMP1) + |2: + | ins_next + | + |3: // Check for __newindex if previous value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + |. nop + | lbu TMP2, TAB:TMP2->nomm + | andi TMP2, TMP2, 1<<MM_newindex + | bnez TMP2, <1 // 'no __newindex' flag set: done. + |. nop + | b ->vmeta_tsetv + |. nop + | + |5: + | gettp AT, TMP2 + | daddiu AT, AT, -LJ_TSTR + | bnez AT, ->vmeta_tsetv + |. nop + | b ->BC_TSETS_Z // String key? + |. cleartp STR:RC, TMP2 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <2 + break; + case BC_TSETS: + | // RA = src*8, RB = table*8, RC = str_const*8 (~) + | decode_RB8a RB, INS + | decode_RB8b RB + | daddu CARG2, BASE, RB + | decode_RC8a RC, INS + | ld TAB:RB, 0(CARG2) + | decode_RC8b RC + | dsubu CARG3, KBASE, RC + | ld RC, -8(CARG3) // KBASE-8-str_const*8 + | daddu RA, BASE, RA + | cleartp STR:RC + | checktab TAB:RB, ->vmeta_tsets1 + |->BC_TSETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->hash + | ld NODE:TMP2, TAB:RB->node + | sb r0, TAB:RB->nomm // Clear metamethod cache. + | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | sll TMP0, TMP1, 5 + | sll TMP1, TMP1, 3 + | subu TMP1, TMP0, TMP1 + | li TMP3, LJ_TSTR + | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | settp STR:RC, TMP3 // Tagged key to look for. + |.if FPU + | ldc1 f20, 0(RA) + |.else + | ld CRET1, 0(RA) + |.endif + |1: + | ld TMP0, NODE:TMP2->key + | ld CARG2, NODE:TMP2->val + | ld NODE:TMP1, NODE:TMP2->next + | bne TMP0, RC, >5 + |. lbu TMP3, TAB:RB->marked + | beq CARG2, TISNIL, >4 // Key found, but nil value? + |. ld TAB:TMP0, TAB:RB->metatable + |2: + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | bnez AT, >7 + |.if FPU + |. sdc1 f20, NODE:TMP2->val + |.else + |. sd CRET1, NODE:TMP2->val + |.endif + |3: + | ins_next + | + |4: // Check for __newindex if previous value is nil. + | beqz TAB:TMP0, <2 // No metatable: done. + |. nop + | lbu TMP0, TAB:TMP0->nomm + | andi TMP0, TMP0, 1<<MM_newindex + | bnez TMP0, <2 // 'no __newindex' flag set: done. + |. nop + | b ->vmeta_tsets + |. nop + | + |5: // Follow hash chain. + | bnez NODE:TMP1, <1 + |. move NODE:TMP2, NODE:TMP1 + | // End of hash chain: key not found, add a new one + | + | // But check for __newindex first. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, >6 // No metatable: continue. + |. daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<<MM_newindex + | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check. + |6: + | load_got lj_tab_newkey + | sd RC, 0(CARG3) + | sd BASE, L->base + | move CARG2, TAB:RB + | sd PC, SAVE_PC + | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k + |. move CARG1, L + | // Returns TValue *. + | ld BASE, L->base + |.if FPU + | b <3 // No 2nd write barrier needed. + |. sdc1 f20, 0(CRET1) + |.else + | ld CARG1, 0(RA) + | b <3 // No 2nd write barrier needed. + |. sd CARG1, 0(CRET1) + |.endif + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <3 + break; + case BC_TSETB: + | // RA = src*8, RB = table*8, RC = index*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | daddu CARG2, BASE, RB + | decode_RDtoRC8 RC, RD + | ld TAB:RB, 0(CARG2) + | daddu RA, BASE, RA + | srl TMP0, RC, 3 + | checktab RB, ->vmeta_tsetb + | lw TMP1, TAB:RB->asize + | ld TMP2, TAB:RB->array + | sltu AT, TMP0, TMP1 + | beqz AT, ->vmeta_tsetb + |. daddu RC, TMP2, RC + | ld TMP1, 0(RC) + | lbu TMP3, TAB:RB->marked + | beq TMP1, TISNIL, >5 + |1: + |. ld CRET1, 0(RA) + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | bnez AT, >7 + |. sd CRET1, 0(RC) + |2: + | ins_next + | + |5: // Check for __newindex if previous value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + |. nop + | lbu TMP1, TAB:TMP2->nomm + | andi TMP1, TMP1, 1<<MM_newindex + | bnez TMP1, <1 // 'no __newindex' flag set: done. + |. nop + | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2! + |. nop + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <2 + break; + case BC_TSETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | daddu CARG1, BASE, RB + | daddu CARG3, BASE, RC + | ld TAB:CARG2, 0(CARG1) + | lw CARG3, LO(CARG3) + | cleartp TAB:CARG2 + | lbu TMP3, TAB:CARG2->marked + | lw TMP0, TAB:CARG2->asize + | ld TMP1, TAB:CARG2->array + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | bnez AT, >7 + |. daddu RA, BASE, RA + |2: + | sltu AT, CARG3, TMP0 + | sll TMP2, CARG3, 3 + | beqz AT, ->vmeta_tsetr // In array part? + |. daddu CRET1, TMP1, TMP2 + |->BC_TSETR_Z: + | ld CARG1, 0(RA) + | ins_next1 + | sd CARG1, 0(CRET1) + | ins_next2 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP3, TMP0, <2 + break; + + case BC_TSETM: + | // RA = base*8 (table at base-1), RD = num_const*8 (start index) + | daddu RA, BASE, RA + |1: + | daddu TMP3, KBASE, RD + | ld TAB:CARG2, -8(RA) // Guaranteed to be a table. + | addiu TMP0, MULTRES, -8 + | lw TMP3, LO(TMP3) // Integer constant is in lo-word. + | beqz TMP0, >4 // Nothing to copy? + |. srl CARG3, TMP0, 3 + | cleartp CARG2 + | addu CARG3, CARG3, TMP3 + | lw TMP2, TAB:CARG2->asize + | sll TMP1, TMP3, 3 + | lbu TMP3, TAB:CARG2->marked + | ld CARG1, TAB:CARG2->array + | sltu AT, TMP2, CARG3 + | bnez AT, >5 + |. daddu TMP2, RA, TMP0 + | daddu TMP1, TMP1, CARG1 + | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) + |3: // Copy result slots to table. + | ld CRET1, 0(RA) + | daddiu RA, RA, 8 + | sltu AT, RA, TMP2 + | sd CRET1, 0(TMP1) + | bnez AT, <3 + |. daddiu TMP1, TMP1, 8 + | bnez TMP0, >7 + |. nop + |4: + | ins_next + | + |5: // Need to resize array part. + | load_got lj_tab_reasize + | sd BASE, L->base + | sd PC, SAVE_PC + | move BASE, RD + | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) + |. move CARG1, L + | // Must not reallocate the stack. + | move RD, BASE + | b <1 + |. ld BASE, L->base // Reload BASE for lack of a saved register. + | + |7: // Possible table write barrier for any value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP3, TMP0, <4 + break; + + /* -- Calls and vararg handling ----------------------------------------- */ + + case BC_CALLM: + | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 + | decode_RDtoRC8 NARGS8:RC, RD + | b ->BC_CALL_Z + |. addu NARGS8:RC, NARGS8:RC, MULTRES + break; + case BC_CALL: + | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 + | decode_RDtoRC8 NARGS8:RC, RD + |->BC_CALL_Z: + | move TMP2, BASE + | daddu BASE, BASE, RA + | ld LFUNC:RB, 0(BASE) + | daddiu BASE, BASE, 16 + | addiu NARGS8:RC, NARGS8:RC, -8 + | checkfunc RB, ->vmeta_call + | ins_call + break; + + case BC_CALLMT: + | // RA = base*8, (RB = 0,) RC = extra_nargs*8 + | addu NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD. + | // Fall through. Assumes BC_CALLT follows. + break; + case BC_CALLT: + | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 + | daddu RA, BASE, RA + | ld RB, 0(RA) + | move NARGS8:RC, RD + | ld TMP1, FRAME_PC(BASE) + | daddiu RA, RA, 16 + | addiu NARGS8:RC, NARGS8:RC, -8 + | checktp CARG3, RB, -LJ_TFUNC, ->vmeta_callt + |->BC_CALLT_Z: + | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. + | lbu TMP3, LFUNC:CARG3->ffid + | bnez TMP0, >7 + |. xori TMP2, TMP1, FRAME_VARG + |1: + | sd RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. + | sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function? + | move TMP2, BASE + | move RB, CARG3 + | beqz NARGS8:RC, >3 + |. move TMP3, NARGS8:RC + |2: + | ld CRET1, 0(RA) + | daddiu RA, RA, 8 + | addiu TMP3, TMP3, -8 + | sd CRET1, 0(TMP2) + | bnez TMP3, <2 + |. daddiu TMP2, TMP2, 8 + |3: + | or TMP0, TMP0, AT + | beqz TMP0, >5 + |. nop + |4: + | ins_callt + | + |5: // Tailcall to a fast function with a Lua frame below. + | lw INS, -4(TMP1) + | decode_RA8a RA, INS + | decode_RA8b RA + | dsubu TMP1, BASE, RA + | ld TMP1, -32(TMP1) + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | b <4 + |. ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. + | + |7: // Tailcall from a vararg function. + | andi AT, TMP2, FRAME_TYPEP + | bnez AT, <1 // Vararg frame below? + |. dsubu TMP2, BASE, TMP2 // Relocate BASE down. + | move BASE, TMP2 + | ld TMP1, FRAME_PC(TMP2) + | b <1 + |. andi TMP0, TMP1, FRAME_TYPE + break; + + case BC_ITERC: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) + | move TMP2, BASE // Save old BASE fir vmeta_call. + | daddu BASE, BASE, RA + | ld RB, -24(BASE) + | ld CARG1, -16(BASE) + | ld CARG2, -8(BASE) + | li NARGS8:RC, 16 // Iterators get 2 arguments. + | sd RB, 0(BASE) // Copy callable. + | sd CARG1, 16(BASE) // Copy state. + | sd CARG2, 24(BASE) // Copy control var. + | daddiu BASE, BASE, 16 + | checkfunc RB, ->vmeta_call + | ins_call + break; + + case BC_ITERN: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) + |.if JIT + | // NYI: add hotloop, record BC_ITERN. + |.endif + | daddu RA, BASE, RA + | ld TAB:RB, -16(RA) + | lw RC, -8+LO(RA) // Get index from control var. + | cleartp TAB:RB + | daddiu PC, PC, 4 + | lw TMP0, TAB:RB->asize + | ld TMP1, TAB:RB->array + | dsll CARG3, TISNUM, 47 + |1: // Traverse array part. + | sltu AT, RC, TMP0 + | beqz AT, >5 // Index points after array part? + |. sll TMP3, RC, 3 + | daddu TMP3, TMP1, TMP3 + | ld CARG1, 0(TMP3) + | lhu RD, -4+OFS_RD(PC) + | or TMP2, RC, CARG3 + | beq CARG1, TISNIL, <1 // Skip holes in array part. + |. addiu RC, RC, 1 + | sd TMP2, 0(RA) + | sd CARG1, 8(RA) + | or TMP0, RC, CARG3 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | decode_RD4b RD + | daddu RD, RD, TMP3 + | sw TMP0, -8+LO(RA) // Update control var. + | daddu PC, PC, RD + |3: + | ins_next + | + |5: // Traverse hash part. + | lw TMP1, TAB:RB->hmask + | subu RC, RC, TMP0 + | ld TMP2, TAB:RB->node + |6: + | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1. + | bnez AT, <3 + |. sll TMP3, RC, 5 + | sll RB, RC, 3 + | subu TMP3, TMP3, RB + | daddu NODE:TMP3, TMP3, TMP2 + | ld CARG1, 0(NODE:TMP3) + | lhu RD, -4+OFS_RD(PC) + | beq CARG1, TISNIL, <6 // Skip holes in hash part. + |. addiu RC, RC, 1 + | ld CARG2, NODE:TMP3->key + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | sd CARG1, 8(RA) + | addu RC, RC, TMP0 + | decode_RD4b RD + | addu RD, RD, TMP3 + | sd CARG2, 0(RA) + | daddu PC, PC, RD + | b <3 + |. sw RC, -8+LO(RA) // Update control var. + break; + + case BC_ISNEXT: + | // RA = base*8, RD = target (points to ITERN) + | daddu RA, BASE, RA + | srl TMP0, RD, 1 + | ld CFUNC:CARG1, -24(RA) + | daddu TMP0, PC, TMP0 + | ld CARG2, -16(RA) + | ld CARG3, -8(RA) + | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) + | checkfunc CFUNC:CARG1, >5 + | gettp CARG2, CARG2 + | daddiu CARG2, CARG2, -LJ_TTAB + | lbu TMP1, CFUNC:CARG1->ffid + | daddiu CARG3, CARG3, -LJ_TNIL + | or AT, CARG2, CARG3 + | daddiu TMP1, TMP1, -FF_next_N + | or AT, AT, TMP1 + | bnez AT, >5 + |. lui TMP1, 0xfffe + | daddu PC, TMP0, TMP2 + | ori TMP1, TMP1, 0x7fff + | dsll TMP1, TMP1, 32 + | sd TMP1, -8(RA) + |1: + | ins_next + |5: // Despecialize bytecode if any of the checks fail. + | li TMP3, BC_JMP + | li TMP1, BC_ITERC + | sb TMP3, -4+OFS_OP(PC) + | daddu PC, TMP0, TMP2 + | b <1 + |. sb TMP1, OFS_OP(PC) + break; + + case BC_VARG: + | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 + | ld TMP0, FRAME_PC(BASE) + | decode_RDtoRC8 RC, RD + | decode_RB8a RB, INS + | daddu RC, BASE, RC + | decode_RB8b RB + | daddu RA, BASE, RA + | daddiu RC, RC, FRAME_VARG + | daddu TMP2, RA, RB + | daddiu TMP3, BASE, -16 // TMP3 = vtop + | dsubu RC, RC, TMP0 // RC = vbase + | // Note: RC may now be even _above_ BASE if nargs was < numparams. + | beqz RB, >5 // Copy all varargs? + |. dsubu TMP1, TMP3, RC + | daddiu TMP2, TMP2, -16 + |1: // Copy vararg slots to destination slots. + | ld CARG1, 0(RC) + | sltu AT, RC, TMP3 + | daddiu RC, RC, 8 + | movz CARG1, TISNIL, AT + | sd CARG1, 0(RA) + | sltu AT, RA, TMP2 + | bnez AT, <1 + |. daddiu RA, RA, 8 + |3: + | ins_next + | + |5: // Copy all varargs. + | ld TMP0, L->maxstack + | blez TMP1, <3 // No vararg slots? + |. li MULTRES, 8 // MULTRES = (0+1)*8 + | daddu TMP2, RA, TMP1 + | sltu AT, TMP0, TMP2 + | bnez AT, >7 + |. daddiu MULTRES, TMP1, 8 + |6: + | ld CRET1, 0(RC) + | daddiu RC, RC, 8 + | sd CRET1, 0(RA) + | sltu AT, RC, TMP3 + | bnez AT, <6 // More vararg slots? + |. daddiu RA, RA, 8 + | b <3 + |. nop + | + |7: // Grow stack for varargs. + | load_got lj_state_growstack + | sd RA, L->top + | dsubu RA, RA, BASE + | sd BASE, L->base + | dsubu BASE, RC, BASE // Need delta, because BASE may change. + | sd PC, SAVE_PC + | srl CARG2, TMP1, 3 + | call_intern lj_state_growstack // (lua_State *L, int n) + |. move CARG1, L + | move RC, BASE + | ld BASE, L->base + | daddu RA, BASE, RA + | daddu RC, BASE, RC + | b <6 + |. daddiu TMP3, BASE, -16 + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + | // RA = results*8, RD = extra_nresults*8 + | addu RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. + | // Fall through. Assumes BC_RET follows. + break; + + case BC_RET: + | // RA = results*8, RD = (nresults+1)*8 + | ld PC, FRAME_PC(BASE) + | daddu RA, BASE, RA + | move MULTRES, RD + |1: + | andi TMP0, PC, FRAME_TYPE + | bnez TMP0, ->BC_RETV_Z + |. xori TMP1, PC, FRAME_VARG + | + |->BC_RET_Z: + | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return + | lw INS, -4(PC) + | daddiu TMP2, BASE, -16 + | daddiu RC, RD, -8 + | decode_RA8a TMP0, INS + | decode_RB8a RB, INS + | decode_RA8b TMP0 + | decode_RB8b RB + | daddu TMP3, TMP2, RB + | beqz RC, >3 + |. dsubu BASE, TMP2, TMP0 + |2: + | ld CRET1, 0(RA) + | daddiu RA, RA, 8 + | daddiu RC, RC, -8 + | sd CRET1, 0(TMP2) + | bnez RC, <2 + |. daddiu TMP2, TMP2, 8 + |3: + | daddiu TMP3, TMP3, -8 + |5: + | sltu AT, TMP2, TMP3 + | bnez AT, >6 + |. ld LFUNC:TMP1, FRAME_FUNC(BASE) + | ins_next1 + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | ld KBASE, PC2PROTO(k)(TMP1) + | ins_next2 + | + |6: // Fill up results with nil. + | sd TISNIL, 0(TMP2) + | b <5 + |. daddiu TMP2, TMP2, 8 + | + |->BC_RETV_Z: // Non-standard return case. + | andi TMP2, TMP1, FRAME_TYPEP + | bnez TMP2, ->vm_return + |. nop + | // Return from vararg function: relocate BASE down. + | dsubu BASE, BASE, TMP1 + | b <1 + |. ld PC, FRAME_PC(BASE) + break; + + case BC_RET0: case BC_RET1: + | // RA = results*8, RD = (nresults+1)*8 + | ld PC, FRAME_PC(BASE) + | daddu RA, BASE, RA + | move MULTRES, RD + | andi TMP0, PC, FRAME_TYPE + | bnez TMP0, ->BC_RETV_Z + |. xori TMP1, PC, FRAME_VARG + | lw INS, -4(PC) + | daddiu TMP2, BASE, -16 + if (op == BC_RET1) { + | ld CRET1, 0(RA) + } + | decode_RB8a RB, INS + | decode_RA8a RA, INS + | decode_RB8b RB + | decode_RA8b RA + | dsubu BASE, TMP2, RA + if (op == BC_RET1) { + | sd CRET1, 0(TMP2) + } + |5: + | sltu AT, RD, RB + | bnez AT, >6 + |. ld TMP1, FRAME_FUNC(BASE) + | ins_next1 + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | ld KBASE, PC2PROTO(k)(TMP1) + | ins_next2 + | + |6: // Fill up results with nil. + | daddiu TMP2, TMP2, 8 + | daddiu RD, RD, 8 + | b <5 + if (op == BC_RET1) { + |. sd TISNIL, 0(TMP2) + } else { + |. sd TISNIL, -8(TMP2) + } + break; + + /* -- Loops and branches ------------------------------------------------ */ + + case BC_FORL: + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_IFORL follows. + break; + + case BC_JFORI: + case BC_JFORL: +#if !LJ_HASJIT + break; +#endif + case BC_FORI: + case BC_IFORL: + | // RA = base*8, RD = target (after end of loop or start of loop) + vk = (op == BC_IFORL || op == BC_JFORL); + | daddu RA, BASE, RA + | ld CARG1, FORL_IDX*8(RA) // IDX CARG1 - CARG3 type + | gettp CARG3, CARG1 + if (op != BC_JFORL) { + | srl RD, RD, 1 + | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) + | daddu TMP2, RD, TMP2 + } + if (!vk) { + | ld CARG2, FORL_STOP*8(RA) // STOP CARG2 - CARG4 type + | ld CRET1, FORL_STEP*8(RA) // STEP CRET1 - CRET2 type + | gettp CARG4, CARG2 + | bne CARG3, TISNUM, >5 + |. gettp CRET2, CRET1 + | bne CARG4, TISNUM, ->vmeta_for + |. sextw CARG3, CARG1 + | bne CRET2, TISNUM, ->vmeta_for + |. sextw CARG2, CARG2 + | dext AT, CRET1, 31, 0 + | slt CRET1, CARG2, CARG3 + | slt TMP1, CARG3, CARG2 + | movn CRET1, TMP1, AT + } else { + | bne CARG3, TISNUM, >5 + |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type + | ld CRET1, FORL_STOP*8(RA) // STOP CRET1 - CRET2 type + | sextw TMP3, CARG1 + | sextw CARG2, CARG2 + | sextw CRET1, CRET1 + | addu CARG1, TMP3, CARG2 + | xor TMP0, CARG1, TMP3 + | xor TMP1, CARG1, CARG2 + | and TMP0, TMP0, TMP1 + | slt TMP1, CARG1, CRET1 + | slt CRET1, CRET1, CARG1 + | slt AT, CARG2, r0 + | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. + | movn CRET1, TMP1, AT + | or CRET1, CRET1, TMP0 + | zextw CARG1, CARG1 + | settp CARG1, TISNUM + } + |1: + if (op == BC_FORI) { + | movz TMP2, r0, CRET1 + | daddu PC, PC, TMP2 + } else if (op == BC_JFORI) { + | daddu PC, PC, TMP2 + | lhu RD, -4+OFS_RD(PC) + } else if (op == BC_IFORL) { + | movn TMP2, r0, CRET1 + | daddu PC, PC, TMP2 + } + if (vk) { + | sd CARG1, FORL_IDX*8(RA) + } + | ins_next1 + | sd CARG1, FORL_EXT*8(RA) + |2: + if (op == BC_JFORI) { + | beqz CRET1, =>BC_JLOOP + |. decode_RD8b RD + } else if (op == BC_JFORL) { + | beqz CRET1, =>BC_JLOOP + } + | ins_next2 + | + |5: // FP loop. + |.if FPU + if (!vk) { + | ldc1 f0, FORL_IDX*8(RA) + | ldc1 f2, FORL_STOP*8(RA) + | sltiu TMP0, CARG3, LJ_TISNUM + | sltiu TMP1, CARG4, LJ_TISNUM + | sltiu AT, CRET2, LJ_TISNUM + | ld TMP3, FORL_STEP*8(RA) + | and TMP0, TMP0, TMP1 + | and AT, AT, TMP0 + | beqz AT, ->vmeta_for + |. slt TMP3, TMP3, r0 + | c.ole.d 0, f0, f2 + | c.ole.d 1, f2, f0 + | li CRET1, 1 + | movt CRET1, r0, 0 + | movt AT, r0, 1 + | b <1 + |. movn CRET1, AT, TMP3 + } else { + | ldc1 f0, FORL_IDX*8(RA) + | ldc1 f4, FORL_STEP*8(RA) + | ldc1 f2, FORL_STOP*8(RA) + | ld TMP3, FORL_STEP*8(RA) + | add.d f0, f0, f4 + | c.ole.d 0, f0, f2 + | c.ole.d 1, f2, f0 + | slt TMP3, TMP3, r0 + | li CRET1, 1 + | li AT, 1 + | movt CRET1, r0, 0 + | movt AT, r0, 1 + | movn CRET1, AT, TMP3 + if (op == BC_IFORL) { + | movn TMP2, r0, CRET1 + | daddu PC, PC, TMP2 + } + | sdc1 f0, FORL_IDX*8(RA) + | ins_next1 + | b <2 + |. sdc1 f0, FORL_EXT*8(RA) + } + |.else + if (!vk) { + | sltiu TMP0, CARG3, LJ_TISNUM + | sltiu TMP1, CARG4, LJ_TISNUM + | sltiu AT, CRET2, LJ_TISNUM + | and TMP0, TMP0, TMP1 + | and AT, AT, TMP0 + | beqz AT, ->vmeta_for + |. nop + | bal ->vm_sfcmpolex + |. lw TMP3, FORL_STEP*8+HI(RA) + | b <1 + |. nop + } else { + | load_got __adddf3 + | call_extern + |. sw TMP2, TMPD + | ld CARG2, FORL_STOP*8(RA) + | move CARG1, CRET1 + if ( op == BC_JFORL ) { + | lhu RD, -4+OFS_RD(PC) + | decode_RD8b RD + } + | bal ->vm_sfcmpolex + |. lw TMP3, FORL_STEP*8+HI(RA) + | b <1 + |. lw TMP2, TMPD + } + |.endif + break; + + case BC_ITERL: + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_IITERL follows. + break; + + case BC_JITERL: +#if !LJ_HASJIT + break; +#endif + case BC_IITERL: + | // RA = base*8, RD = target + | daddu RA, BASE, RA + | ld TMP1, 0(RA) + | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. + |. nop + if (op == BC_JITERL) { + | b =>BC_JLOOP + |. sd TMP1, -8(RA) + } else { + | branch_RD // Otherwise save control var + branch. + | sd TMP1, -8(RA) + } + |1: + | ins_next + break; + + case BC_LOOP: + | // RA = base*8, RD = target (loop extent) + | // Note: RA/RD is only used by trace recorder to determine scope/extent + | // This opcode does NOT jump, it's only purpose is to detect a hot loop. + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_ILOOP follows. + break; + + case BC_ILOOP: + | // RA = base*8, RD = target (loop extent) + | ins_next + break; + + case BC_JLOOP: + |.if JIT + | // RA = base*8 (ignored), RD = traceno*8 + | ld TMP1, DISPATCH_J(trace)(DISPATCH) + | li AT, 0 + | daddu TMP1, TMP1, RD + | // Traces on MIPS don't store the trace number, so use 0. + | sd AT, DISPATCH_GL(vmstate)(DISPATCH) + | ld TRACE:TMP2, 0(TMP1) + | sd BASE, DISPATCH_GL(jit_base)(DISPATCH) + | ld TMP2, TRACE:TMP2->mcode + | sd L, DISPATCH_GL(tmpbuf.L)(DISPATCH) + | jr TMP2 + |. daddiu JGL, DISPATCH, GG_DISP2G+32768 + |.endif + break; + + case BC_JMP: + | // RA = base*8 (only used by trace recorder), RD = target + | branch_RD + | ins_next + break; + + /* -- Function headers -------------------------------------------------- */ + + case BC_FUNCF: + |.if JIT + | hotcall + |.endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. + break; + + case BC_JFUNCF: +#if !LJ_HASJIT + break; +#endif + case BC_IFUNCF: + | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 + | ld TMP2, L->maxstack + | lbu TMP1, -4+PC2PROTO(numparams)(PC) + | ld KBASE, -4+PC2PROTO(k)(PC) + | sltu AT, TMP2, RA + | bnez AT, ->vm_growstack_l + |. sll TMP1, TMP1, 3 + if (op != BC_JFUNCF) { + | ins_next1 + } + |2: + | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters. + | bnez AT, >3 + |. daddu AT, BASE, NARGS8:RC + if (op == BC_JFUNCF) { + | decode_RD8a RD, INS + | b =>BC_JLOOP + |. decode_RD8b RD + } else { + | ins_next2 + } + | + |3: // Clear missing parameters. + | sd TISNIL, 0(AT) + | b <2 + |. addiu NARGS8:RC, NARGS8:RC, 8 + break; + + case BC_JFUNCV: +#if !LJ_HASJIT + break; +#endif + | NYI // NYI: compiled vararg functions + break; /* NYI: compiled vararg functions. */ + + case BC_IFUNCV: + | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 + | li TMP0, LJ_TFUNC + | daddu TMP1, BASE, RC + | ld TMP2, L->maxstack + | settp LFUNC:RB, TMP0 + | daddu TMP0, RA, RC + | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. + | daddiu TMP3, RC, 16+FRAME_VARG + | sltu AT, TMP0, TMP2 + | ld KBASE, -4+PC2PROTO(k)(PC) + | beqz AT, ->vm_growstack_l + |. sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. + | lbu TMP2, -4+PC2PROTO(numparams)(PC) + | move RA, BASE + | move RC, TMP1 + | ins_next1 + | beqz TMP2, >3 + |. daddiu BASE, TMP1, 16 + |1: + | ld TMP0, 0(RA) + | sltu AT, RA, RC // Less args than parameters? + | move CARG1, TMP0 + | movz TMP0, TISNIL, AT // Clear missing parameters. + | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). + | addiu TMP2, TMP2, -1 + | sd TMP0, 16(TMP1) + | daddiu TMP1, TMP1, 8 + | sd CARG1, 0(RA) + | bnez TMP2, <1 + |. daddiu RA, RA, 8 + |3: + | ins_next2 + break; + + case BC_FUNCC: + case BC_FUNCCW: + | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 + if (op == BC_FUNCC) { + | ld CFUNCADDR, CFUNC:RB->f + } else { + | ld CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH) + } + | daddu TMP1, RA, NARGS8:RC + | ld TMP2, L->maxstack + | daddu RC, BASE, NARGS8:RC + | sd BASE, L->base + | sltu AT, TMP2, TMP1 + | sd RC, L->top + | li_vmstate C + if (op == BC_FUNCCW) { + | ld CARG2, CFUNC:RB->f + } + | bnez AT, ->vm_growstack_c // Need to grow stack. + |. move CARG1, L + | jalr CFUNCADDR // (lua_State *L [, lua_CFunction f]) + |. st_vmstate + | // Returns nresults. + | ld BASE, L->base + | sll RD, CRET1, 3 + | ld TMP1, L->top + | li_vmstate INTERP + | ld PC, FRAME_PC(BASE) // Fetch PC of caller. + | dsubu RA, TMP1, RD // RA = L->top - nresults*8 + | sd L, DISPATCH_GL(cur_L)(DISPATCH) + | b ->vm_returnc + |. st_vmstate + break; + + /* ---------------------------------------------------------------------- */ + + default: + fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); + exit(2); + break; + } +} + +static int build_backend(BuildCtx *ctx) +{ + int op; + + dasm_growpc(Dst, BC__MAX); + + build_subroutines(ctx); + + |.code_op + for (op = 0; op < BC__MAX; op++) + build_ins(ctx, (BCOp)op, op); + + return BC__MAX; +} + +/* Emit pseudo frame-info for all assembler functions. */ +static void emit_asm_debug(BuildCtx *ctx) +{ + int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); + int i; + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); + fprintf(ctx->fp, + ".Lframe0:\n" + "\t.4byte .LECIE0-.LSCIE0\n" + ".LSCIE0:\n" + "\t.4byte 0xffffffff\n" + "\t.byte 0x1\n" + "\t.string \"\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -4\n" + "\t.byte 31\n" + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" + "\t.align 2\n" + ".LECIE0:\n\n"); + fprintf(ctx->fp, + ".LSFDE0:\n" + "\t.4byte .LEFDE0-.LASFDE0\n" + ".LASFDE0:\n" + "\t.4byte .Lframe0\n" + "\t.8byte .Lbegin\n" + "\t.8byte %d\n" + "\t.byte 0xe\n\t.uleb128 %d\n" + "\t.byte 0x9f\n\t.sleb128 2*5\n" + "\t.byte 0x9e\n\t.sleb128 2*6\n", + fcofs, CFRAME_SIZE); + for (i = 23; i >= 16; i--) + fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i)); +#if !LJ_SOFTFP + for (i = 31; i >= 24; i--) + fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i)); +#endif + fprintf(ctx->fp, + "\t.align 2\n" + ".LEFDE0:\n\n"); +#if LJ_HASFFI + fprintf(ctx->fp, + ".LSFDE1:\n" + "\t.4byte .LEFDE1-.LASFDE1\n" + ".LASFDE1:\n" + "\t.4byte .Lframe0\n" + "\t.4byte lj_vm_ffi_call\n" + "\t.4byte %d\n" + "\t.byte 0x9f\n\t.uleb128 2*1\n" + "\t.byte 0x90\n\t.uleb128 2*2\n" + "\t.byte 0xd\n\t.uleb128 0x10\n" + "\t.align 2\n" + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); +#endif +#if !LJ_NO_UNWIND + /* NYI */ +#endif + break; + default: + break; + } +} + diff --git a/luajit-2.1/src/vm_ppc.dasc b/luajit-2.1/src/vm_ppc.dasc index 0d6915f..b4260eb 100644 --- a/luajit-2.1/src/vm_ppc.dasc +++ b/luajit-2.1/src/vm_ppc.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for PowerPC 32 bit or 32on64 bit mode. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h | |.arch ppc |.section code_op, code_sub diff --git a/luajit-2.1/src/vm_x64.dasc b/luajit-2.1/src/vm_x64.dasc index bba89aa..a003fb4 100644 --- a/luajit-2.1/src/vm_x64.dasc +++ b/luajit-2.1/src/vm_x64.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for x64 CPUs in LJ_GC64 mode. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h | |.arch x64 |.section code_op, code_sub @@ -250,11 +250,11 @@ |// Macros to clear or set tags. |.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro |.macro settp, reg, tp -| mov64 ITYPE, ((int64_t)tp<<47) +| mov64 ITYPE, ((uint64_t)tp<<47) | or reg, ITYPE |.endmacro |.macro settp, dst, reg, tp -| mov64 dst, ((int64_t)tp<<47) +| mov64 dst, ((uint64_t)tp<<47) | or dst, reg |.endmacro |.macro setint, reg @@ -1105,11 +1105,11 @@ static void build_subroutines(BuildCtx *ctx) | mov BASE, L:RB->base | mov NARGS:RDd, TMP1d | mov LFUNC:RB, [RA-16] - | cleartp LFUNC:RB | add NARGS:RDd, 1 | // This is fragile. L->base must not move, KBASE must always be defined. | cmp KBASE, BASE // Continue with CALLT if flag set. | je ->BC_CALLT_Z + | cleartp LFUNC:RB | mov BASE, RA | ins_call // Otherwise call resolved metamethod. | @@ -1469,7 +1469,7 @@ static void build_subroutines(BuildCtx *ctx) | mov [BASE-16], CFUNC:RD | mov [BASE-8], TMPR |.if DUALNUM - | mov64 RD, ((int64_t)LJ_TISNUM<<47) + | mov64 RD, ((uint64_t)LJ_TISNUM<<47) | mov [BASE], RD |.else | mov qword [BASE], 0 @@ -1804,8 +1804,12 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->fff_res1 | |.ffunc_n math_frexp - | lea CARG1, TMP1 | mov RB, BASE + |.if X64WIN + | lea CARG2, TMP1 // Caveat: CARG2 == BASE + |.else + | lea CARG1, TMP1 + |.endif | call extern frexp | mov BASE, RB | mov RBd, TMP1d @@ -1822,9 +1826,12 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->fff_res | |.ffunc_n math_modf - | lea CARG1, [BASE-16] - | mov PC, [BASE-8] | mov RB, BASE + |.if X64WIN + | lea CARG2, [BASE-16] // Caveat: CARG2 == BASE + |.else + | lea CARG1, [BASE-16] + |.endif | call extern modf | mov BASE, RB | mov PC, [BASE-8] @@ -2317,7 +2324,8 @@ static void build_subroutines(BuildCtx *ctx) |->cont_stitch: // Trace stitching. |.if JIT | // BASE = base, RC = result, RB = mbase - | mov ITYPEd, [RB-24] // Save previous trace number. + | mov TRACE:ITYPE, [RB-40] // Save previous trace. + | cleartp TRACE:ITYPE | mov TMPRd, MULTRES | movzx RAd, PC_RA | lea RA, [BASE+RA*8] // Call base. @@ -2339,11 +2347,10 @@ static void build_subroutines(BuildCtx *ctx) | cmp RC, RA | ja >9 // More results wanted? | - | mov RA, [DISPATCH+DISPATCH_J(trace)] - | mov TRACE:RD, [RA+ITYPE*8] - | test TRACE:RD, TRACE:RD + | test TRACE:ITYPE, TRACE:ITYPE | jz ->cont_nop - | movzx RDd, word TRACE:RD->link + | movzx RBd, word TRACE:ITYPE->traceno + | movzx RDd, word TRACE:ITYPE->link | cmp RDd, RBd | je ->cont_nop // Blacklisted. | test RDd, RDd @@ -2394,12 +2401,11 @@ static void build_subroutines(BuildCtx *ctx) | movzx RCd, byte [rbp-8] // Reconstruct exit number. | mov RCH, byte [rbp-16] | mov [rbp-8], r15; mov [rbp-16], r14 - | // Caveat: DISPATCH is rbx. - | mov DISPATCH, [ebp] - | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. + | // DISPATCH is preserved on-trace in LJ_GC64 mode. + | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. | set_vmstate EXIT - | mov [DISPATCH+DISPATCH_J(exitno)], RC - | mov [DISPATCH+DISPATCH_J(parent)], RA + | mov [DISPATCH+DISPATCH_J(exitno)], RCd + | mov [DISPATCH+DISPATCH_J(parent)], RAd |.if X64WIN | sub rsp, 16*8+4*8 // Room for SSE regs + save area. |.else @@ -2425,7 +2431,7 @@ static void build_subroutines(BuildCtx *ctx) | mov CARG2, rsp |.endif | lea CARG1, [DISPATCH+GG_DISP2J] - | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 + | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 | call extern lj_trace_exit // (jit_State *J, ExitState *ex) | // MULTRES or negated error code returned in eax (RD). | mov RA, L:RB->cframe @@ -2472,7 +2478,7 @@ static void build_subroutines(BuildCtx *ctx) | mov KBASE, LFUNC:KBASE->pc | mov KBASE, [KBASE+PC2PROTO(k)] | mov L:RB->base, BASE - | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 + | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 | set_vmstate INTERP | // Modified copy of ins_next which handles function header dispatch, too. | mov RCd, [PC] @@ -2496,7 +2502,7 @@ static void build_subroutines(BuildCtx *ctx) | // Otherwise set KBASE for Lua function below fast function. | movzx RCd, byte [RC-3] | neg RC - | mov LFUNC:KBASE, [BASE+RC*8-24] + | mov LFUNC:KBASE, [BASE+RC*8-32] | cleartp LFUNC:KBASE | mov KBASE, LFUNC:KBASE->pc | mov KBASE, [KBASE+PC2PROTO(k)] @@ -2631,6 +2637,7 @@ static void build_subroutines(BuildCtx *ctx) | mov eax, CARG1d | .if X64WIN; push rsi; mov rsi, CARG2; .endif | push rbx + | xor ecx, ecx | cpuid | mov [rsi], eax | mov [rsi+4], ebx @@ -3508,7 +3515,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_AD // RA = level, RD = target | branchPC RD // Do this first to free RD. | mov L:RB, SAVE_L - | cmp dword L:RB->openupval, 0 + | cmp aword L:RB->openupval, 0 | je >1 | mov L:RB->base, BASE | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE @@ -4042,7 +4049,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3]. | mov [RA], RB | mov [RA+8], RC - | mov LFUNC:RB, [RA-40] // Copy callable. fb[-1] = fb[-5] + | mov LFUNC:RB, [RA-40] // Copy callable. fb[-2] = fb[-5] | mov [RA-16], LFUNC:RB | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call. | checkfunc LFUNC:RB, ->vmeta_call diff --git a/luajit-2.1/src/vm_x86.dasc b/luajit-2.1/src/vm_x86.dasc index 96ac1da..211ae7b 100644 --- a/luajit-2.1/src/vm_x86.dasc +++ b/luajit-2.1/src/vm_x86.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for x86 CPUs. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h | |.if P64 |.arch x64 @@ -121,19 +121,68 @@ |//----------------------------------------------------------------------- |.if not X64 // x86 stack layout. | -|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). +|.if WIN +| +|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--). |.macro saveregs_ | push edi; push esi; push ebx +| push extern lj_err_unwind_win +| fs; push dword [0] +| fs; mov [0], esp | sub esp, CFRAME_SPACE |.endmacro -|.macro saveregs -| push ebp; saveregs_ +|.macro restoreregs +| add esp, CFRAME_SPACE +| fs; pop dword [0] +| pop edi // Short for esp += 4. +| pop ebx; pop esi; pop edi; pop ebp +|.endmacro +| +|.else +| +|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). +|.macro saveregs_ +| push edi; push esi; push ebx +| sub esp, CFRAME_SPACE |.endmacro |.macro restoreregs | add esp, CFRAME_SPACE | pop ebx; pop esi; pop edi; pop ebp |.endmacro | +|.endif +| +|.macro saveregs +| push ebp; saveregs_ +|.endmacro +| +|.if WIN +|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only. +|.define SAVE_NRES, aword [esp+aword*18] +|.define SAVE_CFRAME, aword [esp+aword*17] +|.define SAVE_L, aword [esp+aword*16] +|//----- 16 byte aligned, ^^^ arguments from C caller +|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter. +|.define SAVE_R4, aword [esp+aword*14] +|.define SAVE_R3, aword [esp+aword*13] +|.define SAVE_R2, aword [esp+aword*12] +|//----- 16 byte aligned +|.define SAVE_R1, aword [esp+aword*11] +|.define SEH_FUNC, aword [esp+aword*10] +|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves. +|.define UNUSED2, aword [esp+aword*8] +|//----- 16 byte aligned +|.define UNUSED1, aword [esp+aword*7] +|.define SAVE_PC, aword [esp+aword*6] +|.define TMP2, aword [esp+aword*5] +|.define TMP1, aword [esp+aword*4] +|//----- 16 byte aligned +|.define ARG4, aword [esp+aword*3] +|.define ARG3, aword [esp+aword*2] +|.define ARG2, aword [esp+aword*1] +|.define ARG1, aword [esp] //<-- esp while in interpreter. +|//----- 16 byte aligned, ^^^ arguments for C callee +|.else |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. |.define SAVE_NRES, aword [esp+aword*14] |.define SAVE_CFRAME, aword [esp+aword*13] @@ -154,6 +203,7 @@ |.define ARG2, aword [esp+aword*1] |.define ARG1, aword [esp] //<-- esp while in interpreter. |//----- 16 byte aligned, ^^^ arguments for C callee +|.endif | |// FPARGx overlaps ARGx and ARG(x+1) on x86. |.define FPARG3, qword [esp+qword*1] @@ -554,6 +604,10 @@ static void build_subroutines(BuildCtx *ctx) |.else | mov eax, FCARG2 // Error return status for vm_pcall. | mov esp, FCARG1 + |.if WIN + | lea FCARG1, SEH_NEXT + | fs; mov [0], FCARG1 + |.endif |.endif |->vm_unwind_c_eh: // Landing pad for external unwinder. | mov L:RB, SAVE_L @@ -577,6 +631,10 @@ static void build_subroutines(BuildCtx *ctx) |.else | and FCARG1, CFRAME_RAWMASK | mov esp, FCARG1 + |.if WIN + | lea FCARG1, SEH_NEXT + | fs; mov [0], FCARG1 + |.endif |.endif |->vm_unwind_ff_eh: // Landing pad for external unwinder. | mov L:RB, SAVE_L @@ -590,6 +648,19 @@ static void build_subroutines(BuildCtx *ctx) | set_vmstate INTERP | jmp ->vm_returnc // Increments RD/MULTRES and returns. | + |.if WIN and not X64 + |->vm_rtlunwind@16: // Thin layer around RtlUnwind. + | // (void *cframe, void *excptrec, void *unwinder, int errcode) + | mov [esp], FCARG1 // Return value for RtlUnwind. + | push FCARG2 // Exception record for RtlUnwind. + | push 0 // Ignored by RtlUnwind. + | push dword [FCARG1+CFRAME_OFS_SEH] + | call extern RtlUnwind@16 // Violates ABI (clobbers too much). + | mov FCARG1, eax + | mov FCARG2, [esp+4] // errcode (for vm_unwind_c). + | ret // Jump to unwinder. + |.endif + | |//----------------------------------------------------------------------- |//-- Grow stack for calls ----------------------------------------------- |//----------------------------------------------------------------------- @@ -3026,6 +3097,7 @@ static void build_subroutines(BuildCtx *ctx) | mov eax, CARG1d | .if X64WIN; push rsi; mov rsi, CARG2; .endif | push rbx + | xor ecx, ecx | cpuid | mov [rsi], eax | mov [rsi+4], ebx @@ -3049,6 +3121,7 @@ static void build_subroutines(BuildCtx *ctx) | mov eax, [esp+4] // Argument 1 is function number. | push edi | push ebx + | xor ecx, ecx | cpuid | mov edi, [esp+16] // Argument 2 is result area. | mov [edi], eax |