diff options
author | Ronan Collobert <locronan@fb.com> | 2015-11-24 00:16:25 +0300 |
---|---|---|
committer | Ronan Collobert <locronan@fb.com> | 2015-11-24 00:16:25 +0300 |
commit | 09c1591e5cfdbf21fbe350de34fd94b971223c1e (patch) | |
tree | aa9029c03f9921abdb6ea56c3809a102b27c6e23 | |
parent | 21269a1bc078531371d82db31a8db968befb8fc8 (diff) |
Squashed 'luajit-2.0/' changes from 8715ae6..367cba2
367cba2 DynASM/x86: Add rdpmc instruction.
7991a66 Extend Valgrind suppressions for non-inlined C code.
1393b2f Update build instructions.
f416cf6 OSX: Switch to Clang as the default compiler.
3639ff4 iOS: Disable os.execute() when building for iOS >= 8.0.
7579b16 DynASM/x86: Restrict shld/shrd to operands with same width.
9dc785e DynASM/x86: Fix some SSE instruction templates.
e5f4cb1 FFI: Fix ipairs() recording.
4808e62 Drop marks from replayed instructions when sinking.
2e85af8 Flush symbol tables in jit.dump on trace flush.
ef087aa Fix unsinking check.
076d625 Fix Cygwin support. Note: this is not a supported target.
git-subtree-dir: luajit-2.0
git-subtree-split: 367cba29ea03aa758eb041c3a0c5f1c7616f9a0a
-rw-r--r-- | doc/install.html | 27 |
-rw-r--r-- | dynasm/dasm_x86.lua | 11 |
-rw-r--r-- | src/Makefile | 46 |
-rw-r--r-- | src/jit/dump.lua | 1 |
-rw-r--r-- | src/lib_os.c | 2 |
-rw-r--r-- | src/lj.supp | 15 |
-rw-r--r-- | src/lj_alloc.c | 2 |
-rw-r--r-- | src/lj_arch.h | 9 |
-rw-r--r-- | src/lj_err.c | 6 |
-rw-r--r-- | src/lj_ffrecord.c | 9 |
-rw-r--r-- | src/lj_snap.c | 28 |
11 files changed, 93 insertions, 63 deletions
diff --git a/doc/install.html b/doc/install.html index 7a878b1..5dc1289 100644 --- a/doc/install.html +++ b/doc/install.html @@ -112,23 +112,23 @@ operating systems, CPUs and compilers: </tr> <tr class="odd separate"> <td class="compatcpu">x86 (32 bit)</td> -<td class="compatos">GCC 4.x<br>GCC 3.4</td> -<td class="compatos">GCC 4.x<br>GCC 3.4</td> -<td class="compatos">GCC 4.x<br>GCC 3.4</td> +<td class="compatos">GCC 4.x+<br>GCC 3.4</td> +<td class="compatos">GCC 4.x+<br>GCC 3.4</td> +<td class="compatos">XCode 5.0+<br>Clang</td> <td class="compatos">MSVC, MSVC/EE<br>WinSDK<br>MinGW, Cygwin</td> </tr> <tr class="even"> <td class="compatcpu">x64 (64 bit)</td> -<td class="compatos">GCC 4.x</td> +<td class="compatos">GCC 4.x+</td> <td class="compatos">ORBIS (<a href="#ps4">PS4</a>)</td> -<td class="compatos">GCC 4.x</td> +<td class="compatos">XCode 5.0+<br>Clang</td> <td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0</td> </tr> <tr class="odd"> <td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td> <td class="compatos">GCC 4.2+</td> <td class="compatos">GCC 4.2+<br>PSP2 (<a href="#psvita">PS VITA</a>)</td> -<td class="compatos">GCC 4.2+</td> +<td class="compatos">XCode 5.0+<br>Clang</td> <td class="compatos compatno"> </td> </tr> <tr class="even"> @@ -439,8 +439,7 @@ NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86" make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" </pre> <p> -You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="http://developer.apple.com/devcenter/ios/index.action"><span class="ext">»</span> iOS SDK</a>. 
-The environment variables need to match the iOS SDK version: +You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="http://developer.apple.com/devcenter/ios/index.action"><span class="ext">»</span> iOS SDK</a>: </p> <p style="font-size: 8pt;"> Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps @@ -450,13 +449,11 @@ much slower than the JIT compiler. Please complain to Apple, not me. Or use Android. :-p </p> <pre class="code"> -IXCODE=`xcode-select -print-path` -ISDK=$IXCODE/Platforms/iPhoneOS.platform/Developer -ISDKVER=iPhoneOS6.0.sdk -ISDKP=$ISDK/usr/bin/ -ISDKF="-arch armv7 -isysroot $ISDK/SDKs/$ISDKVER" -make HOST_CC="gcc -m32 -arch i386" CROSS=$ISDKP TARGET_FLAGS="$ISDKF" \ - TARGET_SYS=iOS +ISDKP=$(xcrun --sdk iphoneos --show-sdk-path) +ICC=$(xcrun --sdk iphoneos --find clang) +ISDKF="-arch armv7 -isysroot $ISDKP" +make HOST_CC="clang -m32 -arch i386" CROSS="$(dirname $ICC)/" \ + TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS </pre> <h3 id="consoles">Cross-compiling for consoles</h3> diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua index 7ca061d..3a535f2 100644 --- a/dynasm/dasm_x86.lua +++ b/dynasm/dasm_x86.lua @@ -1081,10 +1081,11 @@ local map_op = { btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU", bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU", - shld_3 = "mriqdw:0FA4RmU|mrCqdw:0FA5Rm", - shrd_3 = "mriqdw:0FACRmU|mrCqdw:0FADRm", + shld_3 = "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:", + shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:", rdtsc_0 = "0F31", -- P1+ + rdpmc_0 = "0F33", -- P6+ cpuid_0 = "0FA2", -- P1+ -- floating point ops @@ -1190,7 +1191,7 @@ local map_op = { cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM", cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM", cvtss2sd_2 = "rro:F30F5ArM|rx/od:", - cvtss2si_2 = "rr/do:F20F2CrM|rr/qo:|rxd:|rx/qd:", + cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:", cvttpd2dq_2 = "rmo:660FE6rM", cvttps2dq_2 = "rmo:F30F5BrM", cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:", @@ 
-1247,7 +1248,7 @@ local map_op = { pcmpgtb_2 = "rmo:660F64rM", pcmpgtd_2 = "rmo:660F66rM", pcmpgtw_2 = "rmo:660F65rM", - pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only. + pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", pmaddwd_2 = "rmo:660FF5rM", pmaxsw_2 = "rmo:660FEErM", @@ -1352,7 +1353,7 @@ local map_op = { dpps_3 = "rmio:660F3A40rMU", extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU", insertps_3 = "rrio:660F3A41rMU|rxi/od:", - movntdqa_2 = "rmo:660F382ArM", + movntdqa_2 = "rxo:660F382ArM", mpsadbw_3 = "rmio:660F3A42rMU", packusdw_2 = "rmo:660F382BrM", pblendvb_3 = "rmRo:660F3810rM", diff --git a/src/Makefile b/src/Makefile index 5021e47..9c95c4c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -24,11 +24,13 @@ NODOTABIVER= 51 # removing the '#' in front of them. Make sure you force a full recompile # with "make clean", followed by "make" if you change any options. # +DEFAULT_CC = gcc +# # LuaJIT builds as a native 32 or 64 bit binary by default. -CC= gcc +CC= $(DEFAULT_CC) # # Use this if you want to force a 32 bit build on a 64 bit multilib OS. -#CC= gcc -m32 +#CC= $(DEFAULT_CC) -m32 # # Since the assembler part does NOT maintain a frame pointer, it's pointless # to slow down the C part by not omitting it. Debugging, tracebacks and @@ -151,6 +153,29 @@ XCFLAGS= ############################################################################## ############################################################################## +# Host system detection. +############################################################################## + +ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM)) + HOST_SYS= Windows + HOST_RM= del +else + HOST_SYS:= $(shell uname -s) + ifneq (,$(findstring MINGW,$(HOST_SYS))) + HOST_SYS= Windows + HOST_MSYS= mingw + endif + ifneq (,$(findstring CYGWIN,$(HOST_SYS))) + HOST_SYS= Windows + HOST_MSYS= cygwin + endif + # Use Clang for OSX host. 
+ ifeq (Darwin,$(HOST_SYS)) + DEFAULT_CC= clang + endif +endif + +############################################################################## # Flags and options for host and target. ############################################################################## @@ -263,24 +288,9 @@ ifneq (,$(LMULTILIB)) endif ############################################################################## -# System detection. +# Target system detection. ############################################################################## -ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM)) - HOST_SYS= Windows - HOST_RM= del -else - HOST_SYS:= $(shell uname -s) - ifneq (,$(findstring MINGW,$(HOST_SYS))) - HOST_SYS= Windows - HOST_MSYS= mingw - endif - ifneq (,$(findstring CYGWIN,$(HOST_SYS))) - HOST_SYS= Windows - HOST_MSYS= cygwin - endif -endif - TARGET_SYS?= $(HOST_SYS) ifeq (Windows,$(TARGET_SYS)) TARGET_STRIP+= --strip-unneeded diff --git a/src/jit/dump.lua b/src/jit/dump.lua index d15c528..e6ea18a 100644 --- a/src/jit/dump.lua +++ b/src/jit/dump.lua @@ -564,6 +564,7 @@ local function dump_trace(what, tr, func, pc, otr, oex) end if dumpmode.H then out:write("</pre>\n\n") else out:write("\n") end else + if what == "flush" then symtab, nexitsym = {}, 0 end out:write("---- TRACE ", what, "\n\n") end out:flush() diff --git a/src/lib_os.c b/src/lib_os.c index bb5a141..762bb59 100644 --- a/src/lib_os.c +++ b/src/lib_os.c @@ -36,7 +36,7 @@ LJLIB_CF(os_execute) { -#if LJ_TARGET_CONSOLE +#if LJ_NO_SYSTEM #if LJ_52 errno = ENOSYS; return luaL_fileresult(L, 0, NULL); diff --git a/src/lj.supp b/src/lj.supp index 411f261..acb9e78 100644 --- a/src/lj.supp +++ b/src/lj.supp @@ -24,3 +24,18 @@ Memcheck:Cond fun:lj_str_new } +{ + Optimized string compare + Memcheck:Addr4 + fun:lj_str_fastcmp +} +{ + Optimized string compare + Memcheck:Addr1 + fun:lj_str_fastcmp +} +{ + Optimized string compare + Memcheck:Cond + fun:lj_str_fastcmp +} diff --git a/src/lj_alloc.c b/src/lj_alloc.c index 
7c7ec67..9218c44 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c @@ -194,7 +194,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size) return ptr; } -#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) +#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || defined(__CYGWIN__) /* OSX and FreeBSD mmap() use a naive first-fit linear search. ** That's perfect for us. Except that -pagezero_size must be set for OSX, diff --git a/src/lj_arch.h b/src/lj_arch.h index d89d116..f1a1160 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -151,7 +151,11 @@ #define LJ_ARCH_NAME "x64" #define LJ_ARCH_BITS 64 #define LJ_ARCH_ENDIAN LUAJIT_LE -#define LJ_ABI_WIN LJ_TARGET_WINDOWS +#if LJ_TARGET_WINDOWS || __CYGWIN__ +#define LJ_ABI_WIN 1 +#else +#define LJ_ABI_WIN 0 +#endif #define LJ_TARGET_X64 1 #define LJ_TARGET_X86ORX64 1 #define LJ_TARGET_EHRETREG 0 @@ -422,6 +426,9 @@ #if defined(__symbian__) #define LUAJIT_NO_EXP2 #endif +#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) +#define LJ_NO_SYSTEM 1 +#endif #if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 #define LJ_NO_UNWIND 1 diff --git a/src/lj_err.c b/src/lj_err.c index 11b07b5..6d8519b 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -186,7 +186,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) /* -- External frame unwinding -------------------------------------------- */ -#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_TARGET_WINDOWS +#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN /* ** We have to use our own definitions instead of the mandatory (!) 
unwind.h, @@ -352,7 +352,7 @@ LJ_FUNCA int lj_err_unwind_arm(int state, void *ucb, _Unwind_Context *ctx) #endif -#elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS +#elif LJ_TARGET_X64 && LJ_ABI_WIN /* ** Someone in Redmond owes me several days of my life. A lot of this is @@ -417,7 +417,9 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, if (cf2) { /* We catch it, so start unwinding the upper frames. */ if (rec->ExceptionCode == LJ_MSVC_EXCODE || rec->ExceptionCode == LJ_GCC_EXCODE) { +#if LJ_TARGET_WINDOWS __DestructExceptionObject(rec, 1); +#endif setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { /* Don't catch access violations etc. */ diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 69f71ab..3083225 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -366,11 +366,12 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd) { - if (!(LJ_52 && recff_metacall(J, rd, MM_ipairs))) { - TRef tab = J->base[0]; - if (tref_istab(tab)) { + TRef tr = J->base[0]; + if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) && + recff_metacall(J, rd, MM_ipairs))) { + if (tref_istab(tr)) { J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0])); - J->base[1] = tab; + J->base[1] = tr; J->base[2] = lj_ir_kint(J, 0); rd->nres = 3; } /* else: Interpreter will throw. */ diff --git a/src/lj_snap.c b/src/lj_snap.c index 5c870ba..50412bc 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -26,9 +26,6 @@ #include "lj_cdata.h" #endif -/* Some local macros to save typing. Undef'd at the end. */ -#define IR(ref) (&J->cur.ir[(ref)]) - /* Pass IR on to next optimization in chain (FOLD). 
*/ #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) @@ -73,7 +70,7 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) IRRef ref = tref_ref(tr); if (ref) { SnapEntry sn = SNAP_TR(s, tr); - IRIns *ir = IR(ref); + IRIns *ir = &J->cur.ir[ref]; if (!(sn & (SNAP_CONT|SNAP_FRAME)) && ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { /* No need to snapshot unmodified non-inherited slots. */ @@ -404,24 +401,24 @@ static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax, } /* Check whether a sunk store corresponds to an allocation. Slow path. */ -static int snap_sunk_store2(jit_State *J, IRIns *ira, IRIns *irs) +static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs) { if (irs->o == IR_ASTORE || irs->o == IR_HSTORE || irs->o == IR_FSTORE || irs->o == IR_XSTORE) { - IRIns *irk = IR(irs->op1); + IRIns *irk = &T->ir[irs->op1]; if (irk->o == IR_AREF || irk->o == IR_HREFK) - irk = IR(irk->op1); - return (IR(irk->op1) == ira); + irk = &T->ir[irk->op1]; + return (&T->ir[irk->op1] == ira); } return 0; } /* Check whether a sunk store corresponds to an allocation. Fast path. */ -static LJ_AINLINE int snap_sunk_store(jit_State *J, IRIns *ira, IRIns *irs) +static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs) { if (irs->s != 255) return (ira + irs->s == irs); /* Fast check. */ - return snap_sunk_store2(J, ira, irs); + return snap_sunk_store2(T, ira, irs); } /* Replay snapshot state to setup side trace. 
*/ @@ -484,7 +481,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) } else { IRIns *irs; for (irs = ir+1; irs < irlast; irs++) - if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) { + if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && @@ -518,13 +515,13 @@ void lj_snap_replay(jit_State *J, GCtrace *T) op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2, snap_pref(J, T, map, nent, seen, (ir+1)->op2)); } - J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2); + J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2); } else { IRIns *irs; TRef tr = emitir(ir->ot, op1, op2); J->slot[snap_slot(sn)] = tr; for (irs = ir+1; irs < irlast; irs++) - if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) { + if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { IRIns *irr = &T->ir[irs->op1]; TRef val, key = irr->op2, tmp = tr; if (irr->o != IR_FREF) { @@ -726,7 +723,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, } else { IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; for (irs = ir+1; irs < irlast; irs++) - if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) { + if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { IRIns *iro = &T->ir[T->ir[irs->op1].op2]; uint8_t *p = (uint8_t *)cd; CTSize szs; @@ -759,7 +756,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, settabV(J->L, o, t); irlast = &T->ir[T->snap[snapno].ref]; for (irs = ir+1; irs < irlast; irs++) - if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) { + if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { IRIns *irk = &T->ir[irs->op1]; TValue tmp, *val; lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || @@ -859,7 +856,6 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) return pc; } -#undef IR #undef emitir_raw #undef emitir |