Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torch/luajit-rocks.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'luajit-2.1/src/vm_arm64.dasc')
-rw-r--r--luajit-2.1/src/vm_arm64.dasc287
1 files changed, 255 insertions, 32 deletions
diff --git a/luajit-2.1/src/vm_arm64.dasc b/luajit-2.1/src/vm_arm64.dasc
index f1251f2..3eaf376 100644
--- a/luajit-2.1/src/vm_arm64.dasc
+++ b/luajit-2.1/src/vm_arm64.dasc
@@ -1,6 +1,6 @@
|// Low-level VM code for ARM64 CPUs.
|// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
|
|.arch arm64
|.section code_op, code_sub
@@ -151,6 +151,21 @@
|.define FRAME_FUNC, #-16
|.define FRAME_PC, #-8
|
+|// Endian-specific defines.
+|.if ENDIAN_LE
+|.define LO, 0
+|.define OFS_RD, 2
+|.define OFS_RB, 3
+|.define OFS_RA, 1
+|.define OFS_OP, 0
+|.else
+|.define LO, 4
+|.define OFS_RD, 0
+|.define OFS_RB, 0
+|.define OFS_RA, 2
+|.define OFS_OP, 3
+|.endif
+|
|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro
|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro
|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro
@@ -236,12 +251,17 @@
|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro
|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro
|
-#define GL_J(field) (GG_OFS(J) + (int)offsetof(jit_State, field))
+#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field))
|
#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
|
|.macro hotcheck, delta
-| NYI
+| lsr CARG1, PC, #1
+| and CARG1, CARG1, #126
+| add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT
+| ldrh CARG2w, [GL, CARG1]
+| subs CARG2, CARG2, #delta
+| strh CARG2w, [GL, CARG1]
|.endmacro
|
|.macro hotloop
@@ -712,7 +732,7 @@ static void build_subroutines(BuildCtx *ctx)
| cmp CRET1, #1
| bhi ->vmeta_binop
|4:
- | ldrh RBw, [PC, #2]
+ | ldrh RBw, [PC, # OFS_RD]
| add PC, PC, #4
| add RB, PC, RB, lsl #2
| sub RB, RB, #0x20000
@@ -869,7 +889,7 @@ static void build_subroutines(BuildCtx *ctx)
| bl extern lj_meta_for // (lua_State *L, TValue *base)
| ldr INSw, [PC, #-4]
|.if JIT
- | uxtb TMP0, INS
+ | uxtb TMP0w, INSw
|.endif
| decode_RA RA, INS
| decode_RD RC, INS
@@ -1495,7 +1515,12 @@ static void build_subroutines(BuildCtx *ctx)
| bne ->fff_fallback
| checkint CARG1, ->fff_fallback
| mov CARG3, #1
- | mov CARG2, BASE // Points to stack. Little-endian.
+ | // Point to the char inside the integer in the stack slot.
+ |.if ENDIAN_LE
+ | mov CARG2, BASE
+ |.else
+ | add CARG2, BASE, #7
+ |.endif
|->fff_newstr:
| // CARG2 = str, CARG3 = len.
| str BASE, L->base
@@ -1698,7 +1723,7 @@ static void build_subroutines(BuildCtx *ctx)
| ands TMP0, PC, #FRAME_TYPE
| and TMP1, PC, #~FRAME_TYPEP
| bne >3
- | ldrb RAw, [PC, #-3]
+ | ldrb RAw, [PC, #-4+OFS_RA]
| lsl RA, RA, #3
| add TMP1, RA, #16
|3:
@@ -1732,7 +1757,20 @@ static void build_subroutines(BuildCtx *ctx)
|//-----------------------------------------------------------------------
|
|->vm_record: // Dispatch target for recording phase.
- | NYI
+ |.if JIT
+ | ldrb CARG1w, GL->hookmask
+ | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent.
+ | bne >5
+ | // Decrement the hookcount for consistency, but always do the call.
+ | ldr CARG2w, GL->hookcount
+ | tst CARG1, #HOOK_ACTIVE
+ | bne >1
+ | sub CARG2w, CARG2w, #1
+ | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT
+ | beq >1
+ | str CARG2w, GL->hookcount
+ | b >1
+ |.endif
|
|->vm_rethook: // Dispatch target for return hooks.
| ldrb TMP2w, GL->hookmask
@@ -1774,7 +1812,21 @@ static void build_subroutines(BuildCtx *ctx)
| b <4
|
|->vm_hotloop: // Hot loop counter underflow.
- | NYI
+ |.if JIT
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L).
+ | add CARG1, GL, #GG_G2DISP+GG_DISP2J
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | str PC, SAVE_PC
+ | ldr CARG3, LFUNC:CARG3->pc
+ | mov CARG2, PC
+ | str L, [GL, #GL_J(L)]
+ | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)]
+ | str BASE, L->base
+ | add CARG3, BASE, CARG3, lsl #3
+ | str CARG3, L->top
+ | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc)
+ | b <3
+ |.endif
|
|->vm_callhook: // Dispatch target for call hooks.
| mov CARG2, PC
@@ -1804,7 +1856,54 @@ static void build_subroutines(BuildCtx *ctx)
| br CRET1
|
|->cont_stitch: // Trace stitching.
- | NYI
+ |.if JIT
+ | // RA = resultptr, CARG4 = meta base
+ | ldr RBw, SAVE_MULTRES
+ | ldr INSw, [PC, #-4]
+ | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace.
+ | subs RB, RB, #8
+ | decode_RA RC, INS // Call base.
+ | and CARG3, CARG3, #LJ_GCVMASK
+ | beq >2
+ |1: // Move results down.
+ | ldr CARG1, [RA]
+ | add RA, RA, #8
+ | subs RB, RB, #8
+ | str CARG1, [BASE, RC, lsl #3]
+ | add RC, RC, #1
+ | bne <1
+ |2:
+ | decode_RA RA, INS
+ | decode_RB RB, INS
+ | add RA, RA, RB
+ |3:
+ | cmp RA, RC
+ | bhi >9 // More results wanted?
+ |
+ | ldrh RAw, TRACE:CARG3->traceno
+ | ldrh RCw, TRACE:CARG3->link
+ | cmp RCw, RAw
+ | beq ->cont_nop // Blacklisted.
+ | cmp RCw, #0
+ | bne =>BC_JLOOP // Jump to stitched trace.
+ |
+ | // Stitch a new trace to the previous trace.
+ | mov CARG1, #GL_J(exitno)
+ | str RAw, [GL, CARG1]
+ | mov CARG1, #GL_J(L)
+ | str L, [GL, CARG1]
+ | str BASE, L->base
+ | add CARG1, GL, #GG_G2J
+ | mov CARG2, PC
+ | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
+ | ldr BASE, L->base
+ | b ->cont_nop
+ |
+ |9: // Fill up results with nil.
+ | str TISNIL, [BASE, RC, lsl #3]
+ | add RC, RC, #1
+ | b <3
+ |.endif
|
|->vm_profhook: // Dispatch target for profiler hook.
#if LJ_HASPROFILE
@@ -1822,10 +1921,122 @@ static void build_subroutines(BuildCtx *ctx)
|//-- Trace exit handler -------------------------------------------------
|//-----------------------------------------------------------------------
|
+ |.macro savex_, a, b
+ | stp d..a, d..b, [sp, #a*8]
+ | stp x..a, x..b, [sp, #32*8+a*8]
+ |.endmacro
+ |
|->vm_exit_handler:
- | NYI
+ |.if JIT
+ | sub sp, sp, #(64*8)
+ | savex_, 0, 1
+ | savex_, 2, 3
+ | savex_, 4, 5
+ | savex_, 6, 7
+ | savex_, 8, 9
+ | savex_, 10, 11
+ | savex_, 12, 13
+ | savex_, 14, 15
+ | savex_, 16, 17
+ | savex_, 18, 19
+ | savex_, 20, 21
+ | savex_, 22, 23
+ | savex_, 24, 25
+ | savex_, 26, 27
+ | savex_, 28, 29
+ | stp d30, d31, [sp, #30*8]
+ | ldr CARG1, [sp, #64*8] // Load original value of lr.
+ | add CARG3, sp, #64*8 // Recompute original value of sp.
+ | mv_vmstate CARG4w, EXIT
+ | stp xzr, CARG3, [sp, #62*8] // Store 0/sp in RID_LR/RID_SP.
+ | sub CARG1, CARG1, lr
+ | ldr L, GL->cur_L
+ | lsr CARG1, CARG1, #2
+ | ldr BASE, GL->jit_base
+ | sub CARG1, CARG1, #2
+ | ldr CARG2w, [lr] // Load trace number.
+ | st_vmstate CARG4w
+ |.if ENDIAN_BE
+ | rev32 CARG2, CARG2
+ |.endif
+ | str BASE, L->base
+ | ubfx CARG2w, CARG2w, #5, #16
+ | str CARG1w, [GL, #GL_J(exitno)]
+ | str CARG2w, [GL, #GL_J(parent)]
+ | str L, [GL, #GL_J(L)]
+ | str xzr, GL->jit_base
+ | add CARG1, GL, #GG_G2J
+ | mov CARG2, sp
+ | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
+ | // Returns MULTRES (unscaled) or negated error code.
+ | ldr CARG2, L->cframe
+ | ldr BASE, L->base
+ | and sp, CARG2, #CFRAME_RAWMASK
+ | ldr PC, SAVE_PC // Get SAVE_PC.
+ | str L, SAVE_L // Set SAVE_L (on-trace resume/yield).
+ | b >1
+ |.endif
+ |
|->vm_exit_interp:
- | NYI
+ | // CARG1 = MULTRES or negated error code, BASE, PC and GL set.
+ |.if JIT
+ | ldr L, SAVE_L
+ |1:
+ | cmp CARG1w, #0
+ | blt >9 // Check for error from exit.
+ | lsl RC, CARG1, #3
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
+ | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+ | movn TISNIL, #0
+ | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+ | str RCw, SAVE_MULTRES
+ | str BASE, L->base
+ | ldr CARG2, LFUNC:CARG2->pc
+ | str xzr, GL->jit_base
+ | mv_vmstate CARG4w, INTERP
+ | ldr KBASE, [CARG2, #PC2PROTO(k)]
+ | // Modified copy of ins_next which handles function header dispatch, too.
+ | ldrb RBw, [PC, # OFS_OP]
+ | ldr INSw, [PC], #4
+ | st_vmstate CARG4w
+ | cmp RBw, #BC_FUNCC+2 // Fast function?
+ | add TMP1, GL, INS, uxtb #3
+ | bhs >4
+ |2:
+ | cmp RBw, #BC_FUNCF // Function header?
+ | add TMP0, GL, RB, uxtb #3
+ | ldr RB, [TMP0, #GG_G2DISP]
+ | decode_RA RA, INS
+ | lsr TMP0, INS, #16
+ | csel RC, TMP0, RC, lo
+ | blo >5
+ | ldr CARG3, [BASE, FRAME_FUNC]
+ | sub RC, RC, #8
+ | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ |5:
+ | br RB
+ |
+ |4: // Check frame below fast function.
+ | ldr CARG1, [BASE, FRAME_PC]
+ | ands CARG2, CARG1, #FRAME_TYPE
+ | bne <2 // Trace stitching continuation?
+ | // Otherwise set KBASE for Lua function below fast function.
+ | ldr CARG3w, [CARG1, #-4]
+ | decode_RA CARG1, CARG3
+ | sub CARG2, BASE, CARG1, lsl #3
+ | ldr LFUNC:CARG3, [CARG2, #-32]
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | ldr CARG3, LFUNC:CARG3->pc
+ | ldr KBASE, [CARG3, #PC2PROTO(k)]
+ | b <2
+ |
+ |9: // Rethrow error from the right C frame.
+ | neg CARG2, CARG1
+ | mov CARG1, L
+ | bl extern lj_err_throw // (lua_State *L, int errcode)
+ |.endif
|
|//-----------------------------------------------------------------------
|//-- Math helper functions ----------------------------------------------
@@ -1965,7 +2176,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
| // RA = src1, RC = src2, JMP with RC = target
| ldr CARG1, [BASE, RA, lsl #3]
- | ldrh RBw, [PC, #2]
+ | ldrh RBw, [PC, # OFS_RD]
| ldr CARG2, [BASE, RC, lsl #3]
| add PC, PC, #4
| add RB, PC, RB, lsl #2
@@ -2022,7 +2233,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA = src1, RC = src2, JMP with RC = target
| ldr CARG1, [BASE, RA, lsl #3]
| add RC, BASE, RC, lsl #3
- | ldrh RBw, [PC, #2]
+ | ldrh RBw, [PC, # OFS_RD]
| ldr CARG3, [RC]
| add PC, PC, #4
| add RB, PC, RB, lsl #2
@@ -2083,7 +2294,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA = src, RC = str_const (~), JMP with RC = target
| ldr CARG1, [BASE, RA, lsl #3]
| mvn RC, RC
- | ldrh RBw, [PC, #2]
+ | ldrh RBw, [PC, # OFS_RD]
| ldr CARG2, [KBASE, RC, lsl #3]
| add PC, PC, #4
| movn TMP0, #~LJ_TSTR
@@ -2111,7 +2322,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA = src, RC = num_const (~), JMP with RC = target
| ldr CARG1, [BASE, RA, lsl #3]
| add RC, KBASE, RC, lsl #3
- | ldrh RBw, [PC, #2]
+ | ldrh RBw, [PC, # OFS_RD]
| ldr CARG3, [RC]
| add PC, PC, #4
| add RB, PC, RB, lsl #2
@@ -2171,7 +2382,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
vk = op == BC_ISEQP;
| // RA = src, RC = primitive_type (~), JMP with RC = target
| ldr TMP0, [BASE, RA, lsl #3]
- | ldrh RBw, [PC, #2]
+ | ldrh RBw, [PC, # OFS_RD]
| add PC, PC, #4
| add RC, RC, #1
| add RB, PC, RB, lsl #2
@@ -2196,7 +2407,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
| // RA = dst or unused, RC = src, JMP with RC = target
- | ldrh RBw, [PC, #2]
+ | ldrh RBw, [PC, # OFS_RD]
| ldr TMP0, [BASE, RC, lsl #3]
| add PC, PC, #4
| mov_false TMP1
@@ -2443,7 +2654,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| str PC, SAVE_PC
| bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
| // Returns NULL (finished) or TValue * (metamethod).
- | ldrb RBw, [PC, #-1]
+ | ldrb RBw, [PC, #-4+OFS_RB]
| ldr BASE, L->base
| cbnz CRET1, ->vmeta_binop
| ldr TMP0, [BASE, RB, lsl #3]
@@ -3074,7 +3285,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_callt
|
|5: // Tailcall to a fast function with a Lua frame below.
- | ldrb RAw, [PC, #-3]
+ | ldrb RAw, [PC, #-4+OFS_RA]
| sub CARG1, BASE, RA, lsl #3
| ldr LFUNC:CARG1, [CARG1, #-32]
| and LFUNC:CARG1, CARG1, #LJ_GCVMASK
@@ -3115,8 +3326,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.endif
| add RA, BASE, RA, lsl #3
| ldr TAB:RB, [RA, #-16]
- | ldrh TMP3w, [PC, #2]
- | ldr CARG1w, [RA, #-8] // Get index from control var.
+ | ldrh TMP3w, [PC, # OFS_RD]
+ | ldr CARG1w, [RA, #-8+LO] // Get index from control var.
| add PC, PC, #4
| add TMP3, PC, TMP3, lsl #2
| and TAB:RB, RB, #LJ_GCVMASK
@@ -3135,7 +3346,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| stp CARG1, TMP0, [RA]
| add CARG1, CARG1, #1
|3:
- | str CARG1w, [RA, #-8] // Update control var.
+ | str CARG1w, [RA, #-8+LO] // Update control var.
| mov PC, TMP3
|4:
| ins_next
@@ -3181,8 +3392,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|5: // Despecialize bytecode if any of the checks fail.
| mov TMP0, #BC_JMP
| mov TMP1, #BC_ITERC
- | strb TMP0w, [PC, #-4]
- | strb TMP1w, [RC]
+ | strb TMP0w, [PC, #-4+OFS_OP]
+ | strb TMP1w, [RC, # OFS_OP]
| b <1
break;
@@ -3387,7 +3598,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
if (op == BC_FORI) {
| csel PC, RC, PC, gt
} else if (op == BC_JFORI) {
- | ldrh RCw, [RC, #-2]
+ | mov PC, RC
+ | ldrh RCw, [RC, #-4+OFS_RD]
} else if (op == BC_IFORL) {
| csel PC, RC, PC, le
}
@@ -3428,7 +3640,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
if (op == BC_FORI) {
| csel PC, RC, PC, hi
} else if (op == BC_JFORI) {
- | ldrh RCw, [RC, #-2]
+ | ldrh RCw, [RC, #-4+OFS_RD]
| bls =>BC_JLOOP
} else if (op == BC_IFORL) {
| csel PC, RC, PC, ls
@@ -3488,7 +3700,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_JLOOP:
|.if JIT
- | NYI
+ | // RA = base (ignored), RC = traceno
+ | ldr CARG1, [GL, #GL_J(trace)]
+ | mov CARG2w, #0 // Traces on ARM64 don't store the trace #, so use 0.
+ | ldr TRACE:RC, [CARG1, RC, lsl #3]
+ | st_vmstate CARG2w
+ | ldr RA, TRACE:RC->mcode
+ | str BASE, GL->jit_base
+ | str L, GL->tmpbuf.L
+ | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace.
+ | br RA
|.endif
break;
@@ -3546,10 +3767,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_IFUNCV:
| // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
| ldr CARG1, L->maxstack
+ | movn TMP0, #~LJ_TFUNC
| add TMP2, BASE, RC
+ | add LFUNC:CARG3, CARG3, TMP0, lsl #47
| add RA, RA, RC
| add TMP0, RC, #16+FRAME_VARG
- | str LFUNC:CARG3, [TMP2], #8 // Store (untagged) copy of LFUNC.
+ | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC.
| ldr KBASE, [PC, #-4+PC2PROTO(k)]
| cmp RA, CARG1
| str TMP0, [TMP2], #8 // Store delta + FRAME_VARG.
@@ -3736,8 +3959,8 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.uleb128 0x1\n"
"\t.sleb128 -8\n"
"\t.byte 30\n" /* Return address is in lr. */
- "\t.uleb128 1\n" /* augmentation length */
- "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.uleb128 1\n" /* augmentation length */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
"\t.align 3\n"
".LECIE2:\n\n");
@@ -3748,7 +3971,7 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.long .LASFDE3-.Lframe2\n"
"\t.long lj_vm_ffi_call-.\n"
"\t.long %d\n"
- "\t.uleb128 0\n" /* augmentation length */
+ "\t.uleb128 0\n" /* augmentation length */
"\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
"\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */
"\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */