Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/corert.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Fadi Hanna <fadim@microsoft.com>, 2018-07-05 19:23:52 +0300
committer: Fadi Hanna <fadim@microsoft.com>, 2018-07-05 19:23:52 +0300
commit: 5c54131e87a5a9b52b16697b3459f2249d32146e (patch)
tree: c275b50b6ea7ff77c6389f1033f5fd9608c351a1 /src/Native
parent: 303df5960283a70111bd5e09fcd5e1792a502a36 (diff)
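Fix a perf issue discovered in the arm64 assembly stubs: use the BR instruction for tail calls instead of the RET instruction. (RET carries a hint to the branch predictor that the branch is a subroutine return, so using it for a jump that is not a return can cause mispredictions.)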
These changes should improve the overall performance on ARM64. An example of measured perf gains: 362% improvement for interface calls on cached cells. Here's the test output for 100000000 iterations:

BEFORE the changes:
=======================
"LowLevelPerf.exe" -name INTERFACE_INTERFACE_METHOD -iters 100000000
INTERFACE_INTERFACE_METHOD
    Dynamic Timer = 425064.815690 Iters/Sec
    Seconds = 235.258239 Seconds
    Process Cycles = 565810805623.000000 Cycles
    Process Cycles/Iter = 5658.108056 Cycles/Iters

AFTER the changes:
=======================
"LowLevelPerf.exe" -name INTERFACE_INTERFACE_METHOD -iters 100000000
INTERFACE_INTERFACE_METHOD
    Dynamic Timer = 1531103.010283 Iters/Sec
    Seconds = 65.312392 Seconds
    Process Cycles = 156754061586.000000 Cycles
    Process Cycles/Iter = 1567.540616 Cycles/Iters

[tfs-changeset: 1706580]
Diffstat (limited to 'src/Native')
-rw-r--r-- src/Native/Runtime/arm64/CallingConventionConverterHelpers.asm | 2
-rw-r--r-- src/Native/Runtime/arm64/ExceptionHandling.asm | 2
-rw-r--r-- src/Native/Runtime/arm64/GcProbe.asm | 6
-rw-r--r-- src/Native/Runtime/arm64/InteropThunksHelpers.asm | 2
-rw-r--r-- src/Native/Runtime/arm64/MiscStubs.asm | 2
-rw-r--r-- src/Native/Runtime/arm64/StubDispatch.asm | 8
-rw-r--r-- src/Native/Runtime/arm64/ThunkPoolThunks.asm | 2
-rw-r--r-- src/Native/Runtime/arm64/UniversalTransition.asm | 2
8 files changed, 13 insertions, 13 deletions
diff --git a/src/Native/Runtime/arm64/CallingConventionConverterHelpers.asm b/src/Native/Runtime/arm64/CallingConventionConverterHelpers.asm
index 2d43d5bba..f60e11578 100644
--- a/src/Native/Runtime/arm64/CallingConventionConverterHelpers.asm
+++ b/src/Native/Runtime/arm64/CallingConventionConverterHelpers.asm
@@ -46,7 +46,7 @@ POINTER_SIZE equ 0x08
ldr xip0, [xip0, #POINTER_SIZE] ; get pointer to CallingConventionConverter_CommonCallingStub_PointerData into xip0
ldr x12, [xip0, #POINTER_SIZE] ; get address of UniversalTransitionThunk (which we'll tailcall to later)
ldr xip0, [xip0] ; get address of ManagedCallConverterThunk (target for universal thunk to call)
- ret x12
+ br x12
LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub
;;
diff --git a/src/Native/Runtime/arm64/ExceptionHandling.asm b/src/Native/Runtime/arm64/ExceptionHandling.asm
index c71194da9..35843afee 100644
--- a/src/Native/Runtime/arm64/ExceptionHandling.asm
+++ b/src/Native/Runtime/arm64/ExceptionHandling.asm
@@ -489,7 +489,7 @@ DonePopping
NoAbort
;; reset SP and jump to continuation address
mov sp, x2
- ret x0
+ br x0
NESTED_END RhpCallCatchFunclet
diff --git a/src/Native/Runtime/arm64/GcProbe.asm b/src/Native/Runtime/arm64/GcProbe.asm
index caa400ce4..73d674a5c 100644
--- a/src/Native/Runtime/arm64/GcProbe.asm
+++ b/src/Native/Runtime/arm64/GcProbe.asm
@@ -566,7 +566,7 @@ EXTRA_SAVE_SIZE equ (32*16)
EPILOG_RESTORE_REG_PAIR x27, x28, #0x60
EPILOG_NOP ldr x9, [sp, #0x78]
EPILOG_RESTORE_REG_PAIR fp, lr, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)!
- EPILOG_NOP ret x9
+ EPILOG_NOP br x9
NESTED_END RhpHijackForGcStressLeaf
@@ -675,7 +675,7 @@ EXTRA_SAVE_SIZE equ (32*16)
ldr x2, [sp, #PROBE_FRAME_SIZE]
FREE_PROBE_FRAME 0x10, {true} ; This restores exception object back into x0
EPILOG_NOP mov x1, x0 ; Move the Exception object back into x1 where the catch handler expects it
- EPILOG_NOP ret x2
+ EPILOG_NOP br x2
MEND
;;
@@ -847,7 +847,7 @@ DoneWaitingForGc
EPILOG_NOP ldr x1, [sp, #8] ; hijack target address
EPILOG_STACK_FREE 0x10
- EPILOG_NOP ret x1 ; jump to the hijack target
+ EPILOG_NOP br x1 ; jump to the hijack target
Abort
FREE_LOOP_HIJACK_FRAME
diff --git a/src/Native/Runtime/arm64/InteropThunksHelpers.asm b/src/Native/Runtime/arm64/InteropThunksHelpers.asm
index 88bb7da5d..cc8489ef1 100644
--- a/src/Native/Runtime/arm64/InteropThunksHelpers.asm
+++ b/src/Native/Runtime/arm64/InteropThunksHelpers.asm
@@ -58,7 +58,7 @@ __SECTIONREL_ThunkParamSlot
;; Now load the target address and jump to it.
ldr xip0, [xip0, #POINTER_SIZE]
- ret xip0
+ br xip0
LEAF_END RhCommonStub
diff --git a/src/Native/Runtime/arm64/MiscStubs.asm b/src/Native/Runtime/arm64/MiscStubs.asm
index ba18a93c2..99c05e4a3 100644
--- a/src/Native/Runtime/arm64/MiscStubs.asm
+++ b/src/Native/Runtime/arm64/MiscStubs.asm
@@ -96,7 +96,7 @@ RhpCheckCctor__SlowPath
EPILOG_RESTORE_REG_PAIR fp, lr, #0x20!
;; tail-call the class lib cctor check function. This function is required to return its first
;; argument, so that x0 can be preserved.
- EPILOG_NOP ret x12
+ EPILOG_NOP br x12
NESTED_END RhpCheckCctor__SlowPath2
diff --git a/src/Native/Runtime/arm64/StubDispatch.asm b/src/Native/Runtime/arm64/StubDispatch.asm
index 82a4f861e..8f3b7488c 100644
--- a/src/Native/Runtime/arm64/StubDispatch.asm
+++ b/src/Native/Runtime/arm64/StubDispatch.asm
@@ -55,7 +55,7 @@ SECTIONREL_t_TLS_DispatchCell
cmp x10, x11
bne %ft0 ;; Jump to label '0'
ldr x9, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + ($entry * 16) + 8)]
- ret x9
+ br x9
0 ;; Label '0'
MEND
@@ -70,7 +70,7 @@ SECTIONREL_t_TLS_DispatchCell
;; Now load the target address and jump to it.
ldr x9, [xip0, #8]
- ret x9
+ br x9
LEAF_END RhpCastableObjectDispatch_CommonStub
LEAF_ENTRY RhpTailCallTLSDispatchCell
@@ -79,7 +79,7 @@ SECTIONREL_t_TLS_DispatchCell
;; Tail call to the target of the dispatch cell, preserving the cell address in xip1
ldr x9, [xip1]
- ret x9
+ br x9
LEAF_END RhpTailCallTLSDispatchCell
LEAF_ENTRY RhpCastableObjectDispatchHelper_TailCalled
@@ -168,7 +168,7 @@ CurrentEntry SETA CurrentEntry + 1
;; Load the target address of the vtable into x12
ldr x12, [x12]
- ret x12
+ br x12
LEAF_END RhpVTableOffsetDispatch
;;
diff --git a/src/Native/Runtime/arm64/ThunkPoolThunks.asm b/src/Native/Runtime/arm64/ThunkPoolThunks.asm
index ddd2636db..6d9a02ff6 100644
--- a/src/Native/Runtime/arm64/ThunkPoolThunks.asm
+++ b/src/Native/Runtime/arm64/ThunkPoolThunks.asm
@@ -42,7 +42,7 @@ RO$name % 8
;; fix offset to point to last QWORD in page : xip1 <- [xip0 + PAGE_SIZE - POINTER_SIZE]
;; tailcall to the location pointed at by the last qword in the data page
ldr xip1, [xip0, #(PAGE_SIZE - POINTER_SIZE - ($groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * $index))]
- ret xip1
+ br xip1
brk 0xf000 ;; Stubs need to be 16-byte aligned (see comment above). Filling padding with a
;; deterministic brk instruction, instead of having it just filled with zeros.
diff --git a/src/Native/Runtime/arm64/UniversalTransition.asm b/src/Native/Runtime/arm64/UniversalTransition.asm
index fe4763740..dc699ebed 100644
--- a/src/Native/Runtime/arm64/UniversalTransition.asm
+++ b/src/Native/Runtime/arm64/UniversalTransition.asm
@@ -147,7 +147,7 @@
EPILOG_RESTORE_REG_PAIR fp, lr, #STACK_SIZE!
;; Tailcall to the target address.
- EPILOG_NOP ret x12
+ EPILOG_NOP br x12
NESTED_END Rhp$FunctionName