Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/corert.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorScott Mosier <smosier@microsoft.com>2016-02-08 23:44:58 +0300
committerScott Mosier <smosier@microsoft.com>2016-02-08 23:44:58 +0300
commite7cf9872439f602b39a320c6ade8c9af6dd2e817 (patch)
tree140ac9095691a81167d010f81fb4688daf00cae7 /src/Native/Runtime/i386
parentcfb63d562045aafa3a81a4834dd457ad60e81ba6 (diff)
Fix RhpLoopHijack to stop trashing important registers
The loop hijack worker routine is not honoring its contract. Namely, the runtime is not allowed to trash any registers in our worker (except r12 on ARM). The two big oversights were the scratch FP registers and the flags register. I have also added a per-module map from loop index to target address (thus requiring all the shash.h includes). This primarily helps gcstress throughput because the loop indirection cell address calculation ends up being surprisingly lengthy. I considered the other obvious approach of "back-patching" the loop indirection cell in the gcstress case (normal loop hijacking does this, but under gcstress we do not). However, I ended up preferring the map because it could also help GC suspension latency in normal operation. [tfs-changeset: 1573401]
Diffstat (limited to 'src/Native/Runtime/i386')
-rw-r--r--src/Native/Runtime/i386/GcProbe.asm142
1 files changed, 104 insertions, 38 deletions
diff --git a/src/Native/Runtime/i386/GcProbe.asm b/src/Native/Runtime/i386/GcProbe.asm
index a23058ae4..7dc59a86c 100644
--- a/src/Native/Runtime/i386/GcProbe.asm
+++ b/src/Native/Runtime/i386/GcProbe.asm
@@ -3,6 +3,7 @@
;; See the LICENSE file in the project root for more information.
.586
+ .xmm
.model flat
option casemap:none
.code
@@ -565,59 +566,109 @@ EXTERN RecoverLoopHijackTarget : PROC
EXTERN _g_fGcStressStarted : DWORD
EXTERN RhpCall : PROC
+FXSAVE_SIZE equ 512
+
FASTCALL_FUNC RhpLoopHijack, 8 ;; ecx, edx are ignored, 12 bytes are on the stack, but we have to pretend
;; that we don't have any stack arguments so that debuggers don't mess up our
;; stack traces
;; On the stack:
- ;; [esp + 8] -> chunk sub-index (0-256) BEWARE: this has been sign-extended, but it is unsigned
- ;; [esp + 4] -> chunk starting index
- ;; [esp ] -> ModuleHeader *
+ ;; [esp + 0h] -> ModuleHeader *
+ ;; [esp + 4h] -> chunk starting index
+ ;; [esp + 8h] -> chunk sub-index (0-255) BEWARE: this has been sign-extended, but it is unsigned
;;
-
- ; spill some registers first, these will become part of the Frame, but not in this order
- push ecx ; ECX
-
- ;; Combine the two indexes and rearrange the stack
- xor ecx, ecx
- mov cl, [esp + 0Ch]
- add ecx, [esp + 08h]
- mov [esp + 08h], ecx
- mov ecx, [esp + 04h]
- mov [esp + 0Ch], ecx
- mov [esp + 04h], edx ; save EDX
- ;; On the stack:
- ;; [esp + C] -> ModuleHeader * -> return address
- ;; [esp + 8] -> indirection cell index for loop -> EBP save
- ;; [esp + 4] -> EDX save
- ;; [esp + 0] -> ECX save
+ pushfd ; eflags
+ push ecx
+ push edx
- ;; edx <- GetThread(), TRASHES ecx
- INLINE_GETTHREAD edx, ecx
+ ;;
+ ;; NOTE: Do not trash EAX or any preserved register until the PushProbeFrame
+ ;;
+
+ ;; On the stack:
+ ;; [esp + 0h] -> edx save
+ ;; [esp + 4h] -> ecx save
+ ;; [esp + 8h] -> eflags save
+ ;; [esp + 0ch] -> ModuleHeader *
+ ;; [esp + 10h] -> chunk starting index
+ ;; [esp + 14h] -> chunk sub-index (0-255) BEWARE: this has been sign-extended, but it is unsigned
+ ;;
+
+ ;; Combine the two indexes
+ xor ecx, ecx ; ecx <- 0
+ mov cl, [esp + 14h] ; ecx <- chunk sub-index
+ add ecx, [esp + 10h] ; ecx <- (chunk sub-index) + (chunk starting index) = (indirection cell index)
+ push ecx
+
+ ;; On the stack:
+ ;; [esp + 0h] -> indirection cell index
+ ;; [esp + 4h] -> edx save
+ ;; [esp + 8h] -> ecx save
+ ;; [esp + 0ch] -> eflags save
+ ;; [esp + 10h] -> ModuleHeader *
+ ;; [esp + 14h] -> scratch -> will be ebp save
+ ;; [esp + 18h] -> scratch -> will be return address
+ ;;
+
+ ;; Setup EBP frame
+ mov [esp + 14h], ebp
+ lea ebp, [esp + 14h]
+
+ ;; On the stack:
+ ;; [ebp - 14h] -> indirection cell index
+ ;; [ebp - 10h] -> edx save
+ ;; [ebp - 0ch] -> ecx save
+ ;; [ebp - 8h] -> eflags save
+ ;; [ebp - 4h] -> ModuleHeader *
+ ;; ebp:[ebp + 0h] -> ebp save
+ ;; [ebp + 4h] -> scratch -> will be return address
+ ;;
- mov ecx, [esp + 8]
- mov [esp + 8], ebp
- lea ebp, [esp + 8]
+ ;; make (aligned) space for the XMM spills
+ sub esp, FXSAVE_SIZE
+ and esp, NOT 0Fh
- ; EDX: Thread pointer
- ; ECX: indirection cell index
+ ;; @TODO: save AVX state (currently our code generator doesn't use AVX)
+ fxsave [esp]
- ; EDX already pushed as part of the frame
- ; ECX already pushed as part of the frame
+ ;; PushProbeFrame wants the Thread* in edx
+ INLINE_GETTHREAD edx, ecx ;; edx <- GetThread(), TRASHES ecx
+
+ ;; Push edx and ecx as part of the PInvokeTransitionFrame
+ push [ebp - 10h] ;; push edx
+ push [ebp - 0ch] ;; push ecx
PushProbeFrame PROBE_SAVE_FLAGS_EVERYTHING ;; pushes 9 dwords
- ; EDX: Thread pointer
- ; ECX: indirection cell index
- ; EAX: scratch
+ifdef _DEBUG
+ ;; trash the old save locations for ecx / edx to make sure we don't use them -- they must be restored from the
+ ;; PInvokeTransitionFrame
+ xor ecx, ecx
+ mov [ebp - 10h], ecx
+ mov [ebp - 0ch], ecx
+endif
- ; ecx already setup with indirecion cell index
- mov edx, [ebp + 4] ; load ModuleHeader *
+ ;; On the stack:
+ ;; esp:[esp + 0h] -> PInvokeTransitionFrame (pushed by PushProbeFrame)
+ ;; [esp + 24h] -> ECX save (part of PInvokeTransitionFrame)
+ ;; [esp + 28h] -> EDX save (part of PInvokeTransitionFrame)
+ ;; [esp + 2ch] -> FXSAVE area
+ ;; --------------------------------------------------
+ ;; [ebp - 14h] -> indirection cell index
+ ;; [ebp - 10h] -> edx save
+ ;; [ebp - 0ch] -> ecx save
+ ;; [ebp - 8h] -> eflags save
+ ;; [ebp - 4h] -> ModuleHeader *
+ ;; ebp:[ebp + 0h] -> ebp save
+ ;; [ebp + 4h] -> scratch -> will be return address
+
+ mov ecx, [ebp - 14h] ; ecx <- indirection cell index
+ mov edx, [ebp - 4h] ; edx <- ModuleHeader *
call RecoverLoopHijackTarget
- mov [ebp + 4], eax ; store original loop target as return address
+ mov [ebp + 4h], eax ; store original loop target as return address
mov [esp + OFFSETOF__PInvokeTransitionFrame__m_RIP], eax ; patch EIP in the Frame
- mov edx, [esp + 8] ; recover Thread * from Frame
+ mov edx, [esp + OFFSETOF__PInvokeTransitionFrame__m_pThread] ; recover Thread * from Frame
; Early out if GC stress is currently suppressed. Do this after we have computed the real address to
; return to but before we link the transition frame onto m_pHackPInvokeTunnel (because hitting this
@@ -651,20 +702,35 @@ ifdef FEATURE_GC_STRESS
cmp al, 0
je @F
- mov [ebx + OFFSETOF__Thread__m_pHackPInvokeTunnel], esp
+ mov edx, [esp + OFFSETOF__PInvokeTransitionFrame__m_pThread] ; recover Thread * from Frame
+ mov [edx + OFFSETOF__Thread__m_pHackPInvokeTunnel], esp ; esp is address of PInvokeTransitionFrame
mov eax, REDHAWKGCINTERFACE__STRESSGC
call RhpCall
@@:
endif ;; FEATURE_GC_STRESS
- mov ecx, esp
+ mov ecx, esp ; esp is address of PInvokeTransitionFrame
mov eax, _RhpWaitForGC
call RhpCall
DoneWaitingForGc:
+
+ ;; Shuffle the eflags next to ebp so that we don't have to do any funny business in the epilog that might trash
+ ;; the flags.
+ mov ecx, [ebp - 8h]
+ mov [ebp - 4h], ecx
+
+ ; Restore our integer register state from the PInvokeTransitionFrame
PopProbeFrame
pop ecx
pop edx
+
+ ; Restore our FP state from the FXSAVE area
+ fxrstor [esp]
+
+ ; Pop the rest of our frame
+ lea esp, [ebp - 4]
+ popfd
pop ebp
ret