Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/corert.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Kotas <jkotas@microsoft.com>2016-01-09 20:18:13 +0300
committerJan Kotas <jkotas@microsoft.com>2016-01-09 20:18:13 +0300
commit770d16438a6fba4bfa96d4a86f557f61ef2427ae (patch)
treedd57350cbad7b62d6e932b6b90c03d2c220ae425 /src/Native
parenta32bfd7e32d863435d0320170bc625935a4ca680 (diff)
parentac9f0503dc5c4ada8a9ac7f71b054656fcd6c5a0 (diff)
Merge pull request #616 from dotnet/nmirror
Merge nmirror to master
Diffstat (limited to 'src/Native')
-rw-r--r--src/Native/Runtime/CMakeLists.txt1
-rw-r--r--src/Native/Runtime/UniversalTransitionHelpers.cpp69
-rw-r--r--src/Native/Runtime/amd64/AsmMacros.inc12
-rw-r--r--src/Native/Runtime/amd64/UniversalTransition.asm203
-rw-r--r--src/Native/Runtime/arm/AsmMacros.h38
-rw-r--r--src/Native/Runtime/arm/UniversalTransition.asm112
-rw-r--r--src/Native/Runtime/i386/AsmMacros.inc12
-rw-r--r--src/Native/Runtime/i386/UniversalTransition.asm35
-rw-r--r--src/Native/Runtime/inc/rhbinder.h1
9 files changed, 363 insertions, 120 deletions
diff --git a/src/Native/Runtime/CMakeLists.txt b/src/Native/Runtime/CMakeLists.txt
index edc352964..afafa46d8 100644
--- a/src/Native/Runtime/CMakeLists.txt
+++ b/src/Native/Runtime/CMakeLists.txt
@@ -36,6 +36,7 @@ set(COMMON_RUNTIME_SOURCES
SyncClean.cpp
thread.cpp
threadstore.cpp
+ UniversalTransitionHelpers.cpp
../gc/gccommon.cpp
../gc/gceewks.cpp
diff --git a/src/Native/Runtime/UniversalTransitionHelpers.cpp b/src/Native/Runtime/UniversalTransitionHelpers.cpp
new file mode 100644
index 000000000..1c423b379
--- /dev/null
+++ b/src/Native/Runtime/UniversalTransitionHelpers.cpp
@@ -0,0 +1,69 @@
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+#include "common.h"
+#include "CommonTypes.h"
+#include "CommonMacros.h"
+#include "PalRedhawkCommon.h"
+#include "PalRedhawk.h"
+
+#ifdef _DEBUG
+#define TRASH_SAVED_ARGUMENT_REGISTERS
+#endif
+
+#ifdef TRASH_SAVED_ARGUMENT_REGISTERS
+
+//
+// Define tables of predictable distinguished values that RhpUniversalTransition can use to
+// trash argument registers after they have been saved into the transition frame.
+//
+// Trashing these registers is a testability aid that makes it easier to detect bugs where
+// the transition frame content is not correctly propagated to the eventual callee.
+//
+// In the absence of trashing, such bugs can become undetectable if the code that
+// dispatches the call happens to never touch the impacted argument register (e.g., xmm3 on
+// amd64 or d5 on arm32). In such a case, the original enregistered argument will flow
+// unmodified into the eventual callee, obscuring the fact that the dispatcher failed to
+// propagate the transition frame copy of this register.
+//
+// These tables are manually aligned as a conservative safeguard to ensure that the
+// consumers can use arbitrary access widths without ever needing to worry about alignment.
+// The comments in each table show the %d/%f renderings of each 32-bit value, plus the
+// %I64d/%f rendering of the combined 64-bit value of each aligned pair of 32-bit values.
+//
+
+#define TRASH_VALUE_ALIGNMENT 16
+
+EXTERN_C
+DECLSPEC_ALIGN(TRASH_VALUE_ALIGNMENT)
+const UInt32 RhpIntegerTrashValues[] = {
+ // Lo32 Hi32 Lo32 Hi32 Hi32:Lo32
+ // ----------- ----------- --------- --------- ------------------
+ 0x07801001U, 0x07802002U, // (125833217, 125837314) (540467148372316161)
+ 0x07803003U, 0x07804004U, // (125841411, 125845508) (540502341334347779)
+ 0x07805005U, 0x07806006U, // (125849605, 125853702) (540537534296379397)
+ 0x07807007U, 0x07808008U, // (125857799, 125861896) (540572727258411015)
+ 0x07809009U, 0x0780a00aU, // (125865993, 125870090) (540607920220442633)
+ 0x0780b00bU, 0x0780c00cU, // (125874187, 125878284) (540643113182474251)
+ 0x0780d00dU, 0x0780e00eU, // (125882381, 125886478) (540678306144505869)
+ 0x0780f00fU, 0x07810010U, // (125890575, 125894672) (540713499106537487)
+};
+
+EXTERN_C
+DECLSPEC_ALIGN(TRASH_VALUE_ALIGNMENT)
+const UInt32 RhpFpTrashValues[] = {
+ // Lo32 Hi32 Lo32 Hi32 Hi32:Lo32
+ // ----------- ----------- ------------------- ------------------- -------------------
+ 0x42001001U, 0x42002002U, // (32.0156288146972660, 32.0312576293945310) (8657061952.00781440)
+ 0x42003003U, 0x42004004U, // (32.0468864440917970, 32.0625152587890630) (8724187200.02344320)
+ 0x42005005U, 0x42006006U, // (32.0781440734863280, 32.0937728881835940) (8791312448.03907200)
+ 0x42007007U, 0x42008008U, // (32.1094017028808590, 32.1250305175781250) (8858437696.05470090)
+ 0x42009009U, 0x4200a00aU, // (32.1406593322753910, 32.1562881469726560) (8925562944.07032970)
+ 0x4200b00bU, 0x4200c00cU, // (32.1719169616699220, 32.1875457763671880) (8992688192.08595850)
+ 0x4200d00dU, 0x4200e00eU, // (32.2031745910644530, 32.2188034057617190) (9059813440.10158730)
+ 0x4200f00fU, 0x42010010U, // (32.2344322204589840, 32.2500610351562500) (9126938688.11721610)
+};
+
+#endif // TRASH_SAVED_ARGUMENT_REGISTERS
+
diff --git a/src/Native/Runtime/amd64/AsmMacros.inc b/src/Native/Runtime/amd64/AsmMacros.inc
index 487623528..63f0a3614 100644
--- a/src/Native/Runtime/amd64/AsmMacros.inc
+++ b/src/Native/Runtime/amd64/AsmMacros.inc
@@ -277,7 +277,11 @@ DEFAULT_FRAME_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP
;; - The macro assumes it is called from a prolog, prior to a frame pointer being setup.
;; - All preserved registers remain unchanged from their values in managed code.
;;
-PUSH_COOP_PINVOKE_FRAME macro threadReg, trashReg, extraStack
+;; If <NoModeSwitch> is set, threadReg is ignored. As a result, <NoModeSwitch> can only be used by helpers
+;; which guarantee that they will never transition into pre-emptive mode and will never examine the m_pThread
+;; field of the frame.
+;;
+PUSH_COOP_PINVOKE_FRAME macro threadReg, trashReg, extraStack, NoModeSwitch
lea trashReg, [rsp + 8h + extraStack]
push_vol_reg trashReg ; save caller's RSP
push_nonvol_reg r15 ; save preserved registers
@@ -288,7 +292,11 @@ PUSH_COOP_PINVOKE_FRAME macro threadReg, trashReg, extraStack
push_nonvol_reg rsi ; ..
push_nonvol_reg rbx ; ..
push_imm DEFAULT_FRAME_SAVE_FLAGS ; save the register bitmask
+ifb <NoModeSwitch>
push_vol_reg threadReg ; Thread * (unused by stackwalker)
+else
+ push_imm 0
+endif
push_nonvol_reg rbp ; save caller's RBP
mov trashReg, [rsp + 11*8 + extraStack] ; Find the return address
push_vol_reg trashReg ; save m_RIP
@@ -297,8 +305,10 @@ PUSH_COOP_PINVOKE_FRAME macro threadReg, trashReg, extraStack
;; allocate scratch space and any required alignment
alloc_stack 20h + ((extraStack+8) AND (10h-1))
+ifb <NoModeSwitch>
; link the frame into the Thread
mov [threadReg + OFFSETOF__Thread__m_pHackPInvokeTunnel], trashReg
+endif
endm
;;
diff --git a/src/Native/Runtime/amd64/UniversalTransition.asm b/src/Native/Runtime/amd64/UniversalTransition.asm
index 4ccd8bd42..f4032f2b9 100644
--- a/src/Native/Runtime/amd64/UniversalTransition.asm
+++ b/src/Native/Runtime/amd64/UniversalTransition.asm
@@ -7,6 +7,66 @@ include AsmMacros.inc
ifdef FEATURE_DYNAMIC_CODE
+ifdef _DEBUG
+TRASH_SAVED_ARGUMENT_REGISTERS equ 1
+else
+TRASH_SAVED_ARGUMENT_REGISTERS equ 0
+endif
+
+if TRASH_SAVED_ARGUMENT_REGISTERS ne 0
+EXTERN RhpIntegerTrashValues : QWORD
+EXTERN RhpFpTrashValues : QWORD
+endif ;; TRASH_SAVED_ARGUMENT_REGISTERS
+
+SIZEOF_RETADDR equ 8h
+
+SIZEOF_PINVOKE_FRAME_PTR equ 8h
+
+SIZEOF_RETURN_BLOCK equ 10h ; for 16 bytes of conservatively reported space that the callee can
+ ; use to manage the return value that the call eventually generates
+
+SIZEOF_FP_REGS equ 40h ; xmm0-3
+
+SIZEOF_PINVOKE_FRAME equ 60h
+
+SIZEOF_OUT_REG_HOMES equ 20h ; Callee register spill
+
+;
+; From CallerSP to ChildSP, the stack frame is composed of the following six adjacent
+; regions:
+;
+; SIZEOF_RETADDR
+; SIZEOF_PINVOKE_FRAME_PTR
+; SIZEOF_RETURN_BLOCK
+; SIZEOF_FP_REGS
+; SIZEOF_PINVOKE_FRAME
+; SIZEOF_OUT_REG_HOMES
+;
+
+DISTANCE_FROM_FP_REGS_TO_CALLERSP equ SIZEOF_FP_REGS + SIZEOF_RETURN_BLOCK + SIZEOF_PINVOKE_FRAME_PTR + SIZEOF_RETADDR
+
+DISTANCE_FROM_CHILDSP_TO_FP_REGS equ SIZEOF_OUT_REG_HOMES + SIZEOF_PINVOKE_FRAME
+
+DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK equ DISTANCE_FROM_CHILDSP_TO_FP_REGS + SIZEOF_FP_REGS
+
+DISTANCE_FROM_CHILDSP_TO_CALLERSP equ DISTANCE_FROM_CHILDSP_TO_FP_REGS + DISTANCE_FROM_FP_REGS_TO_CALLERSP
+
+; RBP is required to point one slot above the PInvoke frame pointer and therefore points
+; to the caller return address.
+DISTANCE_FROM_CHILDSP_TO_RBP equ DISTANCE_FROM_CHILDSP_TO_CALLERSP - SIZEOF_RETADDR
+
+; Note that the PInvoke frame lies directly below the FP regs area.
+DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_CALLERSP equ DISTANCE_FROM_FP_REGS_TO_CALLERSP
+DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_RETADDR equ DISTANCE_FROM_FP_REGS_TO_CALLERSP - SIZEOF_RETADDR
+
+;
+; Note: The distance from the top of the PInvoke frame to the CallerSP must be a multiple
+; of 16. If not, PUSH_COOP_PINVOKE_FRAME will inject 8 bytes of padding (in order to
+; ensure a 16-byte aligned ChildSP) and will therefore break the expected stack layout.
+;
+
+.errnz DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_CALLERSP mod 16
+
;;
;; Defines an assembly thunk used to make a transition from managed code to a callee,
;; then (based on the return value from the callee), either returning or jumping to
@@ -21,88 +81,107 @@ ifdef FEATURE_DYNAMIC_CODE
;; r11: The only parameter to the target function (passed in rdx to callee)
;;
-SIZEOF_OUT_REG_HOMES equ 20h ; Callee register spill
-SIZEOF_FP_REGS equ 40h ; xmm0-3
-SIZEOF_PINVOKE_FRAME equ 80h ; for default arg push
-SIZEOF_SCRATCH_SPACE equ 10h ; for 16 bytes of conservatively reported scratch space
-OFFSETOF_FP_ARG_SPILL equ SIZEOF_PINVOKE_FRAME + 10h
-OFFSETOF_SCRATCH_SPACE equ OFFSETOF_FP_ARG_SPILL + SIZEOF_FP_REGS
-
-ALLOC_SIZE equ SIZEOF_FP_REGS + SIZEOF_SCRATCH_SPACE + 10h
-SIZEOF_STACK_FRAME equ SIZEOF_PINVOKE_FRAME + ALLOC_SIZE + 10h
-
-
-; [callee return]
-; [out rcx]
-; [out rdx]
-; [out r8]
-; [out r9]
-; [pinvoke frame, 60h]
-; [XMM regs, 40h]
-; [ConservativelyReportedScratchSpace 10h] (+0xc0)
-; [ptr to pinvoke frame 8h] (+0xd0)
-; [caller return addr] (+0xd8)
-; [in rcx]
-; [in rdx]
-; [in r8]
-; [in r9]
-
+;
+; Stack frame layout (from lower addresses to higher addresses):
+;
+; [callee return] ChildSP-008 CallerSP-0e8
+; [out rcx] ChildSP+000 CallerSP-0e0
+; [out rdx] ChildSP+008 CallerSP-0d8
+; [out r8] ChildSP+010 CallerSP-0d0
+; [out r9] ChildSP+018 CallerSP-0c8
+; [pinvoke frame, 60h] ChildSP+020 CallerSP-0c0
+; [XMM regs (argument regs from the caller), 40h] ChildSP+080 CallerSP-060
+; [ConservativelyReportedReturnBlock 10h] ChildSP+0c0 CallerSP-020
+; [ptr to pinvoke frame 8h] ChildSP+0d0 CallerSP-010
+; [caller return addr] ChildSP+0d8 CallerSP-008
+; [in rcx (argument reg from the caller)] ChildSP+0e0 CallerSP+000
+; [in rdx (argument reg from the caller)] ChildSP+0e8 CallerSP+008
+; [in r8 (argument reg from the caller)] ChildSP+0f0 CallerSP+010
+; [in r9 (argument reg from the caller)] ChildSP+0f8 CallerSP+018
+; [stack-passed arguments from the caller] ChildSP+100 CallerSP+020
+;
+; Note: The callee receives a pointer to the base of the conservatively reported return
+; block, and the callee has knowledge of the exact layout of all pieces of the frame
+; that lie at or above the pushed XMM registers.
+;
NESTED_ENTRY RhpUniversalTransition, _TEXT
- mov [rsp+8h], r10 ; Temporarily save r10 as it's actually a parameter
- alloc_stack ALLOC_SIZE
+ alloc_stack DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_RETADDR
- ; @TODO We are getting the thread here to avoid bifurcating PUSH_COOP_PINVOKE_FRAME, but we
- ; really don't need the frame stored in the "hack pinvoke tunnel" because this codepath
- ; doesn't use Enable/DisablePreemtiveGC
-
- INLINE_GETTHREAD rax, r10 ; rax <- Thread pointer, r10 <- trashed
- PUSH_COOP_PINVOKE_FRAME rax, r10, ALLOC_SIZE
-
- mov rax, [rsp + SIZEOF_STACK_FRAME] ; restore r10 from input into rax
- save_reg_postrsp rcx, 0h + SIZEOF_STACK_FRAME
- save_reg_postrsp rdx, 8h + SIZEOF_STACK_FRAME
- save_reg_postrsp r8, 10h + SIZEOF_STACK_FRAME
- save_reg_postrsp r9, 18h + SIZEOF_STACK_FRAME
-
- save_xmm128_postrsp xmm0, OFFSETOF_FP_ARG_SPILL
- save_xmm128_postrsp xmm1, OFFSETOF_FP_ARG_SPILL + 10h
- save_xmm128_postrsp xmm2, OFFSETOF_FP_ARG_SPILL + 20h
- save_xmm128_postrsp xmm3, OFFSETOF_FP_ARG_SPILL + 30h
+ ; Build the frame that the stack walker will use to unwind through this function. The
+ ; <NoModeSwitch> flag indicates that this function never uses Enable/DisablePreemptiveGC,
+ ; implying that the frame address does not need to be recorded in the current thread object.
+ ; This macro trashes rax but does not trash any other registers.
+
+ PUSH_COOP_PINVOKE_FRAME notUsed, rax, DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_RETADDR, <NoModeSwitch>
+
+ ; Note that rax now holds the address of the newly allocated frame. Also note that, in
+ ; addition to allocating the PInvoke frame, the macro also allocated the outgoing
+ ; arguments area.
+
+ save_reg_postrsp rcx, 0h + DISTANCE_FROM_CHILDSP_TO_CALLERSP
+ save_reg_postrsp rdx, 8h + DISTANCE_FROM_CHILDSP_TO_CALLERSP
+ save_reg_postrsp r8, 10h + DISTANCE_FROM_CHILDSP_TO_CALLERSP
+ save_reg_postrsp r9, 18h + DISTANCE_FROM_CHILDSP_TO_CALLERSP
+
+ save_xmm128_postrsp xmm0, DISTANCE_FROM_CHILDSP_TO_FP_REGS
+ save_xmm128_postrsp xmm1, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 10h
+ save_xmm128_postrsp xmm2, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 20h
+ save_xmm128_postrsp xmm3, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 30h
END_PROLOGUE
-
+
; Set rbp to point after our PInvokeTransitionFrame pointer, then store the pointer to this frame
; See StackFrameIterator::HandleManagedCalloutThunk.
- lea rbp, [rsp+SIZEOF_PINVOKE_FRAME + ALLOC_SIZE + 8h]
- mov [rbp + MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET], r10
-
+ lea rbp, [rsp + DISTANCE_FROM_CHILDSP_TO_RBP]
+ mov [rbp + MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET], rax
+
+if TRASH_SAVED_ARGUMENT_REGISTERS ne 0
+
+ ; Before calling out, trash all of the argument registers except the ones (rcx, rdx) that
+ ; hold outgoing arguments. All of these registers have been saved to the transition
+ ; frame, and the code at the call target is required to use only the transition frame
+ ; copies when dispatching this call to the eventual callee.
+
+ movsd xmm0, mmword ptr [RhpFpTrashValues + 0h]
+ movsd xmm1, mmword ptr [RhpFpTrashValues + 8h]
+ movsd xmm2, mmword ptr [RhpFpTrashValues + 10h]
+ movsd xmm3, mmword ptr [RhpFpTrashValues + 18h]
+
+ mov r8, qword ptr [RhpIntegerTrashValues + 10h]
+ mov r9, qword ptr [RhpIntegerTrashValues + 18h]
+
+endif ; TRASH_SAVED_ARGUMENT_REGISTERS
+
;
; Call out to the target, while storing and reporting arguments to the GC.
;
mov rdx, r11
- lea rcx, [rsp + OFFSETOF_SCRATCH_SPACE]
- call rax
+ lea rcx, [rsp + DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK]
+ call r10
LABELED_RETURN_ADDRESS ReturnFromUniversalTransition
; restore fp argument registers
- movdqa xmm0, [rsp + OFFSETOF_FP_ARG_SPILL ]
- movdqa xmm1, [rsp + OFFSETOF_FP_ARG_SPILL + 10h]
- movdqa xmm2, [rsp + OFFSETOF_FP_ARG_SPILL + 20h]
- movdqa xmm3, [rsp + OFFSETOF_FP_ARG_SPILL + 30h]
+ movdqa xmm0, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS ]
+ movdqa xmm1, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 10h]
+ movdqa xmm2, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 20h]
+ movdqa xmm3, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 30h]
; restore integer argument registers
- mov rcx, [rsp + 0h + SIZEOF_STACK_FRAME]
- mov rdx, [rsp + 8h + SIZEOF_STACK_FRAME]
- mov r8, [rsp + 10h + SIZEOF_STACK_FRAME]
- mov r9, [rsp + 18h + SIZEOF_STACK_FRAME]
+ mov rcx, [rsp + 0h + DISTANCE_FROM_CHILDSP_TO_CALLERSP]
+ mov rdx, [rsp + 8h + DISTANCE_FROM_CHILDSP_TO_CALLERSP]
+ mov r8, [rsp + 10h + DISTANCE_FROM_CHILDSP_TO_CALLERSP]
+ mov r9, [rsp + 18h + DISTANCE_FROM_CHILDSP_TO_CALLERSP]
; epilog
nop
-
- POP_COOP_PINVOKE_FRAME ALLOC_SIZE
- add rsp, ALLOC_SIZE
+
+ ; Pop the outgoing arguments area and the PInvoke frame.
+ POP_COOP_PINVOKE_FRAME DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_RETADDR
+
+ ; Pop the extra space that was allocated between the PInvoke frame and the caller return address.
+ add rsp, DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_RETADDR
TAILJMP_RAX
diff --git a/src/Native/Runtime/arm/AsmMacros.h b/src/Native/Runtime/arm/AsmMacros.h
index 104fa61b1..3b9ab20c6 100644
--- a/src/Native/Runtime/arm/AsmMacros.h
+++ b/src/Native/Runtime/arm/AsmMacros.h
@@ -113,13 +113,19 @@ DEFAULT_FRAME_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP
;; on the current thread, ready to be used if and when the helper needs to transition to pre-emptive mode.
;;
;; INVARIANTS
-;; - The macro assumes it defines the method prolog, it should typically be the first code in a method and
-;; certainly appear before any attempt to alter the stack pointer.
+;; - The macro defines the tail of the method prolog (and commonly defines the entire method prolog in the
+;; case where it is the first code in the method). The macro therefore must be the last code in a method's
+;; prolog. The $ExtraStack parameter must be set to the size (in bytes) of the stack allocations, if any,
+;; which were done in the preceding parts of the method prolog.
;; - This macro uses r4 and r5 (after their initial values have been saved in the frame) and upon exit r4
;; will contain the current Thread*.
;;
+;; If $NoModeSwitch is set, the current thread is not loaded and r4 is zero on exit. As a result,
+;; $NoModeSwitch can only be used by helpers which guarantee that they will never transition into pre-emptive
+;; mode and will never examine the m_pThread field of the frame.
+;;
MACRO
- COOP_PINVOKE_FRAME_PROLOG
+ COOP_PINVOKE_FRAME_PROLOG_TAIL $ExtraStack, $NoModeSwitch
PROLOG_STACK_ALLOC 4 ; Save space for caller's SP
PROLOG_PUSH {r4-r6,r8-r10} ; Save preserved registers
@@ -128,19 +134,40 @@ DEFAULT_FRAME_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP
PROLOG_PUSH {r11,lr} ; Save caller's frame-chain pointer and PC
; Compute SP value at entry to this method and save it in the last slot of the frame (slot #11).
- add r4, sp, #(12 * 4)
+ add r4, sp, #((12 * 4) + $ExtraStack)
str r4, [sp, #(11 * 4)]
; Record the bitmask of saved registers in the frame (slot #4).
mov r4, #DEFAULT_FRAME_SAVE_FLAGS
str r4, [sp, #(4 * 4)]
+ IF "$NoModeSwitch" == ""
+
; Save the current Thread * in the frame (slot #3).
INLINE_GETTHREAD r4, r5
str r4, [sp, #(3 * 4)]
; Store the frame in the thread
str sp, [r4, #OFFSETOF__Thread__m_pHackPInvokeTunnel]
+
+ ELSE
+
+ ; Clear the m_pThread field in the frame. This is not strictly required for correctness
+ ; but guards against confusion in case the uninitialized content is ever accidentally
+ ; interpreted as a thread pointer (e.g., during debugging).
+ mov r4, #0
+ str r4, [sp, #(3 * 4)]
+
+ ENDIF
+
+ MEND
+
+;; Macro used in the common case where PInvoke frame construction constitutes the entire prolog of the
+;; helper and not just the tail end.
+ MACRO
+ COOP_PINVOKE_FRAME_PROLOG
+
+ COOP_PINVOKE_FRAME_PROLOG_TAIL 0
MEND
;; Pop the frame and restore register state preserved by COOP_PINVOKE_FRAME_PROLOG but don't return to the
@@ -276,4 +303,5 @@ $Name
#ifdef FEATURE_GC_STRESS
EXTERN $REDHAWKGCINTERFACE__STRESSGC
EXTERN $THREAD__HIJACKFORGCSTRESS
-#endif ;; FEATURE_GC_STRESS \ No newline at end of file
+#endif ;; FEATURE_GC_STRESS
+
diff --git a/src/Native/Runtime/arm/UniversalTransition.asm b/src/Native/Runtime/arm/UniversalTransition.asm
index a0606ac0c..0fe0926a4 100644
--- a/src/Native/Runtime/arm/UniversalTransition.asm
+++ b/src/Native/Runtime/arm/UniversalTransition.asm
@@ -5,6 +5,15 @@
#include "AsmMacros.h"
+#ifdef _DEBUG
+#define TRASH_SAVED_ARGUMENT_REGISTERS
+#endif
+
+#ifdef TRASH_SAVED_ARGUMENT_REGISTERS
+ EXTERN RhpIntegerTrashValues
+ EXTERN RhpFpTrashValues
+#endif ;; TRASH_SAVED_ARGUMENT_REGISTERS
+
#define COUNT_ARG_REGISTERS (4)
#define INTEGER_REGISTER_SIZE (4)
#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE)
@@ -15,18 +24,31 @@
#define COUNT_FLOAT_ARG_REGISTERS (8)
#define FLOAT_REGISTER_SIZE (8)
#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE)
+
+#define TRANSITION_FRAMEPOINTER_AND_ALIGNMENT 8
+
#define PINVOKE_TRANSITION_BLOCK_SIZE (12*INTEGER_REGISTER_SIZE)
-#define PINVOKE_TRANSITION_FRAME_SP_OFFSET (0)
-#define PINVOKE_TRANSITION_FRAME_FLAGS (4 * 7)
+;;
+;; From CallerSP to ChildSP, the stack frame is composed of the following five adjacent
+;; regions:
+;;
+;; ARGUMENT_REGISTERS_SIZE
+;; RETURN_BLOCK_SIZE
+;; FLOAT_ARG_REGISTERS_SIZE
+;; TRANSITION_FRAMEPOINTER_AND_ALIGNMENT
+;; PINVOKE_TRANSITION_BLOCK_SIZE
+;;
+;; R7 points to the top of the TRANSITION_FRAMEPOINTER_AND_ALIGNMENT region.
+;;
+
+#define DISTANCE_FROM_CHILDSP_TO_R7 (PINVOKE_TRANSITION_BLOCK_SIZE + TRANSITION_FRAMEPOINTER_AND_ALIGNMENT)
-#define TRANSITION_FRAMEPOINTER_AND_ALIGNMENT 8
+#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (DISTANCE_FROM_CHILDSP_TO_R7 + FLOAT_ARG_REGISTERS_SIZE)
-#define TRANSITION_FRAME_STACK_OFFSET (TRANSITION_FRAMEPOINTER_AND_ALIGNMENT)
-#define FLOATING_ARGS_STACK_OFFSET (TRANSITION_FRAME_STACK_OFFSET + PINVOKE_TRANSITION_BLOCK_SIZE)
-#define RETURN_BLOCK_STACK_OFFSET (FLOATING_ARGS_STACK_OFFSET + FLOAT_ARG_REGISTERS_SIZE)
-#define ARG_REGISTERS_OFFSET (RETURN_BLOCK_STACK_OFFSET + RETURN_BLOCK_SIZE)
-#define INITIAL_STACK_POINTER_OFFSET (ARG_REGISTERS_OFFSET + ARGUMENT_REGISTERS_SIZE)
+#define DISTANCE_FROM_CHILDSP_TO_CALLERSP (DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK + RETURN_BLOCK_SIZE + ARGUMENT_REGISTERS_SIZE)
+
+#define DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_CALLERSP (DISTANCE_FROM_CHILDSP_TO_CALLERSP - PINVOKE_TRANSITION_BLOCK_SIZE)
TEXTAREA
@@ -53,7 +75,8 @@
;; R0
;; RETURN BLOCK (32 byte chunk of conservatively handled memory)
;; ------ The base address of the Return block is the TransitionBlock pointer, the floating point args are
-;; in the neg space of the TransitionBlock pointer.
+;; in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact
+;; layout of all pieces of the frame that lie at or above the pushed floating point registers.
;; D7
;; D6
;; D5
@@ -62,11 +85,11 @@
;; D2
;; D1
;; D0
+;; Pointer to Transition Frame
+;; Alignment Padding (4 bytes)
;;---------------------
;; PINVOKE TRANSITION_FRAME
;;---------------------
-;; Pointer to Transition Frame
-;; Alignment Padding (4 bytes)
;; r0 shall contain a pointer to the TransitionBlock
;; r1 shall contain the value that was in sp-8 at entry to this function
@@ -84,36 +107,53 @@
PROLOG_STACK_ALLOC RETURN_BLOCK_SIZE ; Save space for a buffer to be used to hold return buffer data.
PROLOG_VPUSH {d0-d7} ; Capture the floating point argument registers
- ;; Build PInvokeTransitionFrame. This is used to ensure all arguments are reported conservatively.
-
- PROLOG_STACK_ALLOC 4 ; Align the stack and save space for caller's SP
- PROLOG_PUSH {r4-r6,r8-r10} ; Save preserved registers
- PROLOG_STACK_ALLOC 8 ; Save space for flags and Thread*
- PROLOG_PUSH {r7} ; Save caller's FP
- PROLOG_PUSH {r11,lr} ; Save caller's frame-chain pointer and PC
-
- ;; Build space to save pointer to transition frame, and setup frame pointer
- PROLOG_STACK_SAVE r7
+ ;; Build space to save pointer to transition frame
PROLOG_STACK_ALLOC TRANSITION_FRAMEPOINTER_AND_ALIGNMENT ; Space for transition frame pointer plus stack alignment padding
- ;; Compute Transition frame address and store into frame
- add r1, sp, #(TRANSITION_FRAME_STACK_OFFSET)
- str r1, [r7, #MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET]
+ ;; Build the transition frame that the stack walker will use to unwind through this function.
+ ;; The NoModeSwitch flag indicates that this function never uses Enable/DisablePreemptiveGC,
+ ;; implying that the frame address does not need to be recorded in the current thread object.
+ ;; This macro trashes r4 (after it is saved into the frame) but does not trash any other registers.
- ;; Compute SP value at entry to this method and save it in the last slot of the frame (slot #11).
- add r1, sp, #(INITIAL_STACK_POINTER_OFFSET)
- str r1, [sp, #(TRANSITION_FRAME_STACK_OFFSET + (11 * 4))]
+ COOP_PINVOKE_FRAME_PROLOG_TAIL DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_CALLERSP, NoModeSwitch
- ;; Record the bitmask of saved registers in the frame (slot #4).
- mov r1, #DEFAULT_FRAME_SAVE_FLAGS
- str r1, [sp, #(TRANSITION_FRAME_STACK_OFFSET + (4 * 4))]
+ ;; The prolog has ended, r7 has been saved into the transition frame, and sp now holds
+ ;; the address of the newly allocated transition frame. If the stack walker unwinds
+ ;; through this function, it will locate the transition frame pointer by checking the stack
+ ;; slot directly below whatever address is in r7. Point r7 to the top of the 8 byte save
+ ;; area that was allocated above, then store the transition frame address directly below it.
+ add r7, sp, #DISTANCE_FROM_CHILDSP_TO_R7
+ str sp, [r7, #MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET]
;; Setup the arguments to the transition thunk.
mov r1, r3
+#ifdef TRASH_SAVED_ARGUMENT_REGISTERS
+
+ ;; Before calling out, trash all of the argument registers except the ones (r0, r1) that
+ ;; hold outgoing arguments. All of these registers have been saved to the transition
+ ;; frame, and the code at the call target is required to use only the transition frame
+ ;; copies when dispatching this call to the eventual callee.
+
+ ldr r3, =RhpFpTrashValues
+ vldr d0, [r3, #(0 * 8)]
+ vldr d1, [r3, #(1 * 8)]
+ vldr d2, [r3, #(2 * 8)]
+ vldr d3, [r3, #(3 * 8)]
+ vldr d4, [r3, #(4 * 8)]
+ vldr d5, [r3, #(5 * 8)]
+ vldr d6, [r3, #(6 * 8)]
+ vldr d7, [r3, #(7 * 8)]
+
+ ldr r3, =RhpIntegerTrashValues
+ ldr r2, [r3, #(2 * 4)]
+ ldr r3, [r3, #(3 * 4)]
+
+#endif // TRASH_SAVED_ARGUMENT_REGISTERS
+
;; Make the ReturnFromUniversalTransition alternate entry 4 byte aligned
ALIGN 4
- add r0, sp, #(RETURN_BLOCK_STACK_OFFSET) ; First parameter to target function is a pointer to the return block
+ add r0, sp, #DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK ;; First parameter to target function is a pointer to the return block
blx r12
LABELED_RETURN_ADDRESS ReturnFromUniversalTransition
@@ -121,16 +161,12 @@
;; argument registers. Additionally make sure the thumb2 bit is set.
orr r12, r0, #1
+ ;; Pop the PInvokeTransitionFrame
+ COOP_PINVOKE_FRAME_EPILOG_NO_RETURN
+
;; Pop the transition frame pointer and alignment
EPILOG_STACK_FREE TRANSITION_FRAMEPOINTER_AND_ALIGNMENT ; Discard transition pointer region
- ;; Pop the PInvokeTransitionFrame
- EPILOG_POP {r11,lr} ; Restore caller's frame-chain pointer and PC (return address)
- EPILOG_POP {r7} ; Restore caller's FP
- EPILOG_STACK_FREE 8 ; Discard flags and Thread*
- EPILOG_POP {r4-r6,r8-r10} ; Restore preserved registers
- EPILOG_STACK_FREE 4 ; Discard caller's SP and stack alignment padding
-
;; Restore the argument registers.
EPILOG_VPOP {d0-d7}
EPILOG_STACK_FREE RETURN_BLOCK_SIZE ; pop return block conservatively reported area
diff --git a/src/Native/Runtime/i386/AsmMacros.inc b/src/Native/Runtime/i386/AsmMacros.inc
index ffda6561e..8a249d2c1 100644
--- a/src/Native/Runtime/i386/AsmMacros.inc
+++ b/src/Native/Runtime/i386/AsmMacros.inc
@@ -85,21 +85,31 @@ endm
;;
;; EAX is trashed by this macro.
;;
-PUSH_COOP_PINVOKE_FRAME macro threadReg
+;; If <NoModeSwitch> is set, threadReg is ignored. As a result, <NoModeSwitch> can only be used by helpers
+;; which guarantee that they will never transition into pre-emptive mode and will never examine the m_pThread
+;; field of the frame.
+;;
+PUSH_COOP_PINVOKE_FRAME macro threadReg, NoModeSwitch
lea eax, [ebp + 8] ; get the ESP of the caller
push eax ; save ESP
push edi
push esi
push ebx
push PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP
+ifb <NoModeSwitch>
push threadReg ; Thread * (unused by stackwalker)
+else
+ push 0
+endif
mov eax, [ebp + 0] ; Find previous EBP value
push eax ; save EBP
mov eax, [ebp + 4] ; Find the return address
push eax ; save m_RIP
lea eax, [esp + 0] ; EAX == address of frame
+ifb <NoModeSwitch>
mov [threadReg + OFFSETOF__Thread__m_pHackPInvokeTunnel], eax ; Link frame to thread
+endif
endm
;;
diff --git a/src/Native/Runtime/i386/UniversalTransition.asm b/src/Native/Runtime/i386/UniversalTransition.asm
index fcfb247cc..7abd1fc63 100644
--- a/src/Native/Runtime/i386/UniversalTransition.asm
+++ b/src/Native/Runtime/i386/UniversalTransition.asm
@@ -33,15 +33,26 @@ ifdef FEATURE_DYNAMIC_CODE
;; FRAME POINTERS, OR THE STACK WALKER CAN'T STACKWALK OUT OF HERE
;;
+;
+; Stack frame layout (from lower addresses to higher addresses):
+;
; [callee return]
; [pinvoke frame, 20h]
-; [in edx]
-; [in ecx]
+; [in edx (argument register from the caller)]
+; [in ecx (argument register from the caller)]
; [ConservativelyReportedScratchSpace 8h]
+; -- On input (i.e., when control jumps to RhpUniversalTransition), the low 4 bytes of
+; this area contain the address of the callee and the high 4 bytes of this area contain
+; the extra argument to pass to the callee.
; [ptr to pinvoke frame 4h]
; [saved ebp register]
; [caller return addr]
-
+; [stack-passed arguments from the caller]
+;
+; Note: The callee receives a pointer to the pushed edx value, and the callee has
+; knowledge of the exact layout of all pieces of the frame that lie at or above the pushed
+; edx value.
+;
FASTCALL_FUNC RhpUniversalTransition_FAKE_ENTRY, 0
; Set up an ebp frame
@@ -54,24 +65,22 @@ ALTERNATE_ENTRY RhpUniversalTransition@0
push ecx
push edx
- ; @TODO We are getting the thread here to avoid bifurcating PUSH_COOP_PINVOKE_FRAME, but we
- ; really don't need the frame stored in the "hack pinvoke tunnel" because this codepath
- ; doesn't use Enable/DisablePreemtiveGC
-
- INLINE_GETTHREAD edx, eax ; edx <- Thread pointer, eax <- trashed
-
- PUSH_COOP_PINVOKE_FRAME edx
+ ; Build the frame that the stack walker will use to unwind through this function. The
+ ; <NoModeSwitch> flag indicates that this function never uses Enable/DisablePreemptiveGC,
+ ; implying that the frame address does not need to be recorded in the current thread object.
+ PUSH_COOP_PINVOKE_FRAME notUsed, <NoModeSwitch>
;; Stash the pinvoke frame's address immediately on top of the old ebp value. This
;; position is important; the stack frame iterator knows about this setup.
+.erre MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET eq -4
mov [ebp-4], esp
;
; Call out to the target, while storing and reporting arguments to the GC.
;
- mov eax, [ebp-0Ch]
- mov edx, [ebp-8] ; Get first argument
- lea ecx, [ebp-14h] ; Get pointer to argument information
+ mov eax, [ebp-0Ch] ; Get the address of the callee
+ mov edx, [ebp-8] ; Get the extra argument to pass to the callee
+ lea ecx, [ebp-14h] ; Get pointer to edx value pushed above
call eax
LABELED_RETURN_ADDRESS ReturnFromUniversalTransition
diff --git a/src/Native/Runtime/inc/rhbinder.h b/src/Native/Runtime/inc/rhbinder.h
index 5e9c29baf..2fc1d6248 100644
--- a/src/Native/Runtime/inc/rhbinder.h
+++ b/src/Native/Runtime/inc/rhbinder.h
@@ -630,6 +630,7 @@ struct PInvokeTransitionFrame
TgtPTR_Void m_RIP;
TgtPTR_Void m_FramePointer;
TgtPTR_Thread m_pThread; // unused by stack crawler, this is so GetThread is only called once per method
+ // can be null (i.e., not a valid Thread pointer) in universal transition cases (which never need to call GetThread)
UInt32 m_dwFlags; // PInvokeTransitionFrameFlags
#ifdef _TARGET_AMD64_
UInt32 m_dwAlignPad2;