diff options
author | Jan Kotas <jkotas@microsoft.com> | 2016-01-09 20:18:13 +0300 |
---|---|---|
committer | Jan Kotas <jkotas@microsoft.com> | 2016-01-09 20:18:13 +0300 |
commit | 770d16438a6fba4bfa96d4a86f557f61ef2427ae (patch) | |
tree | dd57350cbad7b62d6e932b6b90c03d2c220ae425 /src/Native | |
parent | a32bfd7e32d863435d0320170bc625935a4ca680 (diff) | |
parent | ac9f0503dc5c4ada8a9ac7f71b054656fcd6c5a0 (diff) |
Merge pull request #616 from dotnet/nmirror
Merge nmirror to master
Diffstat (limited to 'src/Native')
-rw-r--r-- | src/Native/Runtime/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/Native/Runtime/UniversalTransitionHelpers.cpp | 69 | ||||
-rw-r--r-- | src/Native/Runtime/amd64/AsmMacros.inc | 12 | ||||
-rw-r--r-- | src/Native/Runtime/amd64/UniversalTransition.asm | 203 | ||||
-rw-r--r-- | src/Native/Runtime/arm/AsmMacros.h | 38 | ||||
-rw-r--r-- | src/Native/Runtime/arm/UniversalTransition.asm | 112 | ||||
-rw-r--r-- | src/Native/Runtime/i386/AsmMacros.inc | 12 | ||||
-rw-r--r-- | src/Native/Runtime/i386/UniversalTransition.asm | 35 | ||||
-rw-r--r-- | src/Native/Runtime/inc/rhbinder.h | 1 |
9 files changed, 363 insertions, 120 deletions
diff --git a/src/Native/Runtime/CMakeLists.txt b/src/Native/Runtime/CMakeLists.txt index edc352964..afafa46d8 100644 --- a/src/Native/Runtime/CMakeLists.txt +++ b/src/Native/Runtime/CMakeLists.txt @@ -36,6 +36,7 @@ set(COMMON_RUNTIME_SOURCES SyncClean.cpp thread.cpp threadstore.cpp + UniversalTransitionHelpers.cpp ../gc/gccommon.cpp ../gc/gceewks.cpp diff --git a/src/Native/Runtime/UniversalTransitionHelpers.cpp b/src/Native/Runtime/UniversalTransitionHelpers.cpp new file mode 100644 index 000000000..1c423b379 --- /dev/null +++ b/src/Native/Runtime/UniversalTransitionHelpers.cpp @@ -0,0 +1,69 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS +#endif + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + +// +// Define tables of predictable distinguished values that RhpUniversalTransition can use to +// trash argument registers after they have been saved into the transition frame. +// +// Trashing these registers is a testability aid that makes it easier to detect bugs where +// the transition frame content is not correctly propagated to the eventual callee. +// +// In the absence of trashing, such bugs can become undetectable if the code that +// dispatches the call happens to never touch the impacted argument register (e.g., xmm3 on +// amd64 or d5 on arm32). In such a case, the original enregistered argument will flow +// unmodified into the eventual callee, obscuring the fact that the dispatcher failed to +// propagate the transition frame copy of this register. +// +// These tables are manually aligned as a conservative safeguard to ensure that the +// consumers can use arbitrary access widths without ever needing to worry about alignment. +// The comments in each table show the %d/%f renderings of each 32-bit value, plus the +// %I64d/%f rendering of the combined 64-bit value of each aligned pair of 32-bit values. +// + +#define TRASH_VALUE_ALIGNMENT 16 + +EXTERN_C +DECLSPEC_ALIGN(TRASH_VALUE_ALIGNMENT) +const UInt32 RhpIntegerTrashValues[] = { + // Lo32 Hi32 Lo32 Hi32 Hi32:Lo32 + // ----------- ----------- --------- --------- ------------------ + 0x07801001U, 0x07802002U, // (125833217, 125837314) (540467148372316161) + 0x07803003U, 0x07804004U, // (125841411, 125845508) (540502341334347779) + 0x07805005U, 0x07806006U, // (125849605, 125853702) (540537534296379397) + 0x07807007U, 0x07808008U, // (125857799, 125861896) (540572727258411015) + 0x07809009U, 0x0780a00aU, // (125865993, 125870090) (540607920220442633) + 0x0780b00bU, 0x0780c00cU, // (125874187, 125878284) (540643113182474251) + 0x0780d00dU, 0x0780e00eU, // (125882381, 125886478) (540678306144505869) + 0x0780f00fU, 0x07810010U, // (125890575, 125894672) (540713499106537487) +}; + +EXTERN_C +DECLSPEC_ALIGN(TRASH_VALUE_ALIGNMENT) +const UInt32 RhpFpTrashValues[] = { + // Lo32 Hi32 Lo32 Hi32 Hi32:Lo32 + // ----------- ----------- ------------------- ------------------- ------------------- + 0x42001001U, 0x42002002U, // (32.0156288146972660, 32.0312576293945310) (8657061952.00781440) + 0x42003003U, 0x42004004U, // (32.0468864440917970, 32.0625152587890630) (8724187200.02344320) + 0x42005005U, 0x42006006U, // (32.0781440734863280, 32.0937728881835940) (8791312448.03907200) + 0x42007007U, 0x42008008U, // (32.1094017028808590, 32.1250305175781250) (8858437696.05470090) + 0x42009009U, 0x4200a00aU, // (32.1406593322753910, 32.1562881469726560) (8925562944.07032970) + 0x4200b00bU, 0x4200c00cU, // (32.1719169616699220, 32.1875457763671880) (8992688192.08595850) + 0x4200d00dU, 0x4200e00eU, // (32.2031745910644530, 32.2188034057617190) (9059813440.10158730) + 0x4200f00fU, 0x42010010U, // (32.2344322204589840, 32.2500610351562500) (9126938688.11721610) +}; + +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + diff --git a/src/Native/Runtime/amd64/AsmMacros.inc b/src/Native/Runtime/amd64/AsmMacros.inc index 487623528..63f0a3614 100644 --- a/src/Native/Runtime/amd64/AsmMacros.inc +++ b/src/Native/Runtime/amd64/AsmMacros.inc @@ -277,7 +277,11 @@ DEFAULT_FRAME_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP ;; - The macro assumes it is called from a prolog, prior to a frame pointer being setup. ;; - All preserved registers remain unchanged from their values in managed code. ;; -PUSH_COOP_PINVOKE_FRAME macro threadReg, trashReg, extraStack +;; If <NoModeSwitch> is set, threadReg is ignored. As a result, <NoModeSwitch> can only be used by helpers +;; which guarantee that they will never transition into pre-emptive mode and will never examine the m_pThread +;; field of the frame. +;; +PUSH_COOP_PINVOKE_FRAME macro threadReg, trashReg, extraStack, NoModeSwitch lea trashReg, [rsp + 8h + extraStack] push_vol_reg trashReg ; save caller's RSP push_nonvol_reg r15 ; save preserved registers @@ -288,7 +292,11 @@ PUSH_COOP_PINVOKE_FRAME macro threadReg, trashReg, extraStack push_nonvol_reg rsi ; .. push_nonvol_reg rbx ; .. push_imm DEFAULT_FRAME_SAVE_FLAGS ; save the register bitmask +ifb <NoModeSwitch> push_vol_reg threadReg ; Thread * (unused by stackwalker) +else + push_imm 0 +endif push_nonvol_reg rbp ; save caller's RBP mov trashReg, [rsp + 11*8 + extraStack] ; Find the return address push_vol_reg trashReg ; save m_RIP @@ -297,8 +305,10 @@ PUSH_COOP_PINVOKE_FRAME macro threadReg, trashReg, extraStack ;; allocate scratch space and any required alignment alloc_stack 20h + ((extraStack+8) AND (10h-1)) +ifb <NoModeSwitch> ; link the frame into the Thread mov [threadReg + OFFSETOF__Thread__m_pHackPInvokeTunnel], trashReg +endif endm ;; diff --git a/src/Native/Runtime/amd64/UniversalTransition.asm b/src/Native/Runtime/amd64/UniversalTransition.asm index 4ccd8bd42..f4032f2b9 100644 --- a/src/Native/Runtime/amd64/UniversalTransition.asm +++ b/src/Native/Runtime/amd64/UniversalTransition.asm @@ -7,6 +7,66 @@ include AsmMacros.inc ifdef FEATURE_DYNAMIC_CODE +ifdef _DEBUG +TRASH_SAVED_ARGUMENT_REGISTERS equ 1 +else +TRASH_SAVED_ARGUMENT_REGISTERS equ 0 +endif + +if TRASH_SAVED_ARGUMENT_REGISTERS ne 0 +EXTERN RhpIntegerTrashValues : QWORD +EXTERN RhpFpTrashValues : QWORD +endif ;; TRASH_SAVED_ARGUMENT_REGISTERS + +SIZEOF_RETADDR equ 8h + +SIZEOF_PINVOKE_FRAME_PTR equ 8h + +SIZEOF_RETURN_BLOCK equ 10h ; for 16 bytes of conservatively reported space that the callee can + ; use to manage the return value that the call eventually generates + +SIZEOF_FP_REGS equ 40h ; xmm0-3 + +SIZEOF_PINVOKE_FRAME equ 60h + +SIZEOF_OUT_REG_HOMES equ 20h ; Callee register spill + +; +; From CallerSP to ChildSP, the stack frame is composed of the following six adjacent +; regions: +; +; SIZEOF_RETADDR +; SIZEOF_PINVOKE_FRAME_PTR +; SIZEOF_RETURN_BLOCK +; SIZEOF_FP_REGS +; SIZEOF_PINVOKE_FRAME +; SIZEOF_OUT_REG_HOMES +; + +DISTANCE_FROM_FP_REGS_TO_CALLERSP equ SIZEOF_FP_REGS + SIZEOF_RETURN_BLOCK + SIZEOF_PINVOKE_FRAME_PTR + SIZEOF_RETADDR + +DISTANCE_FROM_CHILDSP_TO_FP_REGS equ SIZEOF_OUT_REG_HOMES + SIZEOF_PINVOKE_FRAME + +DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK equ DISTANCE_FROM_CHILDSP_TO_FP_REGS + SIZEOF_FP_REGS + +DISTANCE_FROM_CHILDSP_TO_CALLERSP equ DISTANCE_FROM_CHILDSP_TO_FP_REGS + DISTANCE_FROM_FP_REGS_TO_CALLERSP + +; RBP is required to point one slot above the PInvoke frame pointer and therefore points +; to the caller return address. +DISTANCE_FROM_CHILDSP_TO_RBP equ DISTANCE_FROM_CHILDSP_TO_CALLERSP - SIZEOF_RETADDR + +; Note that the PInvoke frame lies directly below the FP regs area. +DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_CALLERSP equ DISTANCE_FROM_FP_REGS_TO_CALLERSP +DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_RETADDR equ DISTANCE_FROM_FP_REGS_TO_CALLERSP - SIZEOF_RETADDR + +; +; Note: The distance from the top of the PInvoke frame to the CallerSP must be a multiple +; of 16. If not, PUSH_COOP_PINVOKE_FRAME will inject 8 bytes of padding (in order to +; ensure a 16-byte aligned ChildSP) and will therefore break the expected stack layout. +; + +.errnz DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_CALLERSP mod 16 + ;; ;; Defines an assembly thunk used to make a transition from managed code to a callee, ;; then (based on the return value from the callee), either returning or jumping to @@ -21,88 +81,107 @@ ifdef FEATURE_DYNAMIC_CODE ;; r11: The only parameter to the target function (passed in rdx to callee) ;; -SIZEOF_OUT_REG_HOMES equ 20h ; Callee register spill -SIZEOF_FP_REGS equ 40h ; xmm0-3 -SIZEOF_PINVOKE_FRAME equ 80h ; for default arg push -SIZEOF_SCRATCH_SPACE equ 10h ; for 16 bytes of conservatively reported scratch space -OFFSETOF_FP_ARG_SPILL equ SIZEOF_PINVOKE_FRAME + 10h -OFFSETOF_SCRATCH_SPACE equ OFFSETOF_FP_ARG_SPILL + SIZEOF_FP_REGS - -ALLOC_SIZE equ SIZEOF_FP_REGS + SIZEOF_SCRATCH_SPACE + 10h -SIZEOF_STACK_FRAME equ SIZEOF_PINVOKE_FRAME + ALLOC_SIZE + 10h - - -; [callee return] -; [out rcx] -; [out rdx] -; [out r8] -; [out r9] -; [pinvoke frame, 60h] -; [XMM regs, 40h] -; [ConservativelyReportedScratchSpace 10h] (+0xc0) -; [ptr to pinvoke frame 8h] (+0xd0) -; [caller return addr] (+0xd8) -; [in rcx] -; [in rdx] -; [in r8] -; [in r9] - +; +; Stack frame layout (from lower addresses to higher addresses): +; +; [callee return] ChildSP-008 CallerSP-0e8 +; [out rcx] ChildSP+000 CallerSP-0e0 +; [out rdx] ChildSP+008 CallerSP-0d8 +; [out r8] ChildSP+010 CallerSP-0d0 +; [out r9] ChildSP+018 CallerSP-0c8 +; [pinvoke frame, 60h] ChildSP+020 CallerSP-0c0 +; [XMM regs (argument regs from the caller), 40h] ChildSP+080 CallerSP-060 +; [ConservativelyReportedReturnBlock 10h] ChildSP+0c0 CallerSP-020 +; [ptr to pinvoke frame 8h] ChildSP+0d0 CallerSP-010 +; [caller return addr] ChildSP+0d8 CallerSP-008 +; [in rcx (argument reg from the caller)] ChildSP+0e0 CallerSP+000 +; [in rdx (argument reg from the caller)] ChildSP+0e8 CallerSP+008 +; [in r8 (argument reg from the caller)] ChildSP+0f0 CallerSP+010 +; [in r9 (argument reg from the caller)] ChildSP+0f8 CallerSP+018 +; [stack-passed arguments from the caller] ChildSP+100 CallerSP+020 +; +; Note: The callee receives a pointer to the base of the conservatively reported return +; block, and the callee has knowledge of the exact layout of all pieces of the frame +; that lie at or above the pushed XMM registers. +; NESTED_ENTRY RhpUniversalTransition, _TEXT - mov [rsp+8h], r10 ; Temporarily save r10 as it's actually a parameter - alloc_stack ALLOC_SIZE + alloc_stack DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_RETADDR - ; @TODO We are getting the thread here to avoid bifurcating PUSH_COOP_PINVOKE_FRAME, but we - ; really don't need the frame stored in the "hack pinvoke tunnel" because this codepath - ; doesn't use Enable/DisablePreemtiveGC - - INLINE_GETTHREAD rax, r10 ; rax <- Thread pointer, r10 <- trashed - PUSH_COOP_PINVOKE_FRAME rax, r10, ALLOC_SIZE - - mov rax, [rsp + SIZEOF_STACK_FRAME] ; restore r10 from input into rax - save_reg_postrsp rcx, 0h + SIZEOF_STACK_FRAME - save_reg_postrsp rdx, 8h + SIZEOF_STACK_FRAME - save_reg_postrsp r8, 10h + SIZEOF_STACK_FRAME - save_reg_postrsp r9, 18h + SIZEOF_STACK_FRAME - - save_xmm128_postrsp xmm0, OFFSETOF_FP_ARG_SPILL - save_xmm128_postrsp xmm1, OFFSETOF_FP_ARG_SPILL + 10h - save_xmm128_postrsp xmm2, OFFSETOF_FP_ARG_SPILL + 20h - save_xmm128_postrsp xmm3, OFFSETOF_FP_ARG_SPILL + 30h + ; Build the frame that the stack walker will use to unwind through this function. The + ; <NoModeSwitch> flag indicates that this function never uses Enable/DisablePreemptiveGC, + ; implying that frame address does not need to be recorded in the current thread object. + ; This macro trashes rax but does not trash any other registers. + + PUSH_COOP_PINVOKE_FRAME notUsed, rax, DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_RETADDR, <NoModeSwitch> + + ; Note that rax now holds the address of the newly allocated frame. Also note that, in + ; addition to allocating the PInvoke frame, the macro also allocated the outgoing + ; arguments area. + + save_reg_postrsp rcx, 0h + DISTANCE_FROM_CHILDSP_TO_CALLERSP + save_reg_postrsp rdx, 8h + DISTANCE_FROM_CHILDSP_TO_CALLERSP + save_reg_postrsp r8, 10h + DISTANCE_FROM_CHILDSP_TO_CALLERSP + save_reg_postrsp r9, 18h + DISTANCE_FROM_CHILDSP_TO_CALLERSP + + save_xmm128_postrsp xmm0, DISTANCE_FROM_CHILDSP_TO_FP_REGS + save_xmm128_postrsp xmm1, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 10h + save_xmm128_postrsp xmm2, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 20h + save_xmm128_postrsp xmm3, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 30h END_PROLOGUE - + ; Set rbp to point after our PInvokeTransitionFrame pointer, then store the pointer to this frame ; See StackFrameIterator::HandleManagedCalloutThunk. - lea rbp, [rsp+SIZEOF_PINVOKE_FRAME + ALLOC_SIZE + 8h] - mov [rbp + MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET], r10 - + lea rbp, [rsp + DISTANCE_FROM_CHILDSP_TO_RBP] + mov [rbp + MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET], rax + +if TRASH_SAVED_ARGUMENT_REGISTERS ne 0 + + ; Before calling out, trash all of the argument registers except the ones (rcx, rdx) that + ; hold outgoing arguments. All of these registers have been saved to the transition + ; frame, and the code at the call target is required to use only the transition frame + ; copies when dispatching this call to the eventual callee. + + movsd xmm0, mmword ptr [RhpFpTrashValues + 0h] + movsd xmm1, mmword ptr [RhpFpTrashValues + 8h] + movsd xmm2, mmword ptr [RhpFpTrashValues + 10h] + movsd xmm3, mmword ptr [RhpFpTrashValues + 18h] + + mov r8, qword ptr [RhpIntegerTrashValues + 10h] + mov r9, qword ptr [RhpIntegerTrashValues + 18h] + +endif ; TRASH_SAVED_ARGUMENT_REGISTERS + ; ; Call out to the target, while storing and reporting arguments to the GC. ; mov rdx, r11 - lea rcx, [rsp + OFFSETOF_SCRATCH_SPACE] - call rax + lea rcx, [rsp + DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK] + call r10 LABELED_RETURN_ADDRESS ReturnFromUniversalTransition ; restore fp argument registers - movdqa xmm0, [rsp + OFFSETOF_FP_ARG_SPILL ] - movdqa xmm1, [rsp + OFFSETOF_FP_ARG_SPILL + 10h] - movdqa xmm2, [rsp + OFFSETOF_FP_ARG_SPILL + 20h] - movdqa xmm3, [rsp + OFFSETOF_FP_ARG_SPILL + 30h] + movdqa xmm0, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS ] + movdqa xmm1, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 10h] + movdqa xmm2, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 20h] + movdqa xmm3, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 30h] ; restore integer argument registers - mov rcx, [rsp + 0h + SIZEOF_STACK_FRAME] - mov rdx, [rsp + 8h + SIZEOF_STACK_FRAME] - mov r8, [rsp + 10h + SIZEOF_STACK_FRAME] - mov r9, [rsp + 18h + SIZEOF_STACK_FRAME] + mov rcx, [rsp + 0h + DISTANCE_FROM_CHILDSP_TO_CALLERSP] + mov rdx, [rsp + 8h + DISTANCE_FROM_CHILDSP_TO_CALLERSP] + mov r8, [rsp + 10h + DISTANCE_FROM_CHILDSP_TO_CALLERSP] + mov r9, [rsp + 18h + DISTANCE_FROM_CHILDSP_TO_CALLERSP] ; epilog nop - - POP_COOP_PINVOKE_FRAME ALLOC_SIZE - add rsp, ALLOC_SIZE + + ; Pop the outgoing arguments area and the PInvoke frame. + POP_COOP_PINVOKE_FRAME DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_RETADDR + + ; Pop the extra space that was allocated between the PInvoke frame and the caller return address. + add rsp, DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_RETADDR TAILJMP_RAX diff --git a/src/Native/Runtime/arm/AsmMacros.h b/src/Native/Runtime/arm/AsmMacros.h index 104fa61b1..3b9ab20c6 100644 --- a/src/Native/Runtime/arm/AsmMacros.h +++ b/src/Native/Runtime/arm/AsmMacros.h @@ -113,13 +113,19 @@ DEFAULT_FRAME_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP ;; on the current thread, ready to be used if and when the helper needs to transition to pre-emptive mode. ;; ;; INVARIANTS -;; - The macro assumes it defines the method prolog, it should typically be the first code in a method and -;; certainly appear before any attempt to alter the stack pointer. +;; - The macro defines the tail of the method prolog (and commonly defines the entire method prolog in the +;; case where it is the first code in the method). The macro therefore must be the last code in a method's +;; prolog. The $ExtraStack parameter must be set to the size (in bytes) of the stack allocations, if any, +;; which were done in the preceding parts of the method prolog. ;; - This macro uses r4 and r5 (after their initial values have been saved in the frame) and upon exit r4 ;; will contain the current Thread*. ;; +;; If $NoModeSwitch is set, the current thread is not loaded and r4 is zero on exit. As a result, +;; $NoModeSwitch can only be used by helpers which guarantee that they will never transition into pre-emptive +;; mode and will never examine the m_pThread field of the frame. +;; MACRO - COOP_PINVOKE_FRAME_PROLOG + COOP_PINVOKE_FRAME_PROLOG_TAIL $ExtraStack, $NoModeSwitch PROLOG_STACK_ALLOC 4 ; Save space for caller's SP PROLOG_PUSH {r4-r6,r8-r10} ; Save preserved registers @@ -128,19 +134,40 @@ DEFAULT_FRAME_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP PROLOG_PUSH {r11,lr} ; Save caller's frame-chain pointer and PC ; Compute SP value at entry to this method and save it in the last slot of the frame (slot #11). - add r4, sp, #(12 * 4) + add r4, sp, #((12 * 4) + $ExtraStack) str r4, [sp, #(11 * 4)] ; Record the bitmask of saved registers in the frame (slot #4). mov r4, #DEFAULT_FRAME_SAVE_FLAGS str r4, [sp, #(4 * 4)] + IF "$NoModeSwitch" == "" + ; Save the current Thread * in the frame (slot #3). INLINE_GETTHREAD r4, r5 str r4, [sp, #(3 * 4)] ; Store the frame in the thread str sp, [r4, #OFFSETOF__Thread__m_pHackPInvokeTunnel] + + ELSE + + ; Clear the m_pThread field in the frame. This is not strictly required for correctness + ; but guards against confusion in case the uninitialized content is ever accidentally + ; interpreted as a thread pointer (e.g., during debugging). + mov r4, #0 + str r4, [sp, #(3 * 4)] + + ENDIF + + MEND + +;; Macro used in the common case where PInvoke frame construction constitutes the entire prolog of the +;; helper and not just the tail end. + MACRO + COOP_PINVOKE_FRAME_PROLOG + + COOP_PINVOKE_FRAME_PROLOG_TAIL 0 MEND ;; Pop the frame and restore register state preserved by COOP_PINVOKE_FRAME_PROLOG but don't return to the @@ -276,4 +303,5 @@ $Name #ifdef FEATURE_GC_STRESS EXTERN $REDHAWKGCINTERFACE__STRESSGC EXTERN $THREAD__HIJACKFORGCSTRESS -#endif ;; FEATURE_GC_STRESS
\ No newline at end of file +#endif ;; FEATURE_GC_STRESS + diff --git a/src/Native/Runtime/arm/UniversalTransition.asm b/src/Native/Runtime/arm/UniversalTransition.asm index a0606ac0c..0fe0926a4 100644 --- a/src/Native/Runtime/arm/UniversalTransition.asm +++ b/src/Native/Runtime/arm/UniversalTransition.asm @@ -5,6 +5,15 @@ #include "AsmMacros.h" +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS +#endif + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + EXTERN RhpIntegerTrashValues + EXTERN RhpFpTrashValues +#endif ;; TRASH_SAVED_ARGUMENT_REGISTERS + #define COUNT_ARG_REGISTERS (4) #define INTEGER_REGISTER_SIZE (4) #define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) @@ -15,18 +24,31 @@ #define COUNT_FLOAT_ARG_REGISTERS (8) #define FLOAT_REGISTER_SIZE (8) #define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE) + +#define TRANSITION_FRAMEPOINTER_AND_ALIGNMENT 8 + #define PINVOKE_TRANSITION_BLOCK_SIZE (12*INTEGER_REGISTER_SIZE) -#define PINVOKE_TRANSITION_FRAME_SP_OFFSET (0) -#define PINVOKE_TRANSITION_FRAME_FLAGS (4 * 7) +;; +;; From CallerSP to ChildSP, the stack frame is composed of the following five adjacent +;; regions: +;; +;; ARGUMENT_REGISTERS_SIZE +;; RETURN_BLOCK_SIZE +;; FLOAT_ARG_REGISTERS_SIZE +;; TRANSITION_FRAMEPOINTER_AND_ALIGNMENT +;; PINVOKE_TRANSITION_BLOCK_SIZE +;; +;; R7 points to the top of the TRANSITION_FRAMEPOINTER_AND_ALIGNMENT region. +;; + +#define DISTANCE_FROM_CHILDSP_TO_R7 (PINVOKE_TRANSITION_BLOCK_SIZE + TRANSITION_FRAMEPOINTER_AND_ALIGNMENT) -#define TRANSITION_FRAMEPOINTER_AND_ALIGNMENT 8 +#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (DISTANCE_FROM_CHILDSP_TO_R7 + FLOAT_ARG_REGISTERS_SIZE) -#define TRANSITION_FRAME_STACK_OFFSET (TRANSITION_FRAMEPOINTER_AND_ALIGNMENT) -#define FLOATING_ARGS_STACK_OFFSET (TRANSITION_FRAME_STACK_OFFSET + PINVOKE_TRANSITION_BLOCK_SIZE) -#define RETURN_BLOCK_STACK_OFFSET (FLOATING_ARGS_STACK_OFFSET + FLOAT_ARG_REGISTERS_SIZE) -#define ARG_REGISTERS_OFFSET (RETURN_BLOCK_STACK_OFFSET + RETURN_BLOCK_SIZE) -#define INITIAL_STACK_POINTER_OFFSET (ARG_REGISTERS_OFFSET + ARGUMENT_REGISTERS_SIZE) +#define DISTANCE_FROM_CHILDSP_TO_CALLERSP (DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK + RETURN_BLOCK_SIZE + ARGUMENT_REGISTERS_SIZE) + +#define DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_CALLERSP (DISTANCE_FROM_CHILDSP_TO_CALLERSP - PINVOKE_TRANSITION_BLOCK_SIZE) TEXTAREA @@ -53,7 +75,8 @@ ;; R0 ;; RETURN BLOCK (32 byte chunk of conservatively handled memory) ;; ------ The base address of the Return block is the TransitionBlock pointer, the floating point args are -;; in the neg space of the TransitionBlock pointer. +;; in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact +;; layout of all pieces of the frame that lie at or above the pushed floating point registers. ;; D7 ;; D6 ;; D5 @@ -62,11 +85,11 @@ ;; D2 ;; D1 ;; D0 +;; Pointer to Transition Frame +;; Alignment Padding (4 bytes) ;;--------------------- ;; PINVOKE TRANSITION_FRAME ;;--------------------- -;; Pointer to Transition Frame -;; Alignment Padding (4 bytes) ;; r0 shall contain a pointer to the TransitionBlock ;; r1 shall contain the value that was in sp-8 at entry to this function @@ -84,36 +107,53 @@ PROLOG_STACK_ALLOC RETURN_BLOCK_SIZE ; Save space a buffer to be used to hold return buffer data. PROLOG_VPUSH {d0-d7} ; Capture the floating point argument registers - ;; Build PInvokeTransitionFrame. This is used to ensure all arguments are reported conservatively. - - PROLOG_STACK_ALLOC 4 ; Align the stack and save space for caller's SP - PROLOG_PUSH {r4-r6,r8-r10} ; Save preserved registers - PROLOG_STACK_ALLOC 8 ; Save space for flags and Thread* - PROLOG_PUSH {r7} ; Save caller's FP - PROLOG_PUSH {r11,lr} ; Save caller's frame-chain pointer and PC - - ;; Build space to save pointer to transition frame, and setup frame pointer - PROLOG_STACK_SAVE r7 + ;; Build space to save pointer to transition frame PROLOG_STACK_ALLOC TRANSITION_FRAMEPOINTER_AND_ALIGNMENT ; Space for transition frame pointer plus stack alignment padding - ;; Compute Transition frame address and store into frame - add r1, sp, #(TRANSITION_FRAME_STACK_OFFSET) - str r1, [r7, #MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET] + ;; Build the transition frame that the stack walker will use to unwind through this function. + ;; The NoModeSwitch flag indicates that this function never uses Enable/DisablePreemptiveGC, + ;; implying that frame address does not need to be recorded in the current thread object. + ;; This macro trashes r4 (after it is saved into the frame) but does not trash any other registers. - ;; Compute SP value at entry to this method and save it in the last slot of the frame (slot #11). - add r1, sp, #(INITIAL_STACK_POINTER_OFFSET) - str r1, [sp, #(TRANSITION_FRAME_STACK_OFFSET + (11 * 4))] + COOP_PINVOKE_FRAME_PROLOG_TAIL DISTANCE_FROM_TOP_OF_PINVOKE_FRAME_TO_CALLERSP, NoModeSwitch - ;; Record the bitmask of saved registers in the frame (slot #4). - mov r1, #DEFAULT_FRAME_SAVE_FLAGS - str r1, [sp, #(TRANSITION_FRAME_STACK_OFFSET + (4 * 4))] + ;; The prolog has ended, r7 has been saved into the transition frame, and sp now holds + ;; the address of the newly allocated transition frame. If the stack walker unwinds + ;; through this function, it will locate the transition frame pointer by checking the stack + ;; slot directly below whatever address is in r7. Point r7 to the top of the 8 byte save + ;; area that was allocated above, then store the transition frame address directly below it. + add r7, sp, #DISTANCE_FROM_CHILDSP_TO_R7 + str sp, [r7, #MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET] ;; Setup the arguments to the transition thunk. mov r1, r3 +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + + ;; Before calling out, trash all of the argument registers except the ones (r0, r1) that + ;; hold outgoing arguments. All of these registers have been saved to the transition + ;; frame, and the code at the call target is required to use only the transition frame + ;; copies when dispatching this call to the eventual callee. + + ldr r3, =RhpFpTrashValues + vldr d0, [r3, #(0 * 8)] + vldr d1, [r3, #(1 * 8)] + vldr d2, [r3, #(2 * 8)] + vldr d3, [r3, #(3 * 8)] + vldr d4, [r3, #(4 * 8)] + vldr d5, [r3, #(5 * 8)] + vldr d6, [r3, #(6 * 8)] + vldr d7, [r3, #(7 * 8)] + + ldr r3, =RhpIntegerTrashValues + ldr r2, [r3, #(2 * 4)] + ldr r3, [r3, #(3 * 4)] + +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + ;; Make the ReturnFromUniversalTransition alternate entry 4 byte aligned ALIGN 4 - add r0, sp, #(RETURN_BLOCK_STACK_OFFSET) ; First parameter to target function is a pointer to the return block + add r0, sp, #DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK ;; First parameter to target function is a pointer to the return block blx r12 LABELED_RETURN_ADDRESS ReturnFromUniversalTransition @@ -121,16 +161,12 @@ ;; argument registers. Additionally make sure the thumb2 bit is set. orr r12, r0, #1 + ;; Pop the PInvokeTransitionFrame + COOP_PINVOKE_FRAME_EPILOG_NO_RETURN + ;; Pop the transition frame pointer and alignment EPILOG_STACK_FREE TRANSITION_FRAMEPOINTER_AND_ALIGNMENT ; Discard transition pointer region - ;; Pop the PInvokeTransitionFrame - EPILOG_POP {r11,lr} ; Restore caller's frame-chain pointer and PC (return address) - EPILOG_POP {r7} ; Restore caller's FP - EPILOG_STACK_FREE 8 ; Discard flags and Thread* - EPILOG_POP {r4-r6,r8-r10} ; Restore preserved registers - EPILOG_STACK_FREE 4 ; Discard caller's SP and stack alignment padding - ;; Restore the argument registers. EPILOG_VPOP {d0-d7} EPILOG_STACK_FREE RETURN_BLOCK_SIZE ; pop return block conservatively reported area diff --git a/src/Native/Runtime/i386/AsmMacros.inc b/src/Native/Runtime/i386/AsmMacros.inc index ffda6561e..8a249d2c1 100644 --- a/src/Native/Runtime/i386/AsmMacros.inc +++ b/src/Native/Runtime/i386/AsmMacros.inc @@ -85,21 +85,31 @@ endm ;; ;; EAX is trashed by this macro. ;; -PUSH_COOP_PINVOKE_FRAME macro threadReg +;; If <NoModeSwitch> is set, threadReg is ignored. As a result, <NoModeSwitch> can only be used by helpers +;; which guarantee that they will never transition into pre-emptive mode and will never examine the m_pThread +;; field of the frame. +;; +PUSH_COOP_PINVOKE_FRAME macro threadReg, NoModeSwitch lea eax, [ebp + 8] ; get the ESP of the caller push eax ; save ESP push edi push esi push ebx push PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP +ifb <NoModeSwitch> push threadReg ; Thread * (unused by stackwalker) +else + push 0 +endif mov eax, [ebp + 0] ; Find previous EBP value push eax ; save EBP mov eax, [ebp + 4] ; Find the return address push eax ; save m_RIP lea eax, [esp + 0] ; EAX == address of frame +ifb <NoModeSwitch> mov [threadReg + OFFSETOF__Thread__m_pHackPInvokeTunnel], eax ; Link frame to thread +endif endm ;; diff --git a/src/Native/Runtime/i386/UniversalTransition.asm b/src/Native/Runtime/i386/UniversalTransition.asm index fcfb247cc..7abd1fc63 100644 --- a/src/Native/Runtime/i386/UniversalTransition.asm +++ b/src/Native/Runtime/i386/UniversalTransition.asm @@ -33,15 +33,26 @@ ifdef FEATURE_DYNAMIC_CODE ;; FRAME POINTERS, OR THE STACK WALKER CAN'T STACKWALK OUT OF HERE ;; +; +; Stack frame layout (from lower addresses to higher addresses): +; ; [callee return] ; [pinvoke frame, 20h] -; [in edx] -; [in ecx] +; [in edx (argument register from the caller)] +; [in ecx (argument register from the caller)] ; [ConservativelyReportedScratchSpace 8h] +; -- On input (i.e., when control jumps to RhpUniversalTransition), the low 4 bytes of +; this area contain the address of the callee and the high 4 bytes of this area contain +; the extra argument to pass to the callee. ; [ptr to pinvoke frame 4h] ; [saved ebp register] ; [caller return addr] - +; [stack-passed arguments from the caller] +; +; Note: The callee receives a pointer to the pushed edx value, and the callee has +; knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +; edx value. +; FASTCALL_FUNC RhpUniversalTransition_FAKE_ENTRY, 0 ; Set up an ebp frame @@ -54,24 +65,22 @@ ALTERNATE_ENTRY RhpUniversalTransition@0 push ecx push edx - ; @TODO We are getting the thread here to avoid bifurcating PUSH_COOP_PINVOKE_FRAME, but we - ; really don't need the frame stored in the "hack pinvoke tunnel" because this codepath - ; doesn't use Enable/DisablePreemtiveGC - - INLINE_GETTHREAD edx, eax ; edx <- Thread pointer, eax <- trashed - - PUSH_COOP_PINVOKE_FRAME edx + ; Build the frame that the stack walker will use to unwind through this function. The + ; <NoModeSwitch> flag indicates that this function never uses Enable/DisablePreemptiveGC, + ; implying that frame address does not need to be recorded in the current thread object. + PUSH_COOP_PINVOKE_FRAME notUsed, <NoModeSwitch> ;; Stash the pinvoke frame's address immediately on top of the old ebp value. This ;; position is important; the stack frame iterator knows about this setup. +.erre MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET eq -4 mov [ebp-4], esp ; ; Call out to the target, while storing and reporting arguments to the GC. ; - mov eax, [ebp-0Ch] - mov edx, [ebp-8] ; Get first argument - lea ecx, [ebp-14h] ; Get pointer to argument information + mov eax, [ebp-0Ch] ; Get the address of the callee + mov edx, [ebp-8] ; Get the extra argument to pass to the callee + lea ecx, [ebp-14h] ; Get pointer to edx value pushed above call eax LABELED_RETURN_ADDRESS ReturnFromUniversalTransition diff --git a/src/Native/Runtime/inc/rhbinder.h b/src/Native/Runtime/inc/rhbinder.h index 5e9c29baf..2fc1d6248 100644 --- a/src/Native/Runtime/inc/rhbinder.h +++ b/src/Native/Runtime/inc/rhbinder.h @@ -630,6 +630,7 @@ struct PInvokeTransitionFrame TgtPTR_Void m_RIP; TgtPTR_Void m_FramePointer; TgtPTR_Thread m_pThread; // unused by stack crawler, this is so GetThread is only called once per method + // can be an invalid pointer in universal transition cases (which never need to call GetThread) UInt32 m_dwFlags; // PInvokeTransitionFrameFlags #ifdef _TARGET_AMD64_ UInt32 m_dwAlignPad2; |