diff options
author | Jan Kotas <jkotas@microsoft.com> | 2016-08-16 23:15:15 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-08-16 23:15:15 +0300 |
commit | 7b93a9fea3d9c495238f1755f5b50269fd9a0f54 (patch) | |
tree | 9436209a12962c89d345ec46e264a88a3c6bcff2 /src/Native/Runtime/amd64 | |
parent | 1879db890e89d735682d2c06a98b773cfefe57f1 (diff) |
Implement transition thunk for Unix amd64 (#1653)
Diffstat (limited to 'src/Native/Runtime/amd64')
-rw-r--r-- | src/Native/Runtime/amd64/AsmMacros.inc | 7 | ||||
-rw-r--r-- | src/Native/Runtime/amd64/ManagedCalloutThunk.S | 56 | ||||
-rw-r--r-- | src/Native/Runtime/amd64/ManagedCalloutThunk.asm | 57 | ||||
-rw-r--r-- | src/Native/Runtime/amd64/StubDispatch.S | 107 | ||||
-rw-r--r-- | src/Native/Runtime/amd64/StubDispatch.asm | 97 | ||||
-rw-r--r-- | src/Native/Runtime/amd64/UniversalTransition.S | 164 | ||||
-rw-r--r-- | src/Native/Runtime/amd64/UniversalTransition.asm | 10 |
7 files changed, 178 insertions, 320 deletions
diff --git a/src/Native/Runtime/amd64/AsmMacros.inc b/src/Native/Runtime/amd64/AsmMacros.inc index 9177bd460..3de5e2451 100644 --- a/src/Native/Runtime/amd64/AsmMacros.inc +++ b/src/Native/Runtime/amd64/AsmMacros.inc @@ -355,13 +355,6 @@ PTFF_RAX_IS_BYREF equ 00020000h ;; iff PTFF_SAVE_RAX: set -> eax is ByRe ;; -;; Offset from FP (rbp) where the managed callout thunk (ManagedCallout2 and possibly others in the future) -;; store a pointer to a transition frame. -;; -MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET equ -8 - - -;; ;; CONSTANTS -- SYMBOLS ;; diff --git a/src/Native/Runtime/amd64/ManagedCalloutThunk.S b/src/Native/Runtime/amd64/ManagedCalloutThunk.S deleted file mode 100644 index e50e58b29..000000000 --- a/src/Native/Runtime/amd64/ManagedCalloutThunk.S +++ /dev/null @@ -1,56 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -.intel_syntax noprefix -#include <unixasmmacros.inc> - -// -// Defines a small assembly thunk designed to be used when unmanaged code in the runtime calls out to managed -// code. In such cases the stack walker needs to be able to bridge the unmanaged gap in the stack between the -// callout and whatever managed code initially entered the runtime. This thunk makes that goal achievable by -// (a) exporting a well-known address in the thunk that will be the result of unwinding from the callout (so -// the stack frame iterator knows when its hit this case) and (b) placing a copy of a pointer to a transition -// frame saved when the previous managed caller entered the runtime into a well-known location relative to the -// thunk's frame, enabling the stack frame iterator to recover the transition frame address and use it to -// re-initialize the stack walk at the previous managed caller. 
-// -// If we end up with more cases of this (currently it's used only for the ICastable extension point for -// interface dispatch) then we might decide to produce a general routine which can handle an arbitrary number -// of arguments to the target method. For now we'll just implement the case we need, which takes two regular -// arguments (that's the 2 in the ManagedCallout2 name). -// -// Inputs: -// rdi : Argument 1 to target method -// rsi : Argument 2 to target method -// rdx : Target method address -// rcx : Pointer to previous managed method's transition frame into the runtime -// -NESTED_ENTRY ManagedCallout2, _TEXT, NoHandler - - // Push an rbp frame. Apart from making it easier to walk the stack the stack frame iterator locates - // the transition frame for the previous managed caller relative to the frame pointer to keep the code - // architecture independent. - push_nonvol_reg rbp - mov rbp, rsp - - // Allocate space for transition frame pointer and stack alignment padding. - alloc_stack 8 + 8 - - END_PROLOGUE - - // Stash the previous transition frame's address immediately on top of the old rbp value. This - // position is important; the stack frame iterator knows about this setup. - mov [rbp + MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET], rcx - - // Call the target method. Arguments are already in the correct registers. The - // ReturnFromManagedCallout2 label must immediately follow the call instruction. - call rdx -ALTERNATE_ENTRY ReturnFromManagedCallout2 - - // Pop the rbp frame and return. - mov rsp, rbp - pop rbp - ret - -NESTED_END ManagedCallout2, _TEXT diff --git a/src/Native/Runtime/amd64/ManagedCalloutThunk.asm b/src/Native/Runtime/amd64/ManagedCalloutThunk.asm deleted file mode 100644 index 9d3a137b9..000000000 --- a/src/Native/Runtime/amd64/ManagedCalloutThunk.asm +++ /dev/null @@ -1,57 +0,0 @@ -;; Licensed to the .NET Foundation under one or more agreements. 
-;; The .NET Foundation licenses this file to you under the MIT license. -;; See the LICENSE file in the project root for more information. - -include AsmMacros.inc - -;; -;; Defines a small assembly thunk designed to be used when unmanaged code in the runtime calls out to managed -;; code. In such cases the stack walker needs to be able to bridge the unmanaged gap in the stack between the -;; callout and whatever managed code initially entered the runtime. This thunk makes that goal achievable by -;; (a) exporting a well-known address in the thunk that will be the result of unwinding from the callout (so -;; the stack frame iterator knows when its hit this case) and (b) placing a copy of a pointer to a transition -;; frame saved when the previous managed caller entered the runtime into a well-known location relative to the -;; thunk's frame, enabling the stack frame iterator to recover the transition frame address and use it to -;; re-initialize the stack walk at the previous managed caller. -;; -;; If we end up with more cases of this (currently it's used only for the ICastable extension point for -;; interface dispatch) then we might decide to produce a general routine which can handle an arbitrary number -;; of arguments to the target method. For now we'll just implement the case we need, which takes two regular -;; arguments (that's the 2 in the ManagedCallout2 name). -;; -;; Inputs: -;; rcx : Argument 1 to target method -;; rdx : Argument 2 to target method -;; r8 : Target method address -;; r9 : Pointer to previous managed method's transition frame into the runtime -;; -NESTED_ENTRY ManagedCallout2, _TEXT - - ;; Push an rbp frame. Apart from making it easier to walk the stack the stack frame iterator locates - ;; the transition frame for the previous managed caller relative to the frame pointer to keep the code - ;; architecture independent. 
- push_nonvol_reg rbp - set_frame rbp, 0 - - ;; Allocate scratch space + space for transition frame pointer and stack alignment padding. - alloc_stack 20h + 8h + 8h - - END_PROLOGUE - - ;; Stash the previous transition frame's address immediately on top of the old rbp value. This - ;; position is important; the stack frame iterator knows about this setup. - mov [rbp + MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET], r9 - - ;; Call the target method. Arguments are already in the correct registers. The - ;; ReturnFromManagedCallout2 label must immediately follow the call instruction. - call r8 -LABELED_RETURN_ADDRESS ReturnFromManagedCallout2 - - ;; Pop the rbp frame and return. - mov rsp, rbp - pop rbp - ret - -NESTED_END ManagedCallout2, _TEXT - -END diff --git a/src/Native/Runtime/amd64/StubDispatch.S b/src/Native/Runtime/amd64/StubDispatch.S index 34145b2e3..baca8fd42 100644 --- a/src/Native/Runtime/amd64/StubDispatch.S +++ b/src/Native/Runtime/amd64/StubDispatch.S @@ -62,101 +62,12 @@ ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch LEAF_END RhpInitialInterfaceDispatch, _TEXT // Cache miss case, call the runtime to resolve the target and update the cache. -NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT, NoHandler - - #define RIDS_ReservedStack 0x108 // 0x50 + 0x80 + 0x30 + 8 => transition frame, xmm registers, argument registers and padding - - rsp_offsetof_xmmregs = 0x50 - rsp_offsetof_argregs = 0x50 + 0x80 - - alloc_stack RIDS_ReservedStack - - // Preserve the argument registers in the scratch space across the helper call. Note that we depend on these - // registers (which may contain GC references) being spilled before we build the PInvokeTransitionFrame below - // due to the way we build a stack range to report to the GC conservatively during a collection. 
- mov [rsp + rsp_offsetof_argregs + 8*0], rdi - mov [rsp + rsp_offsetof_argregs + 8*1], rsi - mov [rsp + rsp_offsetof_argregs + 8*2], rdx - mov [rsp + rsp_offsetof_argregs + 8*3], rcx - mov [rsp + rsp_offsetof_argregs + 8*4], r8 - mov [rsp + rsp_offsetof_argregs + 8*5], r9 - - save_xmm128_postrsp xmm0, (rsp_offsetof_xmmregs + 16*0) - save_xmm128_postrsp xmm1, (rsp_offsetof_xmmregs + 16*1) - save_xmm128_postrsp xmm2, (rsp_offsetof_xmmregs + 16*2) - save_xmm128_postrsp xmm3, (rsp_offsetof_xmmregs + 16*3) - save_xmm128_postrsp xmm4, (rsp_offsetof_xmmregs + 16*4) - save_xmm128_postrsp xmm5, (rsp_offsetof_xmmregs + 16*5) - save_xmm128_postrsp xmm6, (rsp_offsetof_xmmregs + 16*6) - save_xmm128_postrsp xmm7, (rsp_offsetof_xmmregs + 16*7) - END_PROLOGUE - - // Build PInvokeTransitionFrame. This is only required if we end up resolving the interface method via - // a callout to a managed ICastable method. In that instance we need to be able to cope with garbage - // collections which in turn need to be able to walk the stack from the ICastable method, skip the - // unmanaged runtime portions and resume walking at our caller. This frame provides both the means to - // unwind to that caller and a place to spill callee saved registers in case they contain GC - // references from the caller. - - // Save caller's rip. - mov rax, [rsp + RIDS_ReservedStack] - mov [rsp + 8*0], rax - - // Save caller's rbp. - mov [rsp + 8*1], rbp - - // Zero out the Thread*, it's not used by the stackwalker. - xor rax, rax - mov [rsp + 8*2], rax - - // Set the flags. - mov dword ptr [rsp + 8*3], PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP - - // Save callee saved registers. - mov [rsp + 8*4], rbx - mov [rsp + 8*5], r12 - mov [rsp + 8*6], r13 - mov [rsp + 8*7], r14 - mov [rsp + 8*8], r15 - - // Calculate and store the caller's rsp. - lea rax, [rsp + RIDS_ReservedStack + 8] - mov [rsp + 8*9], rax - - // First argument is the instance we're dispatching on which is already in rdi. 
- - // Second argument is the dispatch data cell. We still have this in r10 - mov rsi, r10 - - // The third argument is the address of the transition frame we build above. - lea rdx, [rsp + 30h] - - call C_FUNC(RhpResolveInterfaceMethodCacheMiss) - - // Recover callee-saved values from the transition frame in case a GC updated them. - mov rbx, [rsp + 8*4] - mov r12, [rsp + 8*5] - mov r13, [rsp + 8*6] - mov r14, [rsp + 8*7] - mov r15, [rsp + 8*8] - - // Restore the argument registers. - movdqa xmm0, [rsp + rsp_offsetof_xmmregs + 16*0] - movdqa xmm1, [rsp + rsp_offsetof_xmmregs + 16*1] - movdqa xmm2, [rsp + rsp_offsetof_xmmregs + 16*2] - movdqa xmm3, [rsp + rsp_offsetof_xmmregs + 16*3] - movdqa xmm4, [rsp + rsp_offsetof_xmmregs + 16*4] - movdqa xmm5, [rsp + rsp_offsetof_xmmregs + 16*5] - movdqa xmm6, [rsp + rsp_offsetof_xmmregs + 16*6] - movdqa xmm7, [rsp + rsp_offsetof_xmmregs + 16*7] - mov r9, [rsp + rsp_offsetof_argregs + 8*5] - mov r8, [rsp + rsp_offsetof_argregs + 8*4] - mov rcx, [rsp + rsp_offsetof_argregs + 8*3] - mov rdx, [rsp + rsp_offsetof_argregs + 8*2] - mov rsi, [rsp + rsp_offsetof_argregs + 8*1] - mov rdi, [rsp + rsp_offsetof_argregs + 8*0] - - add rsp, RIDS_ReservedStack - - jmp rax -NESTED_END RhpInterfaceDispatchSlow, _TEXT +// Use universal transition helper to allow an exception to flow out of resolution +LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + // r10 contains indirection cell address, move to r11 where it will be passed by + // the universal transition thunk as an argument to RhpCidResolve + mov r11, r10 + lea r10, [rip + C_FUNC(RhpCidResolve)] + jmp C_FUNC(RhpUniversalTransition_DebugStepTailCall) + +LEAF_END RhpInterfaceDispatchSlow, _TEXT diff --git a/src/Native/Runtime/amd64/StubDispatch.asm b/src/Native/Runtime/amd64/StubDispatch.asm index 83ab0a6f1..a132cb1fb 100644 --- a/src/Native/Runtime/amd64/StubDispatch.asm +++ b/src/Native/Runtime/amd64/StubDispatch.asm @@ -7,9 +7,6 @@ include AsmMacros.inc ifdef FEATURE_CACHED_INTERFACE_DISPATCH 
-ifdef LEGACY_INTERFACE_DISPATCH -EXTERN RhpResolveInterfaceMethodCacheMiss : PROC -endif ;; LEGACY_INTERFACE_DISPATCH EXTERN RhpCidResolve : PROC EXTERN RhpUniversalTransition_DebugStepTailCall : PROC @@ -80,7 +77,6 @@ ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch LEAF_END RhpInitialInterfaceDispatch, _TEXT -ifndef LEGACY_INTERFACE_DISPATCH ;; Cache miss case, call the runtime to resolve the target and update the cache. ;; Use universal transition helper to allow an exception to flow out of resolution LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT @@ -89,101 +85,8 @@ LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT mov r11, r10 lea r10, RhpCidResolve jmp RhpUniversalTransition_DebugStepTailCall -EXTERN RhpCidResolve : PROC LEAF_END RhpInterfaceDispatchSlow, _TEXT -endif ;; !LEGACY_INTERFACE_DISPATCH - -ifdef LEGACY_INTERFACE_DISPATCH -;; CORE_RT code, which uses legacy RhpResolveInterfaceCacheMissFunction -;; Cache miss case, call the runtime to resolve the target and update the cache. -NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT - -RIDS_ReservedStack equ 20h + 60h + 40h + 8h ;; Scratch space, transition frame, xmm registers and padding - - alloc_stack RIDS_ReservedStack - - ;; Preserve the argument registers in the scratch space across the helper call. Note that we depend on these - ;; registers (which may contain GC references) being spilled before we build the PInvokeTransitionFrame below - ;; due to the way we build a stack range to report to the GC conservatively during a collection. - save_reg_postrsp rcx, (RIDS_ReservedStack + 8*1) - save_reg_postrsp rdx, (RIDS_ReservedStack + 8*2) - save_reg_postrsp r8, (RIDS_ReservedStack + 8*3) - save_reg_postrsp r9, (RIDS_ReservedStack + 8*4) - save_xmm128_postrsp xmm0, (20h + 60h + 16*0) - save_xmm128_postrsp xmm1, (20h + 60h + 16*1) - save_xmm128_postrsp xmm2, (20h + 60h + 16*2) - save_xmm128_postrsp xmm3, (20h + 60h + 16*3) - END_PROLOGUE - - ;; Build PInvokeTransitionFrame. 
This is only required if we end up resolving the interface method via - ;; a callout to a managed ICastable method. In that instance we need to be able to cope with garbage - ;; collections which in turn need to be able to walk the stack from the ICastable method, skip the - ;; unmanaged runtime portions and resume walking at our caller. This frame provides both the means to - ;; unwind to that caller and a place to spill callee saved registers in case they contain GC - ;; references from the caller. - - ;; Save caller's rip. - mov rax, [rsp + RIDS_ReservedStack] - mov [rsp + 20h + 8*0], rax - - ;; Save caller's rbp. - mov [rsp + 20h + 8*1], rbp - - ;; Zero out the Thread*, it's not used by the stackwalker. - xor rax, rax - mov [rsp + 20h + 8*2], rax - - ;; Set the flags. - mov dword ptr [rsp + 20h + 8*3], PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP - - ;; Save callee saved registers. - mov [rsp + 20h + 8*4], rbx - mov [rsp + 20h + 8*5], rsi - mov [rsp + 20h + 8*6], rdi - mov [rsp + 20h + 8*7], r12 - mov [rsp + 20h + 8*8], r13 - mov [rsp + 20h + 8*9], r14 - mov [rsp + 20h + 8*10], r15 - - ;; Calculate and store the caller's rsp. - lea rax, [rsp + RIDS_ReservedStack + 8] - mov [rsp + 20h + 8*11], rax - - ;; First argument is the instance we're dispatching on which is already in rcx. - - ;; Second argument is the dispatch data cell. We still have this in r10 - mov rdx, r10 - - ;; The third argument is the address of the transition frame we build above. - lea r8, [rsp + 20h] - - call RhpResolveInterfaceMethodCacheMiss - - ;; Recover callee-saved values from the transition frame in case a GC updated them. - mov rbx, [rsp + 20h + 8*4] - mov rsi, [rsp + 20h + 8*5] - mov rdi, [rsp + 20h + 8*6] - mov r12, [rsp + 20h + 8*7] - mov r13, [rsp + 20h + 8*8] - mov r14, [rsp + 20h + 8*9] - mov r15, [rsp + 20h + 8*10] - - ;; Restore the argument registers. 
- movdqa xmm0, [rsp + 20h + 60h + 16*0] - movdqa xmm1, [rsp + 20h + 60h + 16*1] - movdqa xmm2, [rsp + 20h + 60h + 16*2] - movdqa xmm3, [rsp + 20h + 60h + 16*3] - mov r9, [rsp + RIDS_ReservedStack + 8*4] - mov r8, [rsp + RIDS_ReservedStack + 8*3] - mov rdx, [rsp + RIDS_ReservedStack + 8*2] - mov rcx, [rsp + RIDS_ReservedStack + 8*1] - - add rsp, RIDS_ReservedStack - TAILJMP_RAX -NESTED_END RhpInterfaceDispatchSlow, _TEXT -endif ;; LEGACY_INTERFACE_DISPATCH - endif ;; FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/Native/Runtime/amd64/UniversalTransition.S b/src/Native/Runtime/amd64/UniversalTransition.S new file mode 100644 index 000000000..eda5f0bdf --- /dev/null +++ b/src/Native/Runtime/amd64/UniversalTransition.S @@ -0,0 +1,164 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +.intel_syntax noprefix +#include <unixasmmacros.inc> + +#ifdef FEATURE_DYNAMIC_CODE + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS 1 +#endif + +#define SIZEOF_RETADDR 8 + +#define SIZEOF_RETURN_BLOCK 0x10 // for 16 bytes of conservatively reported space that the callee can + // use to manage the return value that the call eventually generates + +#define SIZEOF_FP_REGS 0x80 // xmm0-7 + +#define SIZEOF_ARGUMENT_REGISTERS 0x30 // Callee register spill + +// +// From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: +// +// SIZEOF_RETADDR +// SIZEOF_ARGUMENT_REGISTERS +// SIZEOF_RETURN_BLOCK +// SIZEOF_FP_REGS +// + +#define DISTANCE_FROM_CHILDSP_TO_FP_REGS 0 + +#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK SIZEOF_FP_REGS + +#define DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS (SIZEOF_FP_REGS + SIZEOF_RETURN_BLOCK) + +#define DISTANCE_FROM_CHILDSP_TO_RETADDR (SIZEOF_FP_REGS + SIZEOF_RETURN_BLOCK + SIZEOF_ARGUMENT_REGISTERS + 8) + +// +// Defines an assembly thunk used to make a transition 
from managed code to a callee, +// then (based on the return value from the callee), either returning or jumping to +// a new location while preserving the input arguments. The usage of this thunk also +// ensures arguments passed are properly reported. +// +// TODO: This code currently only tailcalls, and does not return. +// +// Inputs: +// rdi, rsi, rcx, rdx, r8, r9, stack space: arguments as normal +// r10: The location of the target code the UniversalTransition thunk will call +// r11: The only parameter to the target function (passed in rsi to callee) +// + +// +// Frame layout is: +// +// {StackPassedArgs} ChildSP+0D0 CallerSP+000 +// {CallerRetaddr} ChildSP+0C8 CallerSP-008 +// {AlignmentPad (0x8 bytes)} ChildSP+0C0 CallerSP-010 +// {IntArgRegs (0x30 bytes)} ChildSP+090 CallerSP-040 +// {ReturnBlock (0x10 bytes)} ChildSP+080 CallerSP-050 +// {FpArgRegs (xmm0-xmm7) (0x80 bytes)} ChildSP+000 CallerSP-0D0 +// {CalleeRetaddr} ChildSP-008 CallerSP-0D8 +// +// NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +// must be updated as well. +// +// NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has +// knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +// FpArgRegs. +// +// NOTE: The stack walker guarantees that conservative GC reporting will be applied to +// everything between the base of the ReturnBlock and the top of the StackPassedArgs.
+// + +.macro UNIVERSAL_TRANSITION FunctionName + +NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler + + alloc_stack DISTANCE_FROM_CHILDSP_TO_RETADDR + + // save integer argument registers + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x00], rdi + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x08], rsi + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x10], rcx + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x18], rdx + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x20], r8 + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x28], r9 + + // save fp argument registers + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x00], xmm0 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x10], xmm1 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x20], xmm2 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x30], xmm3 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x40], xmm4 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x50], xmm5 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x60], xmm6 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x70], xmm7 + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + + // Before calling out, trash all of the argument registers except the ones (rdi, rsi) that + // hold outgoing arguments. All of these registers have been saved to the transition + // frame, and the code at the call target is required to use only the transition frame + // copies when dispatching this call to the eventual callee. 
+ + movsd xmm0, [C_VAR(RhpFpTrashValues) + 0x0] + movsd xmm1, [C_VAR(RhpFpTrashValues) + 0x8] + movsd xmm2, [C_VAR(RhpFpTrashValues) + 0x10] + movsd xmm3, [C_VAR(RhpFpTrashValues) + 0x18] + movsd xmm4, [C_VAR(RhpFpTrashValues) + 0x20] + movsd xmm5, [C_VAR(RhpFpTrashValues) + 0x28] + movsd xmm6, [C_VAR(RhpFpTrashValues) + 0x30] + movsd xmm7, [C_VAR(RhpFpTrashValues) + 0x38] + + mov rcx, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x10] + mov rdx, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x18] + mov r8, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x20] + mov r9, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x28] + +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + + // + // Call out to the target, while storing and reporting arguments to the GC. + // + mov rsi, r11 + lea rdi, [rsp + DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK] + call r10 +ALTERNATE_ENTRY ReturnFrom\FunctionName + + // restore fp argument registers + movdqa xmm0, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x00] + movdqa xmm1, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x10] + movdqa xmm2, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x20] + movdqa xmm3, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x30] + movdqa xmm4, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x40] + movdqa xmm5, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x50] + movdqa xmm6, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x60] + movdqa xmm7, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x70] + + // restore integer argument registers + mov rdi, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x00] + mov rsi, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x08] + mov rcx, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x10] + mov rdx, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x18] + mov r8, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x20] + mov r9, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x28] + + // Pop the space that was allocated between the ChildSP and the caller return address. 
+ free_stack DISTANCE_FROM_CHILDSP_TO_RETADDR + + jmp rax + +NESTED_END Rhp\FunctionName, _TEXT + +.endm // UNIVERSAL_TRANSITION + + // To enable proper step-in behavior in the debugger, we need to have two instances + // of the thunk. For the first one, the debugger steps into the call in the function, + // for the other, it steps over it. + UNIVERSAL_TRANSITION UniversalTransition + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + +#endif // FEATURE_DYNAMIC_CODE diff --git a/src/Native/Runtime/amd64/UniversalTransition.asm b/src/Native/Runtime/amd64/UniversalTransition.asm index ecc413e77..09f12373a 100644 --- a/src/Native/Runtime/amd64/UniversalTransition.asm +++ b/src/Native/Runtime/amd64/UniversalTransition.asm @@ -87,10 +87,10 @@ DISTANCE_FROM_CHILDSP_TO_CALLERSP equ DISTANCE_FROM_CHILDSP_TO_RET UNIVERSAL_TRANSITION macro FunctionName -NESTED_ENTRY Rhp&FunctionName, _TEXT +NESTED_ENTRY Rhp&FunctionName, _TEXT alloc_stack DISTANCE_FROM_CHILDSP_TO_RETADDR - + save_reg_postrsp rcx, 0h + DISTANCE_FROM_CHILDSP_TO_CALLERSP save_reg_postrsp rdx, 8h + DISTANCE_FROM_CHILDSP_TO_CALLERSP save_reg_postrsp r8, 10h + DISTANCE_FROM_CHILDSP_TO_CALLERSP @@ -100,7 +100,7 @@ NESTED_ENTRY Rhp&FunctionName, _TEXT save_xmm128_postrsp xmm1, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 10h save_xmm128_postrsp xmm2, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 20h save_xmm128_postrsp xmm3, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 30h - + END_PROLOGUE if TRASH_SAVED_ARGUMENT_REGISTERS ne 0 @@ -139,7 +139,7 @@ LABELED_RETURN_ADDRESS ReturnFrom&FunctionName mov rdx, [rsp + 8h + DISTANCE_FROM_CHILDSP_TO_CALLERSP] mov r8, [rsp + 10h + DISTANCE_FROM_CHILDSP_TO_CALLERSP] mov r9, [rsp + 18h + DISTANCE_FROM_CHILDSP_TO_CALLERSP] - + ; epilog nop @@ -157,7 +157,7 @@ NESTED_END Rhp&FunctionName, _TEXT ; for the other, it steps over it. UNIVERSAL_TRANSITION UniversalTransition UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall - + endif end |