Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/corert.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Kotas <jkotas@microsoft.com>2016-08-16 23:15:15 +0300
committerGitHub <noreply@github.com>2016-08-16 23:15:15 +0300
commit7b93a9fea3d9c495238f1755f5b50269fd9a0f54 (patch)
tree9436209a12962c89d345ec46e264a88a3c6bcff2 /src/Native/Runtime/amd64
parent1879db890e89d735682d2c06a98b773cfefe57f1 (diff)
Implement transition thunk for Unix amd64 (#1653)
Diffstat (limited to 'src/Native/Runtime/amd64')
-rw-r--r--src/Native/Runtime/amd64/AsmMacros.inc7
-rw-r--r--src/Native/Runtime/amd64/ManagedCalloutThunk.S56
-rw-r--r--src/Native/Runtime/amd64/ManagedCalloutThunk.asm57
-rw-r--r--src/Native/Runtime/amd64/StubDispatch.S107
-rw-r--r--src/Native/Runtime/amd64/StubDispatch.asm97
-rw-r--r--src/Native/Runtime/amd64/UniversalTransition.S164
-rw-r--r--src/Native/Runtime/amd64/UniversalTransition.asm10
7 files changed, 178 insertions, 320 deletions
diff --git a/src/Native/Runtime/amd64/AsmMacros.inc b/src/Native/Runtime/amd64/AsmMacros.inc
index 9177bd460..3de5e2451 100644
--- a/src/Native/Runtime/amd64/AsmMacros.inc
+++ b/src/Native/Runtime/amd64/AsmMacros.inc
@@ -355,13 +355,6 @@ PTFF_RAX_IS_BYREF equ 00020000h ;; iff PTFF_SAVE_RAX: set -> eax is ByRe
;;
-;; Offset from FP (rbp) where the managed callout thunk (ManagedCallout2 and possibly others in the future)
-;; store a pointer to a transition frame.
-;;
-MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET equ -8
-
-
-;;
;; CONSTANTS -- SYMBOLS
;;
diff --git a/src/Native/Runtime/amd64/ManagedCalloutThunk.S b/src/Native/Runtime/amd64/ManagedCalloutThunk.S
deleted file mode 100644
index e50e58b29..000000000
--- a/src/Native/Runtime/amd64/ManagedCalloutThunk.S
+++ /dev/null
@@ -1,56 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-.intel_syntax noprefix
-#include <unixasmmacros.inc>
-
-//
-// Defines a small assembly thunk designed to be used when unmanaged code in the runtime calls out to managed
-// code. In such cases the stack walker needs to be able to bridge the unmanaged gap in the stack between the
-// callout and whatever managed code initially entered the runtime. This thunk makes that goal achievable by
-// (a) exporting a well-known address in the thunk that will be the result of unwinding from the callout (so
-// the stack frame iterator knows when its hit this case) and (b) placing a copy of a pointer to a transition
-// frame saved when the previous managed caller entered the runtime into a well-known location relative to the
-// thunk's frame, enabling the stack frame iterator to recover the transition frame address and use it to
-// re-initialize the stack walk at the previous managed caller.
-//
-// If we end up with more cases of this (currently it's used only for the ICastable extension point for
-// interface dispatch) then we might decide to produce a general routine which can handle an arbitrary number
-// of arguments to the target method. For now we'll just implement the case we need, which takes two regular
-// arguments (that's the 2 in the ManagedCallout2 name).
-//
-// Inputs:
-// rdi : Argument 1 to target method
-// rsi : Argument 2 to target method
-// rdx : Target method address
-// rcx : Pointer to previous managed method's transition frame into the runtime
-//
-NESTED_ENTRY ManagedCallout2, _TEXT, NoHandler
-
- // Push an rbp frame. Apart from making it easier to walk the stack the stack frame iterator locates
- // the transition frame for the previous managed caller relative to the frame pointer to keep the code
- // architecture independent.
- push_nonvol_reg rbp
- mov rbp, rsp
-
- // Allocate space for transition frame pointer and stack alignment padding.
- alloc_stack 8 + 8
-
- END_PROLOGUE
-
- // Stash the previous transition frame's address immediately on top of the old rbp value. This
- // position is important; the stack frame iterator knows about this setup.
- mov [rbp + MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET], rcx
-
- // Call the target method. Arguments are already in the correct registers. The
- // ReturnFromManagedCallout2 label must immediately follow the call instruction.
- call rdx
-ALTERNATE_ENTRY ReturnFromManagedCallout2
-
- // Pop the rbp frame and return.
- mov rsp, rbp
- pop rbp
- ret
-
-NESTED_END ManagedCallout2, _TEXT
diff --git a/src/Native/Runtime/amd64/ManagedCalloutThunk.asm b/src/Native/Runtime/amd64/ManagedCalloutThunk.asm
deleted file mode 100644
index 9d3a137b9..000000000
--- a/src/Native/Runtime/amd64/ManagedCalloutThunk.asm
+++ /dev/null
@@ -1,57 +0,0 @@
-;; Licensed to the .NET Foundation under one or more agreements.
-;; The .NET Foundation licenses this file to you under the MIT license.
-;; See the LICENSE file in the project root for more information.
-
-include AsmMacros.inc
-
-;;
-;; Defines a small assembly thunk designed to be used when unmanaged code in the runtime calls out to managed
-;; code. In such cases the stack walker needs to be able to bridge the unmanaged gap in the stack between the
-;; callout and whatever managed code initially entered the runtime. This thunk makes that goal achievable by
-;; (a) exporting a well-known address in the thunk that will be the result of unwinding from the callout (so
-;; the stack frame iterator knows when its hit this case) and (b) placing a copy of a pointer to a transition
-;; frame saved when the previous managed caller entered the runtime into a well-known location relative to the
-;; thunk's frame, enabling the stack frame iterator to recover the transition frame address and use it to
-;; re-initialize the stack walk at the previous managed caller.
-;;
-;; If we end up with more cases of this (currently it's used only for the ICastable extension point for
-;; interface dispatch) then we might decide to produce a general routine which can handle an arbitrary number
-;; of arguments to the target method. For now we'll just implement the case we need, which takes two regular
-;; arguments (that's the 2 in the ManagedCallout2 name).
-;;
-;; Inputs:
-;; rcx : Argument 1 to target method
-;; rdx : Argument 2 to target method
-;; r8 : Target method address
-;; r9 : Pointer to previous managed method's transition frame into the runtime
-;;
-NESTED_ENTRY ManagedCallout2, _TEXT
-
- ;; Push an rbp frame. Apart from making it easier to walk the stack the stack frame iterator locates
- ;; the transition frame for the previous managed caller relative to the frame pointer to keep the code
- ;; architecture independent.
- push_nonvol_reg rbp
- set_frame rbp, 0
-
- ;; Allocate scratch space + space for transition frame pointer and stack alignment padding.
- alloc_stack 20h + 8h + 8h
-
- END_PROLOGUE
-
- ;; Stash the previous transition frame's address immediately on top of the old rbp value. This
- ;; position is important; the stack frame iterator knows about this setup.
- mov [rbp + MANAGED_CALLOUT_THUNK_TRANSITION_FRAME_POINTER_OFFSET], r9
-
- ;; Call the target method. Arguments are already in the correct registers. The
- ;; ReturnFromManagedCallout2 label must immediately follow the call instruction.
- call r8
-LABELED_RETURN_ADDRESS ReturnFromManagedCallout2
-
- ;; Pop the rbp frame and return.
- mov rsp, rbp
- pop rbp
- ret
-
-NESTED_END ManagedCallout2, _TEXT
-
-END
diff --git a/src/Native/Runtime/amd64/StubDispatch.S b/src/Native/Runtime/amd64/StubDispatch.S
index 34145b2e3..baca8fd42 100644
--- a/src/Native/Runtime/amd64/StubDispatch.S
+++ b/src/Native/Runtime/amd64/StubDispatch.S
@@ -62,101 +62,12 @@ ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch
LEAF_END RhpInitialInterfaceDispatch, _TEXT
// Cache miss case, call the runtime to resolve the target and update the cache.
-NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT, NoHandler
-
- #define RIDS_ReservedStack 0x108 // 0x50 + 0x80 + 0x30 + 8 => transition frame, xmm registers, argument registers and padding
-
- rsp_offsetof_xmmregs = 0x50
- rsp_offsetof_argregs = 0x50 + 0x80
-
- alloc_stack RIDS_ReservedStack
-
- // Preserve the argument registers in the scratch space across the helper call. Note that we depend on these
- // registers (which may contain GC references) being spilled before we build the PInvokeTransitionFrame below
- // due to the way we build a stack range to report to the GC conservatively during a collection.
- mov [rsp + rsp_offsetof_argregs + 8*0], rdi
- mov [rsp + rsp_offsetof_argregs + 8*1], rsi
- mov [rsp + rsp_offsetof_argregs + 8*2], rdx
- mov [rsp + rsp_offsetof_argregs + 8*3], rcx
- mov [rsp + rsp_offsetof_argregs + 8*4], r8
- mov [rsp + rsp_offsetof_argregs + 8*5], r9
-
- save_xmm128_postrsp xmm0, (rsp_offsetof_xmmregs + 16*0)
- save_xmm128_postrsp xmm1, (rsp_offsetof_xmmregs + 16*1)
- save_xmm128_postrsp xmm2, (rsp_offsetof_xmmregs + 16*2)
- save_xmm128_postrsp xmm3, (rsp_offsetof_xmmregs + 16*3)
- save_xmm128_postrsp xmm4, (rsp_offsetof_xmmregs + 16*4)
- save_xmm128_postrsp xmm5, (rsp_offsetof_xmmregs + 16*5)
- save_xmm128_postrsp xmm6, (rsp_offsetof_xmmregs + 16*6)
- save_xmm128_postrsp xmm7, (rsp_offsetof_xmmregs + 16*7)
- END_PROLOGUE
-
- // Build PInvokeTransitionFrame. This is only required if we end up resolving the interface method via
- // a callout to a managed ICastable method. In that instance we need to be able to cope with garbage
- // collections which in turn need to be able to walk the stack from the ICastable method, skip the
- // unmanaged runtime portions and resume walking at our caller. This frame provides both the means to
- // unwind to that caller and a place to spill callee saved registers in case they contain GC
- // references from the caller.
-
- // Save caller's rip.
- mov rax, [rsp + RIDS_ReservedStack]
- mov [rsp + 8*0], rax
-
- // Save caller's rbp.
- mov [rsp + 8*1], rbp
-
- // Zero out the Thread*, it's not used by the stackwalker.
- xor rax, rax
- mov [rsp + 8*2], rax
-
- // Set the flags.
- mov dword ptr [rsp + 8*3], PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP
-
- // Save callee saved registers.
- mov [rsp + 8*4], rbx
- mov [rsp + 8*5], r12
- mov [rsp + 8*6], r13
- mov [rsp + 8*7], r14
- mov [rsp + 8*8], r15
-
- // Calculate and store the caller's rsp.
- lea rax, [rsp + RIDS_ReservedStack + 8]
- mov [rsp + 8*9], rax
-
- // First argument is the instance we're dispatching on which is already in rdi.
-
- // Second argument is the dispatch data cell. We still have this in r10
- mov rsi, r10
-
- // The third argument is the address of the transition frame we build above.
- lea rdx, [rsp + 30h]
-
- call C_FUNC(RhpResolveInterfaceMethodCacheMiss)
-
- // Recover callee-saved values from the transition frame in case a GC updated them.
- mov rbx, [rsp + 8*4]
- mov r12, [rsp + 8*5]
- mov r13, [rsp + 8*6]
- mov r14, [rsp + 8*7]
- mov r15, [rsp + 8*8]
-
- // Restore the argument registers.
- movdqa xmm0, [rsp + rsp_offsetof_xmmregs + 16*0]
- movdqa xmm1, [rsp + rsp_offsetof_xmmregs + 16*1]
- movdqa xmm2, [rsp + rsp_offsetof_xmmregs + 16*2]
- movdqa xmm3, [rsp + rsp_offsetof_xmmregs + 16*3]
- movdqa xmm4, [rsp + rsp_offsetof_xmmregs + 16*4]
- movdqa xmm5, [rsp + rsp_offsetof_xmmregs + 16*5]
- movdqa xmm6, [rsp + rsp_offsetof_xmmregs + 16*6]
- movdqa xmm7, [rsp + rsp_offsetof_xmmregs + 16*7]
- mov r9, [rsp + rsp_offsetof_argregs + 8*5]
- mov r8, [rsp + rsp_offsetof_argregs + 8*4]
- mov rcx, [rsp + rsp_offsetof_argregs + 8*3]
- mov rdx, [rsp + rsp_offsetof_argregs + 8*2]
- mov rsi, [rsp + rsp_offsetof_argregs + 8*1]
- mov rdi, [rsp + rsp_offsetof_argregs + 8*0]
-
- add rsp, RIDS_ReservedStack
-
- jmp rax
-NESTED_END RhpInterfaceDispatchSlow, _TEXT
+// Use universal transition helper to allow an exception to flow out of resolution
+// NOTE(review): UniversalTransition.S only assembles the thunk under FEATURE_DYNAMIC_CODE;
+// confirm this stub is always built in a configuration where the thunk exists.
+LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT
+ // r10 contains indirection cell address, move to r11 where it will be passed by
+ // the universal transition thunk as an argument to RhpCidResolve
+ mov r11, r10
+ // r10 = address of the function the thunk will call (RhpCidResolve).
+ lea r10, [rip + C_FUNC(RhpCidResolve)]
+ // Tail-jump (not call) so the thunk runs on the original caller's frame; the thunk saves
+ // the argument registers, calls r10, restores them and jumps to the address returned in rax.
+ jmp C_FUNC(RhpUniversalTransition_DebugStepTailCall)
+
+LEAF_END RhpInterfaceDispatchSlow, _TEXT
diff --git a/src/Native/Runtime/amd64/StubDispatch.asm b/src/Native/Runtime/amd64/StubDispatch.asm
index 83ab0a6f1..a132cb1fb 100644
--- a/src/Native/Runtime/amd64/StubDispatch.asm
+++ b/src/Native/Runtime/amd64/StubDispatch.asm
@@ -7,9 +7,6 @@ include AsmMacros.inc
ifdef FEATURE_CACHED_INTERFACE_DISPATCH
-ifdef LEGACY_INTERFACE_DISPATCH
-EXTERN RhpResolveInterfaceMethodCacheMiss : PROC
-endif ;; LEGACY_INTERFACE_DISPATCH
EXTERN RhpCidResolve : PROC
EXTERN RhpUniversalTransition_DebugStepTailCall : PROC
@@ -80,7 +77,6 @@ ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch
LEAF_END RhpInitialInterfaceDispatch, _TEXT
-ifndef LEGACY_INTERFACE_DISPATCH
;; Cache miss case, call the runtime to resolve the target and update the cache.
;; Use universal transition helper to allow an exception to flow out of resolution
LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT
@@ -89,101 +85,8 @@ LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT
mov r11, r10
lea r10, RhpCidResolve
jmp RhpUniversalTransition_DebugStepTailCall
-EXTERN RhpCidResolve : PROC
LEAF_END RhpInterfaceDispatchSlow, _TEXT
-endif ;; !LEGACY_INTERFACE_DISPATCH
-
-ifdef LEGACY_INTERFACE_DISPATCH
-;; CORE_RT code, which uses legacy RhpResolveInterfaceCacheMissFunction
-;; Cache miss case, call the runtime to resolve the target and update the cache.
-NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT
-
-RIDS_ReservedStack equ 20h + 60h + 40h + 8h ;; Scratch space, transition frame, xmm registers and padding
-
- alloc_stack RIDS_ReservedStack
-
- ;; Preserve the argument registers in the scratch space across the helper call. Note that we depend on these
- ;; registers (which may contain GC references) being spilled before we build the PInvokeTransitionFrame below
- ;; due to the way we build a stack range to report to the GC conservatively during a collection.
- save_reg_postrsp rcx, (RIDS_ReservedStack + 8*1)
- save_reg_postrsp rdx, (RIDS_ReservedStack + 8*2)
- save_reg_postrsp r8, (RIDS_ReservedStack + 8*3)
- save_reg_postrsp r9, (RIDS_ReservedStack + 8*4)
- save_xmm128_postrsp xmm0, (20h + 60h + 16*0)
- save_xmm128_postrsp xmm1, (20h + 60h + 16*1)
- save_xmm128_postrsp xmm2, (20h + 60h + 16*2)
- save_xmm128_postrsp xmm3, (20h + 60h + 16*3)
- END_PROLOGUE
-
- ;; Build PInvokeTransitionFrame. This is only required if we end up resolving the interface method via
- ;; a callout to a managed ICastable method. In that instance we need to be able to cope with garbage
- ;; collections which in turn need to be able to walk the stack from the ICastable method, skip the
- ;; unmanaged runtime portions and resume walking at our caller. This frame provides both the means to
- ;; unwind to that caller and a place to spill callee saved registers in case they contain GC
- ;; references from the caller.
-
- ;; Save caller's rip.
- mov rax, [rsp + RIDS_ReservedStack]
- mov [rsp + 20h + 8*0], rax
-
- ;; Save caller's rbp.
- mov [rsp + 20h + 8*1], rbp
-
- ;; Zero out the Thread*, it's not used by the stackwalker.
- xor rax, rax
- mov [rsp + 20h + 8*2], rax
-
- ;; Set the flags.
- mov dword ptr [rsp + 20h + 8*3], PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP
-
- ;; Save callee saved registers.
- mov [rsp + 20h + 8*4], rbx
- mov [rsp + 20h + 8*5], rsi
- mov [rsp + 20h + 8*6], rdi
- mov [rsp + 20h + 8*7], r12
- mov [rsp + 20h + 8*8], r13
- mov [rsp + 20h + 8*9], r14
- mov [rsp + 20h + 8*10], r15
-
- ;; Calculate and store the caller's rsp.
- lea rax, [rsp + RIDS_ReservedStack + 8]
- mov [rsp + 20h + 8*11], rax
-
- ;; First argument is the instance we're dispatching on which is already in rcx.
-
- ;; Second argument is the dispatch data cell. We still have this in r10
- mov rdx, r10
-
- ;; The third argument is the address of the transition frame we build above.
- lea r8, [rsp + 20h]
-
- call RhpResolveInterfaceMethodCacheMiss
-
- ;; Recover callee-saved values from the transition frame in case a GC updated them.
- mov rbx, [rsp + 20h + 8*4]
- mov rsi, [rsp + 20h + 8*5]
- mov rdi, [rsp + 20h + 8*6]
- mov r12, [rsp + 20h + 8*7]
- mov r13, [rsp + 20h + 8*8]
- mov r14, [rsp + 20h + 8*9]
- mov r15, [rsp + 20h + 8*10]
-
- ;; Restore the argument registers.
- movdqa xmm0, [rsp + 20h + 60h + 16*0]
- movdqa xmm1, [rsp + 20h + 60h + 16*1]
- movdqa xmm2, [rsp + 20h + 60h + 16*2]
- movdqa xmm3, [rsp + 20h + 60h + 16*3]
- mov r9, [rsp + RIDS_ReservedStack + 8*4]
- mov r8, [rsp + RIDS_ReservedStack + 8*3]
- mov rdx, [rsp + RIDS_ReservedStack + 8*2]
- mov rcx, [rsp + RIDS_ReservedStack + 8*1]
-
- add rsp, RIDS_ReservedStack
- TAILJMP_RAX
-NESTED_END RhpInterfaceDispatchSlow, _TEXT
-endif ;; LEGACY_INTERFACE_DISPATCH
-
endif ;; FEATURE_CACHED_INTERFACE_DISPATCH
diff --git a/src/Native/Runtime/amd64/UniversalTransition.S b/src/Native/Runtime/amd64/UniversalTransition.S
new file mode 100644
index 000000000..eda5f0bdf
--- /dev/null
+++ b/src/Native/Runtime/amd64/UniversalTransition.S
@@ -0,0 +1,164 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+.intel_syntax noprefix
+#include <unixasmmacros.inc>
+
+#ifdef FEATURE_DYNAMIC_CODE
+
+#ifdef _DEBUG
+#define TRASH_SAVED_ARGUMENT_REGISTERS 1
+#endif
+
+#define SIZEOF_RETADDR 8
+
+#define SIZEOF_RETURN_BLOCK 0x10 // for 16 bytes of conservatively reported space that the callee can
+ // use to manage the return value that the call eventually generates
+
+#define SIZEOF_FP_REGS 0x80 // xmm0-7, 16 bytes each
+
+#define SIZEOF_ARGUMENT_REGISTERS 0x30 // Spill area for the 6 integer argument registers, 8 bytes each
+
+//
+// From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions:
+//
+// SIZEOF_RETADDR
+// 8 bytes of alignment padding (see the frame layout below)
+// SIZEOF_ARGUMENT_REGISTERS
+// SIZEOF_RETURN_BLOCK
+// SIZEOF_FP_REGS
+//
+
+#define DISTANCE_FROM_CHILDSP_TO_FP_REGS 0
+
+#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK SIZEOF_FP_REGS
+
+#define DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS (SIZEOF_FP_REGS + SIZEOF_RETURN_BLOCK)
+
+// The trailing "+ 8" is the alignment padding between the integer argument registers and the
+// caller's return address; it is not SIZEOF_RETADDR. The total is 0xC8.
+#define DISTANCE_FROM_CHILDSP_TO_RETADDR (SIZEOF_FP_REGS + SIZEOF_RETURN_BLOCK + SIZEOF_ARGUMENT_REGISTERS + 8)
+
+//
+// Defines an assembly thunk used to make a transition from managed code to a callee,
+// then (based on the return value from the callee), either returning or jumping to
+// a new location while preserving the input arguments. The usage of this thunk also
+// ensures arguments passed are properly reported.
+//
+// TODO: This code currently only tailcalls, and does not return.
+//
+// Inputs:
+// rdi, rsi, rdx, rcx, r8, r9, stack space: arguments as normal
+// r10: The location of the target code the UniversalTransition thunk will call
+// r11: The only parameter to the target function (passed in rsi to callee)
+//
+
+//
+// Frame layout is:
+//
+// {StackPassedArgs} ChildSP+0D0 CallerSP+000
+// {CallerRetaddr} ChildSP+0C8 CallerSP-008
+// {AlignmentPad (0x8 bytes)} ChildSP+0C0 CallerSP-010
+// {IntArgRegs (0x30 bytes)} ChildSP+090 CallerSP-040
+// {ReturnBlock (0x10 bytes)} ChildSP+080 CallerSP-050
+// {FpArgRegs (xmm0-xmm7) (0x80 bytes)} ChildSP+000 CallerSP-0D0
+// {CalleeRetaddr} ChildSP-008 CallerSP-0D8
+//
+// NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure
+// must be updated as well.
+//
+// NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has
+// knowledge of the exact layout of all pieces of the frame that lie at or above the pushed
+// FpArgRegs.
+//
+// NOTE: The stack walker guarantees that conservative GC reporting will be applied to
+// everything between the base of the ReturnBlock and the top of the StackPassedArgs.
+//
+
+.macro UNIVERSAL_TRANSITION FunctionName
+
+// Emits Rhp\FunctionName. The thunk spills the integer and FP argument registers into its
+// frame, calls the target in r10 with rdi = address of the ReturnBlock and rsi = r11, then
+// reloads the argument registers and jumps to the address the target returned in rax.
+NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler
+
+ // Allocate everything between the ChildSP and the caller's return address (0xC8 bytes).
+ alloc_stack DISTANCE_FROM_CHILDSP_TO_RETADDR
+
+ // save integer argument registers
+ // Slot order is rdi, rsi, rcx, rdx, r8, r9 (rcx before rdx); the restore sequence below
+ // uses the same offsets.
+ mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x00], rdi
+ mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x08], rsi
+ mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x10], rcx
+ mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x18], rdx
+ mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x20], r8
+ mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x28], r9
+
+ // save fp argument registers
+ // movdqa needs 16-byte-aligned addresses; this assumes the ABI-standard rsp % 16 == 8 on
+ // entry, so that rsp is 16-byte aligned after the 0xC8-byte allocation above.
+ movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x00], xmm0
+ movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x10], xmm1
+ movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x20], xmm2
+ movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x30], xmm3
+ movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x40], xmm4
+ movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x50], xmm5
+ movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x60], xmm6
+ movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x70], xmm7
+
+#ifdef TRASH_SAVED_ARGUMENT_REGISTERS
+
+ // Before calling out, trash all of the argument registers except the ones (rdi, rsi) that
+ // hold outgoing arguments. All of these registers have been saved to the transition
+ // frame, and the code at the call target is required to use only the transition frame
+ // copies when dispatching this call to the eventual callee.
+ //
+ // NOTE(review): these loads address RhpFpTrashValues / RhpIntegerTrashValues without a rip
+ // base register; confirm they assemble and link as intended in position-independent builds.
+
+ movsd xmm0, [C_VAR(RhpFpTrashValues) + 0x0]
+ movsd xmm1, [C_VAR(RhpFpTrashValues) + 0x8]
+ movsd xmm2, [C_VAR(RhpFpTrashValues) + 0x10]
+ movsd xmm3, [C_VAR(RhpFpTrashValues) + 0x18]
+ movsd xmm4, [C_VAR(RhpFpTrashValues) + 0x20]
+ movsd xmm5, [C_VAR(RhpFpTrashValues) + 0x28]
+ movsd xmm6, [C_VAR(RhpFpTrashValues) + 0x30]
+ movsd xmm7, [C_VAR(RhpFpTrashValues) + 0x38]
+
+ mov rcx, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x10]
+ mov rdx, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x18]
+ mov r8, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x20]
+ mov r9, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x28]
+
+#endif // TRASH_SAVED_ARGUMENT_REGISTERS
+
+ //
+ // Call out to the target, while storing and reporting arguments to the GC.
+ // Outgoing arguments: rdi = address of the ReturnBlock, rsi = the value passed in r11.
+ // The ReturnFrom\FunctionName label is kept immediately after the call instruction
+ // (presumably so the stack walker can recognize this return address; confirm).
+ //
+ mov rsi, r11
+ lea rdi, [rsp + DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK]
+ call r10
+ALTERNATE_ENTRY ReturnFrom\FunctionName
+
+ // restore fp argument registers
+ movdqa xmm0, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x00]
+ movdqa xmm1, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x10]
+ movdqa xmm2, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x20]
+ movdqa xmm3, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x30]
+ movdqa xmm4, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x40]
+ movdqa xmm5, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x50]
+ movdqa xmm6, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x60]
+ movdqa xmm7, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x70]
+
+ // restore integer argument registers
+ // rax is left untouched here: it holds the value returned by the target.
+ mov rdi, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x00]
+ mov rsi, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x08]
+ mov rcx, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x10]
+ mov rdx, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x18]
+ mov r8, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x20]
+ mov r9, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x28]
+
+ // Pop the space that was allocated between the ChildSP and the caller return address.
+ free_stack DISTANCE_FROM_CHILDSP_TO_RETADDR
+
+ // Tail-jump to the address returned by the target, with the original argument registers
+ // restored and rsp pointing at the caller's return address again.
+ jmp rax
+
+NESTED_END Rhp\FunctionName, _TEXT
+
+.endm // UNIVERSAL_TRANSITION
+
+ // To enable proper step-in behavior in the debugger, we need to have two instances
+ // of the thunk. For the first one, the debugger steps into the call in the function,
+ // for the other, it steps over it.
+ UNIVERSAL_TRANSITION UniversalTransition
+ UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall
+
+#endif // FEATURE_DYNAMIC_CODE
diff --git a/src/Native/Runtime/amd64/UniversalTransition.asm b/src/Native/Runtime/amd64/UniversalTransition.asm
index ecc413e77..09f12373a 100644
--- a/src/Native/Runtime/amd64/UniversalTransition.asm
+++ b/src/Native/Runtime/amd64/UniversalTransition.asm
@@ -87,10 +87,10 @@ DISTANCE_FROM_CHILDSP_TO_CALLERSP equ DISTANCE_FROM_CHILDSP_TO_RET
UNIVERSAL_TRANSITION macro FunctionName
-NESTED_ENTRY Rhp&FunctionName, _TEXT
+NESTED_ENTRY Rhp&FunctionName, _TEXT
alloc_stack DISTANCE_FROM_CHILDSP_TO_RETADDR
-
+
save_reg_postrsp rcx, 0h + DISTANCE_FROM_CHILDSP_TO_CALLERSP
save_reg_postrsp rdx, 8h + DISTANCE_FROM_CHILDSP_TO_CALLERSP
save_reg_postrsp r8, 10h + DISTANCE_FROM_CHILDSP_TO_CALLERSP
@@ -100,7 +100,7 @@ NESTED_ENTRY Rhp&FunctionName, _TEXT
save_xmm128_postrsp xmm1, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 10h
save_xmm128_postrsp xmm2, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 20h
save_xmm128_postrsp xmm3, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 30h
-
+
END_PROLOGUE
if TRASH_SAVED_ARGUMENT_REGISTERS ne 0
@@ -139,7 +139,7 @@ LABELED_RETURN_ADDRESS ReturnFrom&FunctionName
mov rdx, [rsp + 8h + DISTANCE_FROM_CHILDSP_TO_CALLERSP]
mov r8, [rsp + 10h + DISTANCE_FROM_CHILDSP_TO_CALLERSP]
mov r9, [rsp + 18h + DISTANCE_FROM_CHILDSP_TO_CALLERSP]
-
+
; epilog
nop
@@ -157,7 +157,7 @@ NESTED_END Rhp&FunctionName, _TEXT
; for the other, it steps over it.
UNIVERSAL_TRANSITION UniversalTransition
UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall
-
+
endif
end