github.com/mono/corert.git
author    dotnet-bot <dotnet-bot@microsoft.com>  2017-10-21 00:49:47 +0300
committer dotnet-bot <dotnet-bot@microsoft.com>  2017-10-21 00:49:47 +0300
commit    b674c8cfe1802c553cb5463393ac9f02f58b5e67 (patch)
tree      5cbee5d8f43eef2ace8f7baee26b025ce3bf9efa /src/Native
parent    dd9a41db606ee41ee6ccdd921eac927173cec6f4 (diff)

[tfs-changeset: 1678781]
Diffstat (limited to 'src/Native')
 src/Native/Runtime/CachedInterfaceDispatch.cpp                 |  30
 src/Native/Runtime/EHHelpers.cpp                               |  17
 src/Native/Runtime/PalRedhawk.h                                |   6
 src/Native/Runtime/PalRedhawkCommon.h                          |  23
 src/Native/Runtime/RHCodeMan.cpp                               |   1
 src/Native/Runtime/StackFrameIterator.cpp                      | 118
 src/Native/Runtime/arm/ThunkPoolThunks.asm                     |   8
 src/Native/Runtime/arm64/AllocFast.asm                         |  90
 src/Native/Runtime/arm64/AsmMacros.h                           |  74
 src/Native/Runtime/arm64/AsmOffsetsCpu.h                       |  37
 src/Native/Runtime/arm64/CallingConventionConverterHelpers.asm |  26
 src/Native/Runtime/arm64/GetThread.asm                         |   9
 src/Native/Runtime/arm64/InteropThunksHelpers.asm              |  53
 src/Native/Runtime/arm64/MiscStubs.asm                         |  25
 src/Native/Runtime/arm64/StubDispatch.asm                      | 107
 src/Native/Runtime/arm64/ThunkPoolThunks.asm                   | 262
 src/Native/Runtime/arm64/UniversalTransition.asm               | 133
 src/Native/Runtime/arm64/WriteBarriers.asm                     | 189
 src/Native/Runtime/inc/gcinfo.h                                |  96
 src/Native/Runtime/inc/rhbinder.h                              |  70
 src/Native/Runtime/threadstore.cpp                             |   9
 src/Native/Runtime/windows/PalRedhawkMinWin.cpp                |  18
 22 files changed, 1089 insertions(+), 312 deletions(-)
diff --git a/src/Native/Runtime/CachedInterfaceDispatch.cpp b/src/Native/Runtime/CachedInterfaceDispatch.cpp
index 3c12cc7d9..397ddc7d1 100644
--- a/src/Native/Runtime/CachedInterfaceDispatch.cpp
+++ b/src/Native/Runtime/CachedInterfaceDispatch.cpp
@@ -188,12 +188,12 @@ static InterfaceDispatchCache * UpdateCellStubAndCache(InterfaceDispatchCell * p
// any more) we can place them on one of several free lists based on their size.
//
-#ifdef _AMD64_
+#if defined(_AMD64_) || defined(_ARM64_)
// Head of the list of discarded cache blocks that can't be re-used just yet.
-InterfaceDispatchCache * g_pDiscardedCacheList; // for AMD64, m_pCell is not used and we can link the discarded blocks themselves
+InterfaceDispatchCache * g_pDiscardedCacheList; // for AMD64 and ARM64, m_pCell is not used and we can link the discarded blocks themselves
-#else // ifdef _AMD64_
+#else // defined(_AMD64_) || defined(_ARM64_)
struct DiscardedCacheBlock
{
@@ -207,7 +207,7 @@ static DiscardedCacheBlock * g_pDiscardedCacheList = NULL;
// Free list of DiscardedCacheBlock items
static DiscardedCacheBlock * g_pDiscardedCacheFree = NULL;
-#endif // ifdef _AMD64_
+#endif // defined(_AMD64_) || defined(_ARM64_)
// Free lists for each cache size up to the maximum. We allocate from these in preference to new memory.
static InterfaceDispatchCache * g_rgFreeLists[CID_MAX_CACHE_SIZE_LOG2 + 1];
@@ -352,13 +352,13 @@ static void DiscardCache(InterfaceDispatchCache * pCache)
CrstHolder lh(&g_sListLock);
-#ifdef _AMD64_
+#if defined(_AMD64_) || defined(_ARM64_)
- // on AMD64, we can thread the list through the blocks directly
+ // on AMD64 and ARM64, we can thread the list through the blocks directly
pCache->m_pNextFree = g_pDiscardedCacheList;
g_pDiscardedCacheList = pCache;
-#else // _AMD64_
+#else // defined(_AMD64_) || defined(_ARM64_)
// on other architectures, we cannot overwrite pCache->m_pNextFree yet
// because it shares storage with m_pCell which may still be used as a back
@@ -378,7 +378,7 @@ static void DiscardCache(InterfaceDispatchCache * pCache)
g_pDiscardedCacheList = pDiscardedCacheBlock;
}
-#endif // _AMD64_
+#endif // defined(_AMD64_) || defined(_ARM64_)
}
// Called during a GC to empty the list of discarded caches (which we can now guarantee aren't being accessed)
@@ -388,7 +388,7 @@ void ReclaimUnusedInterfaceDispatchCaches()
// No need for any locks, we're not racing with any other threads any more.
// Walk the list of discarded caches.
-#ifdef _AMD64_
+#if defined(_AMD64_) || defined(_ARM64_)
// on AMD64, this is threaded directly through the cache blocks
InterfaceDispatchCache * pCache = g_pDiscardedCacheList;
@@ -406,7 +406,7 @@ void ReclaimUnusedInterfaceDispatchCaches()
pCache = pNextCache;
}
-#else // _AMD64_
+#else // defined(_AMD64_) || defined(_ARM64_)
// on other architectures, we use an auxiliary list instead
DiscardedCacheBlock * pDiscardedCacheBlock = g_pDiscardedCacheList;
@@ -428,7 +428,7 @@ void ReclaimUnusedInterfaceDispatchCaches()
pDiscardedCacheBlock = pNextDiscardedCacheBlock;
}
-#endif // _AMD64_
+#endif // defined(_AMD64_) || defined(_ARM64_)
// We processed all the discarded entries, so we can simply NULL the list head.
g_pDiscardedCacheList = NULL;
@@ -498,11 +498,11 @@ COOP_PINVOKE_HELPER(PTR_Code, RhpUpdateDispatchCellCache, (InterfaceDispatchCell
if (InterfaceDispatchCell::IsCache(newCacheValue))
{
pCache = (InterfaceDispatchCache*)newCacheValue;
-#ifndef _AMD64_
- // Set back pointer to interface dispatch cell for non-AMD64
- // for AMD64, we have enough registers to make this trick unnecessary
+#if !defined(_AMD64_) && !defined(_ARM64_)
+ // Set back pointer to interface dispatch cell for non-AMD64 and non-ARM64
+ // for AMD64 and ARM64, we have enough registers to make this trick unnecessary
pCache->m_pCell = pCell;
-#endif // _AMD64_
+#endif // !defined(_AMD64_) && !defined(_ARM64_)
// Add entry to the first unused slot.
InterfaceDispatchCacheEntry * pCacheEntry = &pCache->m_rgEntries[cOldCacheEntries];
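Note: the hunks above are the crux of this file's change. Once a cache is discarded, AMD64 and ARM64 never read m_pCell again, so the free-list link can reuse its storage; other targets must keep m_pCell intact until the next GC and therefore track discards in a side list. A minimal C++ sketch of the two strategies (illustrative only; the real types in CachedInterfaceDispatch.cpp carry more state, and the union here is an assumption about the shared storage the comments describe):

    struct InterfaceDispatchCell;    // back-pointer target on register-poor targets

    struct InterfaceDispatchCache
    {
        union
        {
            InterfaceDispatchCell *  m_pCell;     // needed while the cache is live (non-AMD64/ARM64)
            InterfaceDispatchCache * m_pNextFree; // reusable once the cache is discarded
        };
        // ... cache entries follow ...
    };

    #if defined(_AMD64_) || defined(_ARM64_)
    // m_pCell is never used on these targets, so discarded blocks link themselves.
    static InterfaceDispatchCache * g_pDiscardedCacheList = nullptr;

    static void DiscardCacheSketch(InterfaceDispatchCache * pCache)
    {
        pCache->m_pNextFree = g_pDiscardedCacheList;
        g_pDiscardedCacheList = pCache;
    }
    #else
    // m_pCell may still be read as a back pointer until the next GC, so the
    // links must live in a side list instead of overwriting the union.
    struct DiscardedCacheBlock
    {
        DiscardedCacheBlock *    m_pNext;
        InterfaceDispatchCache * m_pCache;
    };
    static DiscardedCacheBlock * g_pDiscardedCacheList = nullptr;
    #endif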
diff --git a/src/Native/Runtime/EHHelpers.cpp b/src/Native/Runtime/EHHelpers.cpp
index 85d2b6649..2b4817386 100644
--- a/src/Native/Runtime/EHHelpers.cpp
+++ b/src/Native/Runtime/EHHelpers.cpp
@@ -190,9 +190,19 @@ COOP_PINVOKE_HELPER(void, RhpCopyContextFromExInfo,
pContext->Lr = pPalContext->LR;
pContext->Pc = pPalContext->IP;
#elif defined(_ARM64_)
- for (int i = 0; i < GEN_REG_COUNT; ++i) {
- pContext->X[i] = pPalContext->X[i];
- }
+ pContext->X0 = pPalContext->X0;
+ pContext->X1 = pPalContext->X1;
+ // TODO: Copy registers X2-X7 when we start supporting HVAs
+ pContext->X19 = pPalContext->X19;
+ pContext->X20 = pPalContext->X20;
+ pContext->X21 = pPalContext->X21;
+ pContext->X22 = pPalContext->X22;
+ pContext->X23 = pPalContext->X23;
+ pContext->X24 = pPalContext->X24;
+ pContext->X25 = pPalContext->X25;
+ pContext->X26 = pPalContext->X26;
+ pContext->X27 = pPalContext->X27;
+ pContext->X28 = pPalContext->X28;
pContext->Fp = pPalContext->FP;
pContext->Sp = pPalContext->SP;
pContext->Lr = pPalContext->LR;
@@ -204,6 +214,7 @@ COOP_PINVOKE_HELPER(void, RhpCopyContextFromExInfo,
#if defined(_AMD64_) || defined(_ARM_) || defined(_X86_)
+// ARM64TODO
struct DISPATCHER_CONTEXT
{
UIntNative ControlPc;
diff --git a/src/Native/Runtime/PalRedhawk.h b/src/Native/Runtime/PalRedhawk.h
index 0dc30f19d..1d1b58166 100644
--- a/src/Native/Runtime/PalRedhawk.h
+++ b/src/Native/Runtime/PalRedhawk.h
@@ -383,10 +383,6 @@ typedef struct _NEON128 {
Int64 High;
} NEON128, *PNEON128;
-#if !defined(GEN_REG_COUNT)
-#define GEN_REG_COUNT 29
-#endif
-
typedef struct DECLSPEC_ALIGN(16) _CONTEXT {
//
// Control flags.
@@ -431,7 +427,7 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT {
#pragma warning(push)
#pragma warning(disable:4201) // nameless struct
};
- UInt64 X[GEN_REG_COUNT];
+ UInt64 X[29];
};
#pragma warning(pop)
UInt64 Fp; // X29
diff --git a/src/Native/Runtime/PalRedhawkCommon.h b/src/Native/Runtime/PalRedhawkCommon.h
index 5166ad4b4..695732b37 100644
--- a/src/Native/Runtime/PalRedhawkCommon.h
+++ b/src/Native/Runtime/PalRedhawkCommon.h
@@ -66,20 +66,25 @@ struct PAL_LIMITED_CONTEXT
void SetIp(UIntNative ip) { IP = ip; }
void SetSp(UIntNative sp) { SP = sp; }
#elif defined(_TARGET_ARM64_)
-#if !defined(GEN_REG_COUNT)
-#define GEN_REG_COUNT 29
-#endif
-
- // ARM64TODO: we don't need to save X9-X15
- // ARM64TODO: do we need X16 (IP0), X17 (IP1), X18 or can we skip them?
- UIntNative X[GEN_REG_COUNT];
+ UIntNative X0;
+ UIntNative X1;
+ UIntNative X19;
+ UIntNative X20;
+ UIntNative X21;
+ UIntNative X22;
+ UIntNative X23;
+ UIntNative X24;
+ UIntNative X25;
+ UIntNative X26;
+ UIntNative X27;
+ UIntNative X28;
UIntNative FP;
UIntNative LR;
UIntNative SP;
UIntNative IP;
- UIntNative D[16 - 8]; // Only the bottom 64-bit value of the V registers V8..V15 needs to be preserved
+ UInt64 D[16 - 8]; // Only the bottom 64-bit value of the V registers V8..V15 needs to be preserved
// (V0-V7 and V16-V31 are not preserved according to the ABI spec).
@@ -87,6 +92,8 @@ struct PAL_LIMITED_CONTEXT
UIntNative GetSp() const { return SP; }
UIntNative GetFp() const { return FP; }
UIntNative GetLr() const { return LR; }
+ void SetIp(UIntNative ip) { IP = ip; }
+ void SetSp(UIntNative sp) { SP = sp; }
#elif defined(UNIX_AMD64_ABI)
// Param regs: rdi, rsi, rdx, rcx, r8, r9, scratch: rax, rdx (both return val), preserved: rbp, rbx, r12-r15
UIntNative IP;
diff --git a/src/Native/Runtime/RHCodeMan.cpp b/src/Native/Runtime/RHCodeMan.cpp
index 2d9d83359..2526edad9 100644
--- a/src/Native/Runtime/RHCodeMan.cpp
+++ b/src/Native/Runtime/RHCodeMan.cpp
@@ -1560,6 +1560,7 @@ void ** EECodeManager::GetReturnAddressLocationFromEpilog(GCInfoHeader * pInfoHe
return NULL;
#elif defined(_ARM64_)
+ UNREFERENCED_PARAMETER(pInfoHeader);
UNREFERENCED_PARAMETER(pbEpilog);
PORTABILITY_ASSERT("@TODO: FIXME:ARM64");
diff --git a/src/Native/Runtime/StackFrameIterator.cpp b/src/Native/Runtime/StackFrameIterator.cpp
index 3d9a2eb4a..4a3a602ad 100644
--- a/src/Native/Runtime/StackFrameIterator.cpp
+++ b/src/Native/Runtime/StackFrameIterator.cpp
@@ -209,8 +209,45 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PInvokeTransit
}
#elif defined(_TARGET_ARM64_)
- UNREFERENCED_PARAMETER(pPreservedRegsCursor);
- PORTABILITY_ASSERT("@TODO: FIXME:ARM64");
+ m_RegDisplay.pFP = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_FramePointer);
+ m_RegDisplay.pLR = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_RIP);
+
+ ASSERT(!(pFrame->m_dwFlags & PTFF_SAVE_FP)); // FP should never contain a GC ref because we require
+ // a frame pointer for methods with pinvokes
+
+ if (pFrame->m_dwFlags & PTFF_SAVE_X19) { m_RegDisplay.pX19 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X20) { m_RegDisplay.pX20 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X21) { m_RegDisplay.pX21 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X22) { m_RegDisplay.pX22 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X23) { m_RegDisplay.pX23 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X24) { m_RegDisplay.pX24 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X25) { m_RegDisplay.pX25 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X26) { m_RegDisplay.pX26 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X27) { m_RegDisplay.pX27 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X28) { m_RegDisplay.pX28 = pPreservedRegsCursor++; }
+
+ if (pFrame->m_dwFlags & PTFF_SAVE_SP) { m_RegDisplay.SP = *pPreservedRegsCursor++; }
+
+ if (pFrame->m_dwFlags & PTFF_SAVE_X0) { m_RegDisplay.pX0 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X1) { m_RegDisplay.pX1 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X2) { m_RegDisplay.pX2 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X3) { m_RegDisplay.pX3 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X4) { m_RegDisplay.pX4 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X5) { m_RegDisplay.pX5 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X6) { m_RegDisplay.pX6 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_X7) { m_RegDisplay.pX7 = pPreservedRegsCursor++; }
+ if (pFrame->m_dwFlags & PTFF_SAVE_LR) { m_RegDisplay.pLR = pPreservedRegsCursor++; }
+
+ if (pFrame->m_dwFlags & PTFF_X0_IS_GCREF)
+ {
+ m_pHijackedReturnValue = (PTR_RtuObjectRef)m_RegDisplay.pX0;
+ m_HijackedReturnValueKind = GCRK_Object;
+ }
+ if (pFrame->m_dwFlags & PTFF_X0_IS_BYREF)
+ {
+ m_pHijackedReturnValue = (PTR_RtuObjectRef)m_RegDisplay.pX0;
+ m_HijackedReturnValueKind = GCRK_Byref;
+ }
#else // _TARGET_ARM_
if (pFrame->m_dwFlags & PTFF_SAVE_RBX) { m_RegDisplay.pRbx = pPreservedRegsCursor++; }
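The new ARM64 block follows the same protocol as the other targets: m_dwFlags records exactly which registers the transition frame pushed, and the preserved-regs cursor is consumed in the same fixed order the flags are tested, so the assembly push order and this test order must agree. A reduced sketch of the pattern (simplified types; the flag values match the PTFF_* constants added to AsmMacros.h later in this change):

    #include <cstdint>

    enum PInvokeTransitionFrameFlags : uint64_t
    {
        PTFF_SAVE_X19 = 0x00000001,
        PTFF_SAVE_X20 = 0x00000002,
        // ... remaining bits as defined in AsmMacros.h below ...
    };

    struct RegDisplaySketch { uint64_t *pX19, *pX20; };

    // Each set flag consumes exactly one slot from the cursor, in test order.
    static void CaptureSketch(uint64_t dwFlags, uint64_t * pCursor, RegDisplaySketch & rd)
    {
        if (dwFlags & PTFF_SAVE_X19) { rd.pX19 = pCursor++; }
        if (dwFlags & PTFF_SAVE_X20) { rd.pX20 = pCursor++; }
        // ... x21..x28, SP, x0..x7, LR follow the same pattern ...
    }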
@@ -368,7 +405,35 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO
m_RegDisplay.pR0 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R0);
#elif defined(_TARGET_ARM64_)
- PORTABILITY_ASSERT("@TODO: FIXME:ARM64");
+ //
+ // preserved regs
+ //
+ m_RegDisplay.pX19 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X19);
+ m_RegDisplay.pX20 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X20);
+ m_RegDisplay.pX21 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X21);
+ m_RegDisplay.pX22 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X22);
+ m_RegDisplay.pX23 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X23);
+ m_RegDisplay.pX24 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X24);
+ m_RegDisplay.pX25 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X25);
+ m_RegDisplay.pX26 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X26);
+ m_RegDisplay.pX27 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X27);
+ m_RegDisplay.pX28 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X28);
+ m_RegDisplay.pFP = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, FP);
+ m_RegDisplay.pLR = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, LR);
+
+ //
+ // preserved vfp regs
+ //
+ for (Int32 i = 0; i < 16 - 8; i++)
+ {
+ m_RegDisplay.D[i] = pCtx->D[i];
+ }
+ //
+ // scratch regs
+ //
+ m_RegDisplay.pX0 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X0);
+ m_RegDisplay.pX1 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X1);
+ // TODO: Copy X2-X7 when we start supporting HVAs
#elif defined(UNIX_AMD64_ABI)
//
@@ -529,7 +594,17 @@ void StackFrameIterator::UpdateFromExceptionDispatch(PTR_StackFrameIterator pSou
m_RegDisplay.pR11 = thisFuncletPtrs.pR11;
#elif defined(_TARGET_ARM64_)
- PORTABILITY_ASSERT("@TODO: FIXME:ARM64");
+ m_RegDisplay.pX19 = thisFuncletPtrs.pX19;
+ m_RegDisplay.pX20 = thisFuncletPtrs.pX20;
+ m_RegDisplay.pX21 = thisFuncletPtrs.pX21;
+ m_RegDisplay.pX22 = thisFuncletPtrs.pX22;
+ m_RegDisplay.pX23 = thisFuncletPtrs.pX23;
+ m_RegDisplay.pX24 = thisFuncletPtrs.pX24;
+ m_RegDisplay.pX25 = thisFuncletPtrs.pX25;
+ m_RegDisplay.pX26 = thisFuncletPtrs.pX26;
+ m_RegDisplay.pX27 = thisFuncletPtrs.pX27;
+ m_RegDisplay.pX28 = thisFuncletPtrs.pX28;
+ // ARM64TODO: FP? LR?
#elif defined(UNIX_AMD64_ABI)
// Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code.
@@ -850,18 +925,25 @@ public:
}
#elif defined(_TARGET_ARM64_)
+
+// Conservative GC reporting must be applied to everything between the base of the
+// ReturnBlock and the top of the StackPassedArgs.
private:
- // ARM64TODO: #error NYI for this arch
- UIntNative m_stackPassedArgs[1]; // Placeholder
+ UIntNative m_pushedFP; // ChildSP+000 CallerSP-0D0 (0x08 bytes) (fp)
+ UIntNative m_pushedLR; // ChildSP+008 CallerSP-0C8 (0x08 bytes) (lr)
+ UInt64 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0C0 (0x40 bytes) (d0-d7)
+ UIntNative m_returnBlock[8]; // ChildSP+050 CallerSP-080 (0x40 bytes)
+ UIntNative m_intArgRegs[8]; // ChildSP+090 CallerSP-040 (0x40 bytes) (x0-x7)
+ UIntNative m_stackPassedArgs[1]; // ChildSP+0D0 CallerSP+000 (unknown size)
+
public:
- PTR_UIntNative get_CallerSP() { PORTABILITY_ASSERT("@TODO: FIXME:ARM64"); return NULL; }
- PTR_UIntNative get_AddressOfPushedCallerIP() { PORTABILITY_ASSERT("@TODO: FIXME:ARM64"); return NULL; }
- PTR_UIntNative get_LowerBoundForConservativeReporting() { PORTABILITY_ASSERT("@TODO: FIXME:ARM64"); return NULL; }
+ PTR_UIntNative get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); }
+ PTR_UIntNative get_AddressOfPushedCallerIP() { return GET_POINTER_TO_FIELD(m_pushedLR); }
+ PTR_UIntNative get_LowerBoundForConservativeReporting() { return GET_POINTER_TO_FIELD(m_returnBlock[0]); }
void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet)
{
- UNREFERENCED_PARAMETER(pRegisterSet);
- PORTABILITY_ASSERT("@TODO: FIXME:ARM64");
+ pRegisterSet->pFP = GET_POINTER_TO_FIELD(m_pushedFP);
}
#else
#error NYI for this arch
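The accessors above encode the frame map given in the field comments: conservative reporting runs from the base of the return block up to where the stack-passed args begin, which is also the caller's SP. A compile-time check of that arithmetic (a sketch; assumes the fields are laid out exactly as declared, with no padding):

    #include <cstddef>
    #include <cstdint>

    struct UniversalTransitionStackFrameSketch
    {
        uint64_t m_pushedFP;           // ChildSP+000
        uint64_t m_pushedLR;           // ChildSP+008
        uint64_t m_fpArgRegs[8];       // ChildSP+010
        uint64_t m_returnBlock[8];     // ChildSP+050
        uint64_t m_intArgRegs[8];      // ChildSP+090
        uint64_t m_stackPassedArgs[1]; // ChildSP+0D0 (really variable-sized)
    };

    static_assert(offsetof(UniversalTransitionStackFrameSketch, m_returnBlock) == 0x50,
                  "conservative reporting starts at the return block");
    static_assert(offsetof(UniversalTransitionStackFrameSketch, m_stackPassedArgs) == 0xD0,
                  "CallerSP sits where the stack-passed args begin");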
@@ -940,7 +1022,7 @@ struct CALL_DESCR_CONTEXT
UIntNative IP;
};
#elif defined(_TARGET_ARM64_)
-// @TODO: Add ARM64 entries
+// ARM64TODO: Add ARM64 entries
struct CALL_DESCR_CONTEXT
{
UIntNative IP;
@@ -1077,7 +1159,17 @@ void StackFrameIterator::UnwindThrowSiteThunk()
m_RegDisplay.pR10 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R10);
m_RegDisplay.pR11 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R11);
#elif defined(_TARGET_ARM64_)
- PORTABILITY_ASSERT("@TODO: FIXME:ARM64");
+ m_RegDisplay.pX19 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X19);
+ m_RegDisplay.pX20 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X20);
+ m_RegDisplay.pX21 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X21);
+ m_RegDisplay.pX22 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X22);
+ m_RegDisplay.pX23 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X23);
+ m_RegDisplay.pX24 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X24);
+ m_RegDisplay.pX25 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X25);
+ m_RegDisplay.pX26 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X26);
+ m_RegDisplay.pX27 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X27);
+ m_RegDisplay.pX28 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X28);
+ // ARM64TODO: FP? LR?
#elif defined(_TARGET_X86_)
m_RegDisplay.pRbp = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rbp);
m_RegDisplay.pRdi = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rdi);
diff --git a/src/Native/Runtime/arm/ThunkPoolThunks.asm b/src/Native/Runtime/arm/ThunkPoolThunks.asm
index dfff2e370..fea3f16c2 100644
--- a/src/Native/Runtime/arm/ThunkPoolThunks.asm
+++ b/src/Native/Runtime/arm/ThunkPoolThunks.asm
@@ -1,8 +1,6 @@
-;; ==++==
-;;
-;; Copyright (c) Microsoft Corporation. All rights reserved.
-;;
-;; ==--==
+;; Licensed to the .NET Foundation under one or more agreements.
+;; The .NET Foundation licenses this file to you under the MIT license.
+;; See the LICENSE file in the project root for more information.
#include "kxarm.h"
diff --git a/src/Native/Runtime/arm64/AllocFast.asm b/src/Native/Runtime/arm64/AllocFast.asm
index 157d76f13..2c0b2aba1 100644
--- a/src/Native/Runtime/arm64/AllocFast.asm
+++ b/src/Native/Runtime/arm64/AllocFast.asm
@@ -17,7 +17,7 @@
;;
;; x0 contains EEType pointer
;;
- ldur w2, [x0, #OFFSETOF__EEType__m_uBaseSize]
+ ldr w2, [x0, #OFFSETOF__EEType__m_uBaseSize]
;;
;; x0: EEType pointer
@@ -25,7 +25,11 @@
;; x2: base size
;;
+ ;; Load potential new object address into x12.
ldr x12, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
+
+ ;; Determine whether the end of the object would lie outside of the current allocation context. If so,
+ ;; we abandon the attempt to allocate the object directly and fall back to the slow helper.
add x2, x2, x12
ldr x13, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_limit]
cmp x2, x13
@@ -66,7 +70,7 @@ RhpNewFast_RarePath
;; Preserve the EEType in x19
mov x19, x0
- ldur w2, [x0, #OFFSETOF__EEType__m_uBaseSize]
+ ldr w2, [x0, #OFFSETOF__EEType__m_uBaseSize]
;; Call the rest of the allocation helper.
;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame)
@@ -77,7 +81,7 @@ RhpNewFast_RarePath
str x19, [x0, #OFFSETOF__Object__m_pEEType]
;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC
- ldur w1, [x19, #OFFSETOF__EEType__m_uBaseSize]
+ ldr w1, [x19, #OFFSETOF__EEType__m_uBaseSize]
movk x2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF)
movk x2, #(RH_LARGE_OBJECT_SIZE >> 16), lsl #16
cmp x1, x2
@@ -90,7 +94,7 @@ RhpNewFast_RarePath
New_SkipPublish
POP_COOP_PINVOKE_FRAME
- ret
+ EPILOG_RETURN
NewOutOfMemory
;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw
@@ -100,9 +104,7 @@ NewOutOfMemory
mov x1, 0 ; Indicate that we should throw OOM.
POP_COOP_PINVOKE_FRAME
-
- ldr x12, =RhExceptionHandling_FailedAllocation
- EPILOG_BRANCH_REG x12
+ EPILOG_NOP b RhExceptionHandling_FailedAllocation
NESTED_END RhpNewObject
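The fast path above is a plain bump-pointer allocation against the thread's alloc context. The same control flow in C++ (a sketch; the struct and field names follow the OFFSETOF__Thread__m_alloc_context__* symbols, not the runtime's actual declarations):

    #include <cstdint>

    struct alloc_context { uint8_t * alloc_ptr; uint8_t * alloc_limit; };
    struct Object;

    static Object * RhpNewFastSketch(alloc_context * acontext,
                                     void * pEEType, uint32_t uBaseSize)
    {
        uint8_t * alloc_ptr = acontext->alloc_ptr;    // ldr x12, [alloc_ptr]
        uint8_t * alloc_end = alloc_ptr + uBaseSize;  // add x2, x2, x12
        if (alloc_end > acontext->alloc_limit)        // cmp x2, x13 / bhi
            return nullptr;                           // fall back to the slow helper
        acontext->alloc_ptr = alloc_end;              // publish the bumped pointer
        *(void **)alloc_ptr = pEEType;                // set the object's EEType
        return (Object *)alloc_ptr;
    }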
@@ -110,8 +112,56 @@ NewOutOfMemory
;; x0 == EEType
;; x1 == element/character count
LEAF_ENTRY RhNewString
- ;; ARM64TODO
- brk 0xf000
+ ;; Make sure computing the overall allocation size won't overflow
+ mov x2,#0x7FFFFFFF
+ cmp x1, x2
+ bgt StringSizeOverflow
+
+ ;; Compute overall allocation size (align(base size + (element size * elements), 8)).
+ mov w2, #STRING_COMPONENT_SIZE
+ mov x3, #(STRING_BASE_SIZE + 7)
+ umaddl x2, w1, w2, x3 ; x2 = w1 * w2 + x3
+ and x2, x2, #-8
+
+ ; x0 == EEType
+ ; x1 == element count
+ ; x2 == string size
+
+ INLINE_GETTHREAD x3, x5
+
+ ;; Load potential new object address into x12.
+ ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
+
+ ;; Determine whether the end of the object would lie outside of the current allocation context. If so,
+ ;; we abandon the attempt to allocate the object directly and fall back to the slow helper.
+ add x2, x2, x12
+ ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit]
+ cmp x2, x12
+ bhi RhpNewArrayRare
+
+ ;; Reload new object address into x12.
+ ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
+
+ ;; Update the alloc pointer to account for the allocation.
+ str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
+
+ ;; Set the new object's EEType pointer and element count.
+ str x0, [x12, #OFFSETOF__Object__m_pEEType]
+ str x1, [x12, #OFFSETOF__Array__m_Length]
+
+ ;; Return the object allocated in x0.
+ mov x0, x12
+
+ ret
+
+StringSizeOverflow
+ ; We get here if the length of the final string object can't be represented as an unsigned
+ ; 32-bit value. We're going to tail-call to a managed helper that will throw
+ ; an overflow exception that the caller of this allocator understands.
+
+ ; x0 holds EEType pointer already
+ mov x1, #1 ; Indicate that we should throw OverflowException
+ b RhExceptionHandling_FailedAllocation
LEAF_END RhNewString
INLINE_GETTHREAD_CONSTANT_POOL
@@ -122,14 +172,17 @@ NewOutOfMemory
;; x1 == element count
LEAF_ENTRY RhpNewArray
- ; we want to limit the element count to the non-negative 32-bit int range
+ ;; We want to limit the element count to the non-negative 32-bit int range.
+ ;; If the element count is <= 0x7FFFFFFF, no overflow is possible because the component
+ ;; size is <= 0xffff (it's an unsigned 16-bit value), and the base size for the worst
+ ;; case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits.
mov x2,#0x7FFFFFFF
cmp x1,x2
bgt ArraySizeOverflow
- ldurh w2, [x0, #OFFSETOF__EEType__m_usComponentSize]
+ ldrh w2, [x0, #OFFSETOF__EEType__m_usComponentSize]
umull x2, w1, w2
- ldur w3, [x0, #OFFSETOF__EEType__m_uBaseSize]
+ ldr w3, [x0, #OFFSETOF__EEType__m_uBaseSize]
add x2, x2, x3
add x2, x2, #7
and x2, x2, #-8
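Both RhNewString and RhpNewArray compute align(base size + count * component size, 8), and the 0x7FFFFFFF cap on the count is what makes the 64-bit arithmetic safe. Worked out in C++ (a sketch of the reasoning only):

    #include <cstdint>

    static uint64_t ComputeAllocSizeSketch(uint64_t count,         // <= 0x7FFFFFFF after the bgt check
                                           uint32_t componentSize, // <= 0xFFFF (unsigned 16-bit)
                                           uint32_t baseSize)
    {
        // Worst case: 0x7FFFFFFF * 0xFFFF + baseSize < 2^47 + 2^32, so the
        // 64-bit arithmetic can never wrap; the umaddl instruction computes
        // count * componentSize + (baseSize + 7) in one step.
        uint64_t size = count * componentSize + baseSize + 7;
        return size & ~uint64_t(7);   // align(..., 8), the "and x2, x2, #-8"
    }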
@@ -145,7 +198,6 @@ NewOutOfMemory
;; Determine whether the end of the object would lie outside of the current allocation context. If so,
;; we abandon the attempt to allocate the object directly and fall back to the slow helper.
-
add x2, x2, x12
ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit]
cmp x2, x12
@@ -168,7 +220,7 @@ NewOutOfMemory
ArraySizeOverflow
; We get here if the size of the final array object can't be represented as an unsigned
- ; 64-bit value. We're going to tail-call to a managed helper that will throw
+ ; 32-bit value. We're going to tail-call to a managed helper that will throw
; an overflow exception that the caller of this allocator understands.
; x0 holds EEType pointer already
@@ -185,9 +237,9 @@ ArraySizeOverflow
;; x3 == Thread
NESTED_ENTRY RhpNewArrayRare
- ; Recover array size by subtracting the alloc_ptr from r2.
- ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
- sub x2, x2, x12
+ ; Recover array size by subtracting the alloc_ptr from x2.
+ PROLOG_NOP ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
+ PROLOG_NOP sub x2, x2, x12
PUSH_COOP_PINVOKE_FRAME x3
@@ -231,9 +283,7 @@ ArrayOutOfMemory
mov x1, 0 ; Indicate that we should throw OOM.
POP_COOP_PINVOKE_FRAME
-
- ldr x12, =RhExceptionHandling_FailedAllocation
- EPILOG_BRANCH_REG x12
+ EPILOG_NOP b RhExceptionHandling_FailedAllocation
NESTED_END RhpNewArrayRare
diff --git a/src/Native/Runtime/arm64/AsmMacros.h b/src/Native/Runtime/arm64/AsmMacros.h
index 0f64afafe..dc8d36512 100644
--- a/src/Native/Runtime/arm64/AsmMacros.h
+++ b/src/Native/Runtime/arm64/AsmMacros.h
@@ -21,7 +21,23 @@ GC_ALLOC_ALIGN8_BIAS equ 4
GC_ALLOC_ALIGN8 equ 8
;; Note: these must match the defs in PInvokeTransitionFrameFlags defined in rhbinder.h
-;; ARM64TODO
+PTFF_SAVE_X19 equ 0x00000001
+PTFF_SAVE_X20 equ 0x00000002
+PTFF_SAVE_X21 equ 0x00000004
+PTFF_SAVE_X22 equ 0x00000008
+PTFF_SAVE_X23 equ 0x00000010
+PTFF_SAVE_X24 equ 0x00000020
+PTFF_SAVE_X25 equ 0x00000040
+PTFF_SAVE_X26 equ 0x00000080
+PTFF_SAVE_X27 equ 0x00000100
+PTFF_SAVE_X28 equ 0x00000200
+PTFF_SAVE_SP equ 0x00000400
+PTFF_SAVE_ALL_PRESERVED equ 0x000003FF ;; NOTE: x19-x28
+PTFF_SAVE_FP equ 0x00080000
+PTFF_SAVE_LR equ 0x00100000
+PTFF_X0_IS_GCREF equ 0x00200000 ;; iff PTFF_SAVE_X0 : set->x0 is Object, clear->x0 is scalar
+PTFF_X0_IS_BYREF equ 0x00400000 ;; iff PTFF_SAVE_X0 : set->x0 is ByRef, clear->x0 is Object or scalar
+PTFF_THREAD_ABORT equ 0x00800000 ;; indicates that ThreadAbortException should be thrown when returning from the transition
;;
;; Rename fields of nested structs
@@ -35,6 +51,11 @@ OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbA
EXTERN RhpGcAlloc
EXTERN RhpPublishObject
EXTERN RhExceptionHandling_FailedAllocation
+ IMPORT g_lowest_address
+ IMPORT g_highest_address
+ IMPORT g_ephemeral_low
+ IMPORT g_ephemeral_high
+ IMPORT g_card_table
;; -----------------------------------------------------------------------------
;;
@@ -88,7 +109,8 @@ __SECTIONREL_tls_CurrentThread SETS "SECTIONREL_tls_CurrentThread"
ldr $trashReg, [$trashReg]
ldr $destReg, [xpr, #__tls_array]
ldr $destReg, [$destReg, $trashReg lsl #3]
- ldr $trashReg, $__SECTIONREL_tls_CurrentThread
+ ldr $trashReg, =$__SECTIONREL_tls_CurrentThread
+ ldr $trashReg, [$trashReg]
add $destReg, $destReg, $trashReg
MEND
@@ -128,19 +150,37 @@ __SECTIONREL_tls_CurrentThread SETS "$__SECTIONREL_tls_CurrentThread":CC:"_"
;; - This macro uses trashReg (after its initial value has been saved in the frame) and upon exit trashReg
;; will contain the address of transition frame.
;;
+
+DEFAULT_FRAME_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP
+
MACRO
PUSH_COOP_PINVOKE_FRAME $trashReg
- ;; ARM64TODO: reserve stack for any data+flags needed to make the stack walker do its job
+ ;; The following macro variables are just some assembler magic to get the name of the 32-bit version
+ ;; of $trashReg. It does it by string manipulation. Replaces something like x3 with w3.
+ LCLS TrashRegister32Bit
+TrashRegister32Bit SETS "$trashReg"
+TrashRegister32Bit SETS "w":CC:("$TrashRegister32Bit":RIGHT:((:LEN:TrashRegister32Bit) - 1))
+
+ PROLOG_SAVE_REG_PAIR fp, lr, #-0x80! ;; Push down stack pointer and store FP and LR
+
+ ;; 0x10 bytes reserved for Thread* and flags
- PROLOG_SAVE_REG_PAIR fp, lr, #-0x60! ;; Push down stack pointer and store FP and LR
- mov fp, sp ;; Set the frame pointer to the bottom of the new frame
;; Save callee saved registers
- PROLOG_SAVE_REG_PAIR x19, x20, #16
- PROLOG_SAVE_REG_PAIR x21, x22, #32
- PROLOG_SAVE_REG_PAIR x23, x24, #48
- PROLOG_SAVE_REG_PAIR x25, x26, #64
- PROLOG_SAVE_REG_PAIR x27, x28, #80
+ PROLOG_SAVE_REG_PAIR x19, x20, #0x20
+ PROLOG_SAVE_REG_PAIR x21, x22, #0x30
+ PROLOG_SAVE_REG_PAIR x23, x24, #0x40
+ PROLOG_SAVE_REG_PAIR x25, x26, #0x50
+ PROLOG_SAVE_REG_PAIR x27, x28, #0x60
+
+ ;; Save the value of SP before stack allocation to the last slot in the frame (slot #15)
+ add $trashReg, sp, #0x80
+ str $trashReg, [sp, #0x70]
+
+ ;; Record the bitmask of saved registers in the frame (slot #3)
+ mov $TrashRegister32Bit, #DEFAULT_FRAME_SAVE_FLAGS
+ str $TrashRegister32Bit, [sp, #0x18]
+
mov $trashReg, sp
MEND
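The prologue now builds a 0x80-byte frame whose slots line up with the PInvokeTransitionFrame offsets declared in AsmOffsetsCpu.h: fp/lr at the bottom, the reserved 0x10 bytes for Thread* and the flags word, x19-x28 next, and the caller's SP near the top. The slot map as a C++ struct (a sketch; field names follow the PLAT_ASM_OFFSET entries, and the padding slot is an assumption needed to reach the 0x80-byte frame size):

    #include <cstddef>
    #include <cstdint>

    struct PInvokeTransitionFrameSketch
    {
        uint64_t m_FramePointer;      // sp+0x00: saved fp
        uint64_t m_RIP;               // sp+0x08: saved lr
        uint64_t m_pThread;           // sp+0x10: reserved; stored outside this macro
        uint64_t m_dwFlags;           // sp+0x18: DEFAULT_FRAME_SAVE_FLAGS bitmask
        uint64_t m_PreservedRegs[10]; // sp+0x20: x19-x28
        uint64_t m_CallersSP;         // sp+0x70: sp + 0x80 at entry
        uint64_t m_padding;           // sp+0x78: keeps the frame 16-byte aligned
    };

    static_assert(offsetof(PInvokeTransitionFrameSketch, m_dwFlags) == 0x18, "flags slot");
    static_assert(offsetof(PInvokeTransitionFrameSketch, m_CallersSP) == 0x70, "SP slot");
    static_assert(sizeof(PInvokeTransitionFrameSketch) == 0x80, "frame size");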
@@ -148,12 +188,10 @@ __SECTIONREL_tls_CurrentThread SETS "$__SECTIONREL_tls_CurrentThread":CC:"_"
MACRO
POP_COOP_PINVOKE_FRAME
- ;; ARM64TODO: restore stack used by any data + flags needed to make the stack walker do its job
-
- EPILOG_RESTORE_REG_PAIR x19, x20, #16
- EPILOG_RESTORE_REG_PAIR x21, x22, #32
- EPILOG_RESTORE_REG_PAIR x23, x24, #48
- EPILOG_RESTORE_REG_PAIR x25, x26, #64
- EPILOG_RESTORE_REG_PAIR x27, x28, #80
- EPILOG_RESTORE_REG_PAIR fp, lr, #0x60!
+ EPILOG_RESTORE_REG_PAIR x19, x20, #0x20
+ EPILOG_RESTORE_REG_PAIR x21, x22, #0x30
+ EPILOG_RESTORE_REG_PAIR x23, x24, #0x40
+ EPILOG_RESTORE_REG_PAIR x25, x26, #0x50
+ EPILOG_RESTORE_REG_PAIR x27, x28, #0x60
+ EPILOG_RESTORE_REG_PAIR fp, lr, #0x80!
MEND
diff --git a/src/Native/Runtime/arm64/AsmOffsetsCpu.h b/src/Native/Runtime/arm64/AsmOffsetsCpu.h
index 8e4a7867d..f1ae26ee2 100644
--- a/src/Native/Runtime/arm64/AsmOffsetsCpu.h
+++ b/src/Native/Runtime/arm64/AsmOffsetsCpu.h
@@ -18,8 +18,8 @@ PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause)
PLAT_ASM_OFFSET(20, ExInfo, m_frameIter)
PLAT_ASM_OFFSET(278, ExInfo, m_notifyDebuggerSP)
-PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP)
-PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer)
+PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_FramePointer)
+PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_RIP)
PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread)
PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_dwFlags)
PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs)
@@ -29,12 +29,35 @@ PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer)
PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC)
PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay)
-PLAT_ASM_SIZEOF(148, PAL_LIMITED_CONTEXT)
-PLAT_ASM_OFFSET(100, PAL_LIMITED_CONTEXT, IP)
-
-// @TODO: Add ARM64 entries for PAL_LIMITED_CONTEXT
+PLAT_ASM_SIZEOF(C0, PAL_LIMITED_CONTEXT)
+PLAT_ASM_OFFSET(78, PAL_LIMITED_CONTEXT, IP)
+
+PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, X0)
+PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, X1)
+PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, X19)
+PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, X20)
+PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, X21)
+PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, X22)
+PLAT_ASM_OFFSET(30, PAL_LIMITED_CONTEXT, X23)
+PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, X24)
+PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, X25)
+PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, X26)
+PLAT_ASM_OFFSET(50, PAL_LIMITED_CONTEXT, X27)
+PLAT_ASM_OFFSET(58, PAL_LIMITED_CONTEXT, X28)
+PLAT_ASM_OFFSET(68, PAL_LIMITED_CONTEXT, LR)
+PLAT_ASM_OFFSET(70, PAL_LIMITED_CONTEXT, SP)
PLAT_ASM_SIZEOF(150, REGDISPLAY)
PLAT_ASM_OFFSET(f8, REGDISPLAY, SP)
-// @TODO: Add ARM64 entries for REGDISPLAY
+PLAT_ASM_OFFSET(98, REGDISPLAY, pX19)
+PLAT_ASM_OFFSET(a0, REGDISPLAY, pX20)
+PLAT_ASM_OFFSET(a8, REGDISPLAY, pX21)
+PLAT_ASM_OFFSET(b0, REGDISPLAY, pX22)
+PLAT_ASM_OFFSET(b8, REGDISPLAY, pX23)
+PLAT_ASM_OFFSET(c0, REGDISPLAY, pX24)
+PLAT_ASM_OFFSET(c8, REGDISPLAY, pX25)
+PLAT_ASM_OFFSET(d0, REGDISPLAY, pX26)
+PLAT_ASM_OFFSET(d8, REGDISPLAY, pX27)
+PLAT_ASM_OFFSET(e0, REGDISPLAY, pX28)
+PLAT_ASM_OFFSET(110, REGDISPLAY, D)
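These constants only stay honest if they are checked against the C++ struct definitions. One way such a table can be consumed as compile-time asserts (a sketch; this macro definition is hypothetical, not the build's actual consumer):

    #include <cstddef>

    #define PLAT_ASM_OFFSET(offset, cls, member) \
        static_assert(offsetof(cls, member) == 0x##offset, #cls "::" #member " moved");
    #define PLAT_ASM_SIZEOF(size, cls) \
        static_assert(sizeof(cls) == 0x##size, #cls " resized");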
diff --git a/src/Native/Runtime/arm64/CallingConventionConverterHelpers.asm b/src/Native/Runtime/arm64/CallingConventionConverterHelpers.asm
index 5433d49d8..2d43d5bba 100644
--- a/src/Native/Runtime/arm64/CallingConventionConverterHelpers.asm
+++ b/src/Native/Runtime/arm64/CallingConventionConverterHelpers.asm
@@ -6,6 +6,8 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; CallingConventionCoverter Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+POINTER_SIZE equ 0x08
+
;;
;; Note: The "__jmpstub__" prefix is used to indicate to debugger
;; that it must step-through this stub when it encounters it while
@@ -16,7 +18,7 @@
;; void CallingConventionConverter_ReturnThunk()
;;
LEAF_ENTRY CallingConventionConverter_ReturnThunk
- brk 0xf000
+ ret
LEAF_END CallingConventionConverter_ReturnThunk
;;
@@ -36,25 +38,27 @@
;; // calling stub is easier to debug
;; }
;;
- ;; sp-4 - Points at CommonCallingStubInputData
+ ;; xip0 - Points at CommonCallingStubInputData
;;
;;
LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub
- brk 0xf000
+ ldr xip1, [xip0] ; put CallingConventionId into xip1 as "parameter" to universal transition thunk
+ ldr xip0, [xip0, #POINTER_SIZE] ; get pointer to CallingConventionConverter_CommonCallingStub_PointerData into xip0
+ ldr x12, [xip0, #POINTER_SIZE] ; get address of UniversalTransitionThunk (which we'll tailcall to later)
+ ldr xip0, [xip0] ; get address of ManagedCallConverterThunk (target for universal thunk to call)
+ ret x12
LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub
;;
- ;; void CallingConventionConverter_SpecifyCommonStubData(CallingConventionConverter_CommonCallingStub_PointerData *commonData);
- ;;
- LEAF_ENTRY CallingConventionConverter_SpecifyCommonStubData
- brk 0xf000
- LEAF_END CallingConventionConverter_SpecifyCommonStubData
-
- ;;
;; void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr *commonCallingStub)
;;
LEAF_ENTRY CallingConventionConverter_GetStubs
- brk 0xf000
+ ldr x12, =CallingConventionConverter_ReturnThunk
+ str x12, [x0] ;; ARM64 doesn't need different return thunks.
+ str x12, [x1]
+ ldr x12, =__jmpstub__CallingConventionConverter_CommonCallingStub
+ str x12, [x2]
+ ret
LEAF_END CallingConventionConverter_GetStubs
END
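The common calling stub's four loads assume a specific two-level layout for the thunk data. That layout as C++ (a sketch; the struct names are taken from the comments above, the field names are hypothetical, and the pointer order matches the ldr sequence):

    #include <cstdint>

    struct CallingConventionConverter_CommonCallingStub_PointerData
    {
        void * ManagedCallConverterThunk; // -> xip0: target for the universal thunk to call
        void * UniversalTransitionThunk;  // -> x12:  tail-called with "ret x12"
    };

    struct CommonCallingStubInputData
    {
        uint64_t CallingConventionId;     // -> xip1: extra argument for the thunk's target
        CallingConventionConverter_CommonCallingStub_PointerData * commonData;
    };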
diff --git a/src/Native/Runtime/arm64/GetThread.asm b/src/Native/Runtime/arm64/GetThread.asm
index 3d132a006..b91c6535f 100644
--- a/src/Native/Runtime/arm64/GetThread.asm
+++ b/src/Native/Runtime/arm64/GetThread.asm
@@ -13,15 +13,18 @@
;;
;; INPUT: none
;;
-;; OUTPUT: r0: Thread pointer
+;; OUTPUT: x0: Thread pointer
;;
;; MUST PRESERVE ARGUMENT REGISTERS
-;; @todo check the actual requirements here, r0 is both return and argument register
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
LEAF_ENTRY RhpGetThread
- brk 0xf000
+ ;; x0 = GetThread(), TRASHES xip0 (which can be used as an intra-procedure-call scratch register)
+ INLINE_GETTHREAD x0, xip0
+ ret
LEAF_END
FASTCALL_ENDFUNC
+ INLINE_GETTHREAD_CONSTANT_POOL
+
end
diff --git a/src/Native/Runtime/arm64/InteropThunksHelpers.asm b/src/Native/Runtime/arm64/InteropThunksHelpers.asm
index aaad54dd4..407e87fce 100644
--- a/src/Native/Runtime/arm64/InteropThunksHelpers.asm
+++ b/src/Native/Runtime/arm64/InteropThunksHelpers.asm
@@ -7,7 +7,7 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; TODO __tls_array equ 0x2C ;; offsetof(TEB, ThreadLocalStoragePointer)
+__tls_array equ 0x58 ;; offsetof(TEB, ThreadLocalStoragePointer)
POINTER_SIZE equ 0x08
@@ -24,15 +24,45 @@ ThunkParamSlot % 0x8
;;
;; RhCommonStub
;;
- NESTED_ENTRY RhCommonStub
- brk 0xf000
- NESTED_END RhCommonStub
+ ;; INPUT: xip0: thunk's data block
+ ;;
+ ;; TRASHES: x9, x10, x11, xip0
+ ;;
+ LEAF_ENTRY RhCommonStub
+ ;; There are arbitrary callers passing arguments with arbitrary signatures.
+ ;; Custom calling convention:
+ ;; xip0 pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers)
+
+ brk 0xf000 ;; TODO: remove after debugging/testing stub
+
+ ;; Save context data into the ThunkParamSlot thread-local variable
+ ;; A pointer to the delegate and function pointer for open static delegates should have been saved in the thunk's context cell during thunk allocation
+ ldr x10, =_tls_index
+ ldr x10, [x10]
+ ldr x9, [xpr, #__tls_array]
+ ldr x9, [x9, x10 lsl #3] ;; x9 <- our TLS base
+
+ ;; x9 = base address of TLS data
+ ;; x10 = trashed
+ ;; xip0 = address of context cell in thunk's data
+
+ ;; store thunk address in thread static
+ ldr x10, [xip0]
+ ldr x11, =ThunkParamSlot
+ str x10, [x9, x11] ;; ThunkParamSlot <- context slot data
+
+ ;; Now load the target address and jump to it.
+ ldr xip0, [xip0, #POINTER_SIZE]
+ ret xip0
+
+ LEAF_END RhCommonStub
;;
;; IntPtr RhGetCommonStubAddress()
;;
LEAF_ENTRY RhGetCommonStubAddress
- brk 0xf000
+ ldr x0, =RhCommonStub
+ ret
LEAF_END RhGetCommonStubAddress
@@ -40,7 +70,18 @@ ThunkParamSlot % 0x8
;; IntPtr RhGetCurrentThunkContext()
;;
LEAF_ENTRY RhGetCurrentThunkContext
- brk 0xf000
+ brk 0xf000 ;; TODO: remove after debugging/testing stub
+
+ ldr x1, =_tls_index
+ ldr x1, [x1]
+ ldr x0, [xpr, #__tls_array]
+ ldr x0, [x0, x1 lsl #3] ;; x0 <- our TLS base
+
+ ldr x1, =ThunkParamSlot
+ ldr x0, [x0, x1] ;; x0 <- ThunkParamSlot
+
+ ret
+
LEAF_END RhGetCurrentThunkContext
END
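RhCommonStub parks the thunk's context word in the ThunkParamSlot thread-local and tail-calls the target; RhGetCurrentThunkContext reads it back. The handshake in C++ (a sketch; names suffixed Sketch are hypothetical, and the data-block layout is the two-pointer cell described above):

    #include <cstdint>

    thread_local uint64_t ThunkParamSlotSketch;

    struct ThunkDataBlock
    {
        uint64_t context; // e.g. delegate or generic dictionary pointer
        void (*target)(); // code the thunk finally jumps to
    };

    static void RhCommonStubSketch(ThunkDataBlock * pData /* xip0 */)
    {
        ThunkParamSlotSketch = pData->context; // str x10, [x9, x11]
        pData->target();                       // the "ret xip0" tail call
    }

    static uint64_t RhGetCurrentThunkContextSketch()
    {
        return ThunkParamSlotSketch;           // read back by the callee
    }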
diff --git a/src/Native/Runtime/arm64/MiscStubs.asm b/src/Native/Runtime/arm64/MiscStubs.asm
index 9d2ea38d3..92015a97e 100644
--- a/src/Native/Runtime/arm64/MiscStubs.asm
+++ b/src/Native/Runtime/arm64/MiscStubs.asm
@@ -4,6 +4,8 @@
#include "AsmMacros.h"
+ EXTERN memcpy
+
TEXTAREA
;;
@@ -62,11 +64,28 @@
;;
LEAF_ENTRY RhpCopyMultibyteNoGCRefs
- brk 0xf000
+
+ ; x0 dest
+ ; x1 src
+ ; x2 count
+
+ cbz x2, NothingToCopy_NoGCRefs ; check for a zero-length copy
+
+ ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV,
+ ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be
+ ; translated to a managed exception as usual.
ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsDestAVLocation
- brk 0xf000
+ ldrb wzr, [x0]
ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsSrcAVLocation
- brk 0xf000
+ ldrb wzr, [x1]
+
+ ; tail-call to plain-old-memcpy
+ b memcpy
+
+NothingToCopy_NoGCRefs
+ ; dest is already in x0
+ ret
+
LEAF_END
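The two single-byte loads exist only to turn a bad pointer into a fault at a known, labeled address before control disappears into memcpy. The idea in C++ (a sketch; the ALTERNATE_ENTRY labels that let the EH subsystem attribute the fault to the managed caller have no C++ equivalent):

    #include <cstring>

    extern "C" void * RhpCopyMultibyteNoGCRefsSketch(void * dest, const void * src, size_t count)
    {
        if (count == 0)
            return dest;                        // cbz x2, NothingToCopy_NoGCRefs

        // Touch both buffers first; if either pointer is bad, the fault happens
        // at a known probe address instead of somewhere inside memcpy.
        (void)*(volatile const char *)dest;     // ldrb wzr, [x0]
        (void)*(volatile const char *)src;      // ldrb wzr, [x1]

        return memcpy(dest, src, count);        // b memcpy (tail call)
    }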
diff --git a/src/Native/Runtime/arm64/StubDispatch.asm b/src/Native/Runtime/arm64/StubDispatch.asm
index e82a49b21..df958510a 100644
--- a/src/Native/Runtime/arm64/StubDispatch.asm
+++ b/src/Native/Runtime/arm64/StubDispatch.asm
@@ -4,17 +4,41 @@
#include "AsmMacros.h"
- TEXTAREA
-
+ TEXTAREA
#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+ EXTERN RhpCastableObjectResolve
+ EXTERN RhpCidResolve
+ EXTERN RhpUniversalTransition_DebugStepTailCall
- ;;EXTERN t_TLS_DispatchCell
+ MACRO
+ GET_TLS_DISPATCH_CELL
+ ;; ARM64TODO
+ brk 0xf000
+ MEND
+
+ MACRO
+ SET_TLS_DISPATCH_CELL
+ ;; ARM64TODO
+ brk 0xf000
+ MEND
+
+ ;; Macro that generates code to check a single cache entry.
+ MACRO
+ CHECK_CACHE_ENTRY $entry
+ ;; Check a single entry in the cache.
+ ;; x9 : Cache data structure. Also used for target address jump.
+ ;; x10 : Instance EEType*
+ ;; x11 : Trashed
+ ldr x11, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + ($entry * 16))]
+ cmp x10, x11
+ bne %ft0 ;; Jump to label '0'
+ ldr x9, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + ($entry * 16) + 8)]
+ ret x9
+0 ;; Label '0'
+ MEND
-SECTIONREL_t_TLS_DispatchCell
- ;;DCD t_TLS_DispatchCell
- ;;RELOC 15 ;; SECREL
LEAF_ENTRY RhpCastableObjectDispatch_CommonStub
brk 0xf000
@@ -35,7 +59,9 @@ SECTIONREL_t_TLS_DispatchCell
LEAF_END RhpCastableObjectDispatchHelper
+;;
;; Macro that generates a stub consuming a cache with the given number of entries.
+;;
GBLS StubName
MACRO
@@ -44,37 +70,80 @@ SECTIONREL_t_TLS_DispatchCell
StubName SETS "RhpInterfaceDispatch$entries"
NESTED_ENTRY $StubName
- brk 0xf000
+
+ ;; xip1 currently holds the indirection cell address. We need to get the cache structure instead.
+ ldr x9, [xip1, #OFFSETOF__InterfaceDispatchCell__m_pCache]
+
+ ;; Load the EEType from the object instance in x0.
+ ldr x10, [x0]
+
+ GBLA CurrentEntry
+CurrentEntry SETA 0
+
+ WHILE CurrentEntry < $entries
+ CHECK_CACHE_ENTRY CurrentEntry
+CurrentEntry SETA CurrentEntry + 1
+ WEND
+
+ ;; xip1 still contains the indirection cell address.
+ b RhpInterfaceDispatchSlow
+
NESTED_END $StubName
MEND
+;;
;; Define all the stub routines we currently need.
- DEFINE_INTERFACE_DISPATCH_STUB 1
- DEFINE_INTERFACE_DISPATCH_STUB 2
- DEFINE_INTERFACE_DISPATCH_STUB 4
- DEFINE_INTERFACE_DISPATCH_STUB 8
- DEFINE_INTERFACE_DISPATCH_STUB 16
- DEFINE_INTERFACE_DISPATCH_STUB 32
- DEFINE_INTERFACE_DISPATCH_STUB 64
+;;
+ DEFINE_INTERFACE_DISPATCH_STUB 1
+ DEFINE_INTERFACE_DISPATCH_STUB 2
+ DEFINE_INTERFACE_DISPATCH_STUB 4
+ DEFINE_INTERFACE_DISPATCH_STUB 8
+ DEFINE_INTERFACE_DISPATCH_STUB 16
+ DEFINE_INTERFACE_DISPATCH_STUB 32
+ DEFINE_INTERFACE_DISPATCH_STUB 64
+;;
;; Initial dispatch on an interface when we don't have a cache yet.
+;;
LEAF_ENTRY RhpInitialInterfaceDispatch
- brk 0xf000
+ ;; Just tail call to the cache miss helper.
+ b RhpInterfaceDispatchSlow
LEAF_END RhpInitialInterfaceDispatch
+;;
+;; Stub dispatch routine for dispatch to a vtable slot
+;;
LEAF_ENTRY RhpVTableOffsetDispatch
- brk 0xf000
+ ;; xip1 has the interface dispatch cell address in it.
+ ;; load x12 to point to the vtable offset (which is stored in the m_pCache field).
+ ldr x12, [xip1, #OFFSETOF__InterfaceDispatchCell__m_pCache]
+
+ ;; Load the EEType from the object instance in x0, and add it to the vtable offset
+ ;; to get the address in the vtable of what we want to dereference
+ ldr x13, [x0]
+ add x12, x12, x13
+
+ ;; Load the target address of the vtable into x12
+ ldr x12, [x12]
+
+ ret x12
LEAF_END RhpVTableOffsetDispatch
+;;
;; Cache miss case, call the runtime to resolve the target and update the cache.
+;;
LEAF_ENTRY RhpInterfaceDispatchSlow
ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch
- brk 0xf000
+ ;; xip1 has the interface dispatch cell address in it.
+ ;; Calling convention of the universal thunk is:
+ ;; xip0: contains target address for the thunk to call
+ ;; xip1: contains parameter of the thunk's target
+ ldr xip0, =RhpCidResolve
+ b RhpUniversalTransition_DebugStepTailCall
LEAF_END RhpInterfaceDispatchSlow
-
#endif // FEATURE_CACHED_INTERFACE_DISPATCH
- end
+ END
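Each generated RhpInterfaceDispatch stub is an unrolled linear probe over (EEType, target) pairs that falls through to the slow path on a miss. The equivalent loop in C++ (a sketch; the real stubs are branch-only and never build a stack frame):

    struct InterfaceDispatchCacheEntry { void * pEEType; void * pTarget; };

    struct InterfaceDispatchCacheSketch
    {
        InterfaceDispatchCacheEntry m_rgEntries[64]; // stubs exist for 1..64 entries
    };

    static void * ProbeSketch(InterfaceDispatchCacheSketch * pCache /* x9 */,
                              void * pInstanceEEType /* x10 */, int cEntries)
    {
        for (int i = 0; i < cEntries; i++)             // unrolled WHILE/WEND in asm
            if (pCache->m_rgEntries[i].pEEType == pInstanceEEType)
                return pCache->m_rgEntries[i].pTarget; // "ret x9"
        return nullptr;                                // b RhpInterfaceDispatchSlow
    }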
diff --git a/src/Native/Runtime/arm64/ThunkPoolThunks.asm b/src/Native/Runtime/arm64/ThunkPoolThunks.asm
index a039939c5..8aa044a34 100644
--- a/src/Native/Runtime/arm64/ThunkPoolThunks.asm
+++ b/src/Native/Runtime/arm64/ThunkPoolThunks.asm
@@ -1,116 +1,183 @@
-;; ==++==
-;;
-;; Copyright (c) Microsoft Corporation. All rights reserved.
-;;
-;; ==--==
+;; Licensed to the .NET Foundation under one or more agreements.
+;; The .NET Foundation licenses this file to you under the MIT license.
+;; See the LICENSE file in the project root for more information.
#include "ksarm64.h"
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; STUBS & DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ARM64TODO
-;; THUNK_CODESIZE equ 0x10 ;; 4-byte mov, 2-byte add, 4-byte str, 4-byte ldr, 2-byte branch
-;; THUNK_DATASIZE equ 0x08 ;; 2 dwords
-;;
-;; THUNK_POOL_NUM_THUNKS_PER_PAGE equ 0xFA ;; 250 thunks per page
-;;
-;; PAGE_SIZE equ 0x1000 ;; 4K
-;; POINTER_SIZE equ 0x04
+THUNK_CODESIZE equ 0x10 ;; 3 instructions, 4 bytes each (and we also have 4 bytes of padding)
+THUNK_DATASIZE equ 0x10 ;; 2 qwords
+
+THUNK_POOL_NUM_THUNKS_PER_PAGE equ 0xFA ;; 250 thunks per page
+
+POINTER_SIZE equ 0x08
MACRO
NAMED_READONLY_DATA_SECTION $name, $areaAlias
AREA $areaAlias,DATA,READONLY
-RO$name % 4
+RO$name % 8
MEND
-
+
+ ;; This macro is used to declare the thunks data blocks. Unlike the macro above (which is just used for padding),
+ ;; this macro needs to assign labels to each data block, so we can address them using PC-relative addresses.
MACRO
- NAMED_READWRITE_DATA_SECTION $name, $areaAlias
+ NAMED_READWRITE_DATA_SECTION $name, $areaAlias, $pageIndex
AREA $areaAlias,DATA
-RW$name % 4
+ THUNKS_DATA_PAGE_BLOCK $pageIndex
MEND
MACRO
- LOAD_DATA_ADDRESS $groupIndex, $index
+ LOAD_DATA_ADDRESS $groupIndex, $index, $pageIndex
ALIGN 0x10 ;; make sure we align to 16-byte boundary for CFG table
- brk 0xf000
+
+ ;; Set xip0 to the address of the current thunk's data block. This is done using labels.
+ adr xip0, label_$groupIndex_$index_P$pageIndex
MEND
MACRO
JUMP_TO_COMMON $groupIndex, $index
- brk 0xf000
+ ;; start : xip0 points to the current thunks first data cell in the data page
+ ;; set xip0 to beginning of data page : xip0 <- xip0 - (THUNK_DATASIZE * current thunk's index)
+ ;; fix offset to point to last QWORD in page : xip1 <- [xip0 + PAGE_SIZE - POINTER_SIZE]
+ ;; tailcall to the location pointed at by the last qword in the data page
+ ldr xip1, [xip0, #(PAGE_SIZE - POINTER_SIZE - ($groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * $index))]
+ ret xip1
+
+ brk 0xf000 ;; Stubs need to be 16-byte aligned (see comment above). Filling padding with a
+ ;; deterministic brk instruction, instead of having it just filled with zeros.
+ MEND
+
+ MACRO
+ THUNK_LABELED_DATA_BLOCK $groupIndex, $index, $pageIndex
+
+ ;; Each data block contains 2 qword cells. The data block is also labeled so it can be addressed
+ ;; using PC relative instructions
+label_$groupIndex_$index_P$pageIndex
+ DCQ 0
+ DCQ 0
MEND
MACRO
- TenThunks $groupIndex
+ TenThunks $groupIndex, $pageIndex
+
;; Each thunk will load the address of its corresponding data (from the page that immediately follows)
- ;; and call a common stub. The address of the common stub is setup by the caller (last dword
+ ;; and call a common stub. The address of the common stub is setup by the caller (last qword
;; in the thunks data section) depending on the 'kind' of thunks needed (interop, fat function pointers, etc...)
- ;; Each data block used by a thunk consists of two dword values:
+ ;; Each data block used by a thunk consists of two qword values:
;; - Context: some value given to the thunk as context. Example for fat-fptrs: context = generic dictionary
;; - Target : target code that the thunk eventually jumps to.
- LOAD_DATA_ADDRESS $groupIndex,0
+ LOAD_DATA_ADDRESS $groupIndex,0,$pageIndex
JUMP_TO_COMMON $groupIndex,0
- LOAD_DATA_ADDRESS $groupIndex,1
+ LOAD_DATA_ADDRESS $groupIndex,1,$pageIndex
JUMP_TO_COMMON $groupIndex,1
- LOAD_DATA_ADDRESS $groupIndex,2
+ LOAD_DATA_ADDRESS $groupIndex,2,$pageIndex
JUMP_TO_COMMON $groupIndex,2
- LOAD_DATA_ADDRESS $groupIndex,3
+ LOAD_DATA_ADDRESS $groupIndex,3,$pageIndex
JUMP_TO_COMMON $groupIndex,3
- LOAD_DATA_ADDRESS $groupIndex,4
+ LOAD_DATA_ADDRESS $groupIndex,4,$pageIndex
JUMP_TO_COMMON $groupIndex,4
- LOAD_DATA_ADDRESS $groupIndex,5
+ LOAD_DATA_ADDRESS $groupIndex,5,$pageIndex
JUMP_TO_COMMON $groupIndex,5
- LOAD_DATA_ADDRESS $groupIndex,6
+ LOAD_DATA_ADDRESS $groupIndex,6,$pageIndex
JUMP_TO_COMMON $groupIndex,6
- LOAD_DATA_ADDRESS $groupIndex,7
+ LOAD_DATA_ADDRESS $groupIndex,7,$pageIndex
JUMP_TO_COMMON $groupIndex,7
- LOAD_DATA_ADDRESS $groupIndex,8
+ LOAD_DATA_ADDRESS $groupIndex,8,$pageIndex
JUMP_TO_COMMON $groupIndex,8
- LOAD_DATA_ADDRESS $groupIndex,9
+ LOAD_DATA_ADDRESS $groupIndex,9,$pageIndex
JUMP_TO_COMMON $groupIndex,9
MEND
MACRO
- THUNKS_PAGE_BLOCK
+ TenThunkDataBlocks $groupIndex, $pageIndex
+
+ ;; Similar to the thunks stubs block, we declare the thunks data blocks here
+
+ THUNK_LABELED_DATA_BLOCK $groupIndex, 0, $pageIndex
+ THUNK_LABELED_DATA_BLOCK $groupIndex, 1, $pageIndex
+ THUNK_LABELED_DATA_BLOCK $groupIndex, 2, $pageIndex
+ THUNK_LABELED_DATA_BLOCK $groupIndex, 3, $pageIndex
+ THUNK_LABELED_DATA_BLOCK $groupIndex, 4, $pageIndex
+ THUNK_LABELED_DATA_BLOCK $groupIndex, 5, $pageIndex
+ THUNK_LABELED_DATA_BLOCK $groupIndex, 6, $pageIndex
+ THUNK_LABELED_DATA_BLOCK $groupIndex, 7, $pageIndex
+ THUNK_LABELED_DATA_BLOCK $groupIndex, 8, $pageIndex
+ THUNK_LABELED_DATA_BLOCK $groupIndex, 9, $pageIndex
+ MEND
+
+ MACRO
+ THUNKS_PAGE_BLOCK $pageIndex
+
+ TenThunks 0, $pageIndex
+ TenThunks 1, $pageIndex
+ TenThunks 2, $pageIndex
+ TenThunks 3, $pageIndex
+ TenThunks 4, $pageIndex
+ TenThunks 5, $pageIndex
+ TenThunks 6, $pageIndex
+ TenThunks 7, $pageIndex
+ TenThunks 8, $pageIndex
+ TenThunks 9, $pageIndex
+ TenThunks 10, $pageIndex
+ TenThunks 11, $pageIndex
+ TenThunks 12, $pageIndex
+ TenThunks 13, $pageIndex
+ TenThunks 14, $pageIndex
+ TenThunks 15, $pageIndex
+ TenThunks 16, $pageIndex
+ TenThunks 17, $pageIndex
+ TenThunks 18, $pageIndex
+ TenThunks 19, $pageIndex
+ TenThunks 20, $pageIndex
+ TenThunks 21, $pageIndex
+ TenThunks 22, $pageIndex
+ TenThunks 23, $pageIndex
+ TenThunks 24, $pageIndex
+ MEND
+
+ MACRO
+ THUNKS_DATA_PAGE_BLOCK $pageIndex
- TenThunks 0
- TenThunks 1
- TenThunks 2
- TenThunks 3
- TenThunks 4
- TenThunks 5
- TenThunks 6
- TenThunks 7
- TenThunks 8
- TenThunks 9
- TenThunks 10
- TenThunks 11
- TenThunks 12
- TenThunks 13
- TenThunks 14
- TenThunks 15
- TenThunks 16
- TenThunks 17
- TenThunks 18
- TenThunks 19
- TenThunks 20
- TenThunks 21
- TenThunks 22
- TenThunks 23
- TenThunks 24
+ TenThunkDataBlocks 0, $pageIndex
+ TenThunkDataBlocks 1, $pageIndex
+ TenThunkDataBlocks 2, $pageIndex
+ TenThunkDataBlocks 3, $pageIndex
+ TenThunkDataBlocks 4, $pageIndex
+ TenThunkDataBlocks 5, $pageIndex
+ TenThunkDataBlocks 6, $pageIndex
+ TenThunkDataBlocks 7, $pageIndex
+ TenThunkDataBlocks 8, $pageIndex
+ TenThunkDataBlocks 9, $pageIndex
+ TenThunkDataBlocks 10, $pageIndex
+ TenThunkDataBlocks 11, $pageIndex
+ TenThunkDataBlocks 12, $pageIndex
+ TenThunkDataBlocks 13, $pageIndex
+ TenThunkDataBlocks 14, $pageIndex
+ TenThunkDataBlocks 15, $pageIndex
+ TenThunkDataBlocks 16, $pageIndex
+ TenThunkDataBlocks 17, $pageIndex
+ TenThunkDataBlocks 18, $pageIndex
+ TenThunkDataBlocks 19, $pageIndex
+ TenThunkDataBlocks 20, $pageIndex
+ TenThunkDataBlocks 21, $pageIndex
+ TenThunkDataBlocks 22, $pageIndex
+ TenThunkDataBlocks 23, $pageIndex
+ TenThunkDataBlocks 24, $pageIndex
MEND
+
;;
;; The first thunks section should be 64K aligned because it can get
;; mapped multiple times in memory, and mapping works on allocation
@@ -144,65 +211,66 @@ RW$name % 4
NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment14, "|.pad14|"
;;
+ ;; Declaring all the data section first since they have labels referenced by the stubs sections, to prevent
+ ;; compilation errors ("undefined symbols"). The stubs/data sections will be correctly laid out in the image
+ ;; using the explicit layout configurations (ndp\rh\src\runtime\DLLs\app\mrt100_app_sectionlayout.txt)
+ ;;
+ NAMED_READWRITE_DATA_SECTION ThunkData0, "|.tkd0|", 0
+ NAMED_READWRITE_DATA_SECTION ThunkData1, "|.tkd1|", 1
+ NAMED_READWRITE_DATA_SECTION ThunkData2, "|.tkd2|", 2
+ NAMED_READWRITE_DATA_SECTION ThunkData3, "|.tkd3|", 3
+ NAMED_READWRITE_DATA_SECTION ThunkData4, "|.tkd4|", 4
+ NAMED_READWRITE_DATA_SECTION ThunkData5, "|.tkd5|", 5
+ NAMED_READWRITE_DATA_SECTION ThunkData6, "|.tkd6|", 6
+ NAMED_READWRITE_DATA_SECTION ThunkData7, "|.tkd7|", 7
+
+ ;;
;; Thunk Stubs
;; NOTE: Keep number of blocks in sync with macro/constant named 'NUM_THUNK_BLOCKS' in:
;; - ndp\FxCore\src\System.Private.CoreLib\System\Runtime\InteropServices\ThunkPool.cs
;; - ndp\rh\src\tools\rhbind\zapimage.h
;;
+
LEAF_ENTRY ThunkPool, "|.tks0|"
- THUNKS_PAGE_BLOCK
+ THUNKS_PAGE_BLOCK 0
LEAF_END ThunkPool
- NAMED_READWRITE_DATA_SECTION ThunkData0, "|.tkd0|"
-
LEAF_ENTRY ThunkPool1, "|.tks1|"
- THUNKS_PAGE_BLOCK
+ THUNKS_PAGE_BLOCK 1
LEAF_END ThunkPool1
- NAMED_READWRITE_DATA_SECTION ThunkData1, "|.tkd1|"
-
LEAF_ENTRY ThunkPool2, "|.tks2|"
- THUNKS_PAGE_BLOCK
+ THUNKS_PAGE_BLOCK 2
LEAF_END ThunkPool2
- NAMED_READWRITE_DATA_SECTION ThunkData2, "|.tkd2|"
-
LEAF_ENTRY ThunkPool3, "|.tks3|"
- THUNKS_PAGE_BLOCK
+ THUNKS_PAGE_BLOCK 3
LEAF_END ThunkPool3
- NAMED_READWRITE_DATA_SECTION ThunkData3, "|.tkd3|"
-
LEAF_ENTRY ThunkPool4, "|.tks4|"
- THUNKS_PAGE_BLOCK
+ THUNKS_PAGE_BLOCK 4
LEAF_END ThunkPool4
- NAMED_READWRITE_DATA_SECTION ThunkData4, "|.tkd4|"
-
LEAF_ENTRY ThunkPool5, "|.tks5|"
- THUNKS_PAGE_BLOCK
+ THUNKS_PAGE_BLOCK 5
LEAF_END ThunkPool5
- NAMED_READWRITE_DATA_SECTION ThunkData5, "|.tkd5|"
-
LEAF_ENTRY ThunkPool6, "|.tks6|"
- THUNKS_PAGE_BLOCK
+ THUNKS_PAGE_BLOCK 6
LEAF_END ThunkPool6
-
- NAMED_READWRITE_DATA_SECTION ThunkData6, "|.tkd6|"
-
+
LEAF_ENTRY ThunkPool7, "|.tks7|"
- THUNKS_PAGE_BLOCK
+ THUNKS_PAGE_BLOCK 7
LEAF_END ThunkPool7
- NAMED_READWRITE_DATA_SECTION ThunkData7, "|.tkd7|"
-
;;
;; IntPtr RhpGetThunksBase()
;;
LEAF_ENTRY RhpGetThunksBase
- brk 0xf000
+ ;; Return the address of the first thunk pool to the caller (this is really the base address)
+ ldr x0, =ThunkPool
+ ret
LEAF_END RhpGetThunksBase
@@ -212,42 +280,54 @@ RW$name % 4
;; int RhpGetNumThunksPerBlock()
;;
LEAF_ENTRY RhpGetNumThunksPerBlock
- brk 0xf000
+ mov x0, THUNK_POOL_NUM_THUNKS_PER_PAGE
+ ret
LEAF_END RhpGetNumThunksPerBlock
;;
;; int RhpGetThunkSize()
;;
LEAF_ENTRY RhpGetThunkSize
- brk 0xf000
+ mov x0, THUNK_CODESIZE
+ ret
LEAF_END RhpGetThunkSize
;;
;; int RhpGetNumThunkBlocksPerMapping()
;;
LEAF_ENTRY RhpGetNumThunkBlocksPerMapping
- brk 0xf000
+ mov x0, 8
+ ret
LEAF_END RhpGetNumThunkBlocksPerMapping
;;
;; int RhpGetThunkBlockSize
;;
LEAF_ENTRY RhpGetThunkBlockSize
- brk 0xf000
+ mov x0, PAGE_SIZE * 2
+ ret
LEAF_END RhpGetThunkBlockSize
;;
;; IntPtr RhpGetThunkDataBlockAddress(IntPtr thunkStubAddress)
;;
LEAF_ENTRY RhpGetThunkDataBlockAddress
- brk 0xf000
+ mov x12, PAGE_SIZE - 1
+ bic x0, x0, x12
+ mov x12, PAGE_SIZE
+ add x0, x0, x12
+ ret
LEAF_END RhpGetThunkDataBlockAddress
;;
;; IntPtr RhpGetThunkStubsBlockAddress(IntPtr thunkDataAddress)
;;
LEAF_ENTRY RhpGetThunkStubsBlockAddress
- brk 0xf000
+ mov x12, PAGE_SIZE - 1
+ bic x0, x0, x12
+ mov x12, PAGE_SIZE
+ sub x0, x0, x12
+ ret
LEAF_END RhpGetThunkStubsBlockAddress
END
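Two pieces of arithmetic hold this file together: each thunk finds the shared common-stub pointer in the last qword of its data page, and the stub/data pages sit adjacent, so page-aligning an address and stepping one page converts between them. Checked in C++ (a sketch; the constants mirror the equ definitions above, with PAGE_SIZE assumed to be 0x1000):

    #include <cstdint>

    constexpr uint64_t kPageSize    = 0x1000;
    constexpr uint64_t kThunkData   = 0x10;   // THUNK_DATASIZE
    constexpr uint64_t kPointerSize = 0x08;   // POINTER_SIZE

    // JUMP_TO_COMMON: from thunk (group, index)'s data cell to the page's
    // last qword, which holds the common stub all 250 thunks share.
    constexpr uint64_t CommonStubSlotOffset(uint64_t group, uint64_t index)
    {
        return kPageSize - kPointerSize - (group * kThunkData * 10 + kThunkData * index);
    }
    static_assert(CommonStubSlotOffset(0, 0) == 0xFF8, "first thunk reaches end of page");
    static_assert(CommonStubSlotOffset(24, 9) == 0xFF8 - 249 * kThunkData, "last thunk too");

    // RhpGetThunkDataBlockAddress: the stubs page and its data page are
    // adjacent, so convert by page-aligning and stepping one page forward
    // (RhpGetThunkStubsBlockAddress steps one page back instead).
    constexpr uint64_t DataBlockFromStub(uint64_t stubAddr)
    {
        return (stubAddr & ~(kPageSize - 1)) + kPageSize;
    }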
diff --git a/src/Native/Runtime/arm64/UniversalTransition.asm b/src/Native/Runtime/arm64/UniversalTransition.asm
index 3cdf971f3..a02e8c76e 100644
--- a/src/Native/Runtime/arm64/UniversalTransition.asm
+++ b/src/Native/Runtime/arm64/UniversalTransition.asm
@@ -4,20 +4,145 @@
#include "AsmMacros.h"
+#ifdef _DEBUG
+#define TRASH_SAVED_ARGUMENT_REGISTERS
+#endif
+
+#ifdef TRASH_SAVED_ARGUMENT_REGISTERS
+ EXTERN RhpIntegerTrashValues
+ EXTERN RhpFpTrashValues
+#endif ;; TRASH_SAVED_ARGUMENT_REGISTERS
+
+#define COUNT_ARG_REGISTERS (8)
+#define INTEGER_REGISTER_SIZE (8)
+#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE)
+
+;; Largest return block is 8 doubles
+#define RETURN_BLOCK_SIZE (64)
+
+#define COUNT_FLOAT_ARG_REGISTERS (8)
+#define FLOAT_REGISTER_SIZE (8)
+#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE)
+
+#define PUSHED_LR_SIZE (8)
+#define PUSHED_FP_SIZE (8)
+
+;;
+;; From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions:
+;;
+;; ARGUMENT_REGISTERS_SIZE
+;; RETURN_BLOCK_SIZE
+;; FLOAT_ARG_REGISTERS_SIZE
+;; PUSHED_LR
+;; PUSHED_FP
+;;
+
+#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_FP_SIZE + PUSHED_LR_SIZE + FLOAT_ARG_REGISTERS_SIZE)
+
+#define STACK_SIZE (ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_LR_SIZE + PUSHED_FP_SIZE)
+#define FLOAT_ARG_OFFSET (PUSHED_FP_SIZE + PUSHED_LR_SIZE)
+#define ARGUMENT_REGISTERS_OFFSET (FLOAT_ARG_OFFSET + FLOAT_ARG_REGISTERS_SIZE + RETURN_BLOCK_SIZE)
+
+;;
+;; RhpUniversalTransition
+;;
+;; At input to this function, x0-7, d0-7 and the stack may contain any number of arguments.
+;;
+;; In addition, there are 2 extra arguments passed in the intra-procedure-call scratch register:
+;; xip0 will contain the managed function that is to be called by this transition function
+;; xip1 will contain the pointer sized extra argument to the managed function
+;;
+;; When invoking the callee:
+;;
+;; x0 shall contain a pointer to the TransitionBlock
+;; x1 shall contain the value that was in xip1 at entry to this function
+;;
+;; Frame layout is:
+;;
+;; {StackPassedArgs} ChildSP+0D0 CallerSP+000
+;; {IntArgRegs (x0-x7) (0x40 bytes)} ChildSP+090 CallerSP-040
+;; {ReturnBlock (0x40 bytes)} ChildSP+050 CallerSP-080
+;; -- The base address of the Return block is the TransitionBlock pointer, the floating point args are
+;; in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact
+;; layout of all pieces of the frame that lie at or above the pushed floating point registers.
+;; {FpArgRegs (d0-d7) (0x40 bytes)} ChildSP+010 CallerSP-0C0
+;; {PushedLR} ChildSP+008 CallerSP-0C8
+;; {PushedFP} ChildSP+000 CallerSP-0D0
+;;
+;; NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure
+;; must be updated as well.
+;;
+;; NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has
+;; knowledge of the exact layout of all pieces of the frame that lie at or above the pushed
+;; FpArgRegs.
+;;
+;; NOTE: The stack walker guarantees that conservative GC reporting will be applied to
+;; everything between the base of the ReturnBlock and the top of the StackPassedArgs.
+;;
+
TEXTAREA
- MACRO
+ MACRO
UNIVERSAL_TRANSITION $FunctionName
NESTED_ENTRY Rhp$FunctionName
- brk 0xf000
+ ;; FP and LR registers
+ PROLOG_SAVE_REG_PAIR fp, lr, #-STACK_SIZE! ;; Push down stack pointer and store FP and LR
+
+ ;; Floating point registers
+ stp d0, d1, [sp, #(FLOAT_ARG_OFFSET )]
+ stp d2, d3, [sp, #(FLOAT_ARG_OFFSET + 0x10)]
+ stp d4, d5, [sp, #(FLOAT_ARG_OFFSET + 0x20)]
+ stp d6, d7, [sp, #(FLOAT_ARG_OFFSET + 0x30)]
+
+ ;; Space for return buffer data (0x40 bytes)
+
+ ;; Save argument registers
+ stp x0, x1, [sp, #(ARGUMENT_REGISTERS_OFFSET )]
+ stp x2, x3, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x10)]
+ stp x4, x5, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x20)]
+ stp x6, x7, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x30)]
+
+#ifdef TRASH_SAVED_ARGUMENT_REGISTERS
+ ;; ARM64TODO
+#endif // TRASH_SAVED_ARGUMENT_REGISTERS
+
+ add x0, sp, #DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK ;; First parameter to target function is a pointer to the return block
+ mov x8, x0 ;; Arm64 calling convention: Address of return block shall be passed in x8
+ mov x1, xip1 ;; Second parameter to target function
+ blr xip0
+
+ ;; We cannot make the label public as that tricks DIA stackwalker into thinking
+ ;; it's the beginning of a method. For this reason we export an auxiliary variable
+ ;; holding the address instead.
EXPORT_POINTER_TO_ADDRESS PointerToReturnFrom$FunctionName
- brk 0xf000
+
+ ;; Move the result (the target address) to x12 so it doesn't get overwritten when we restore the
+ ;; argument registers.
+ mov x12, x0
+
+ ;; Restore floating point registers
+ ldp d0, d1, [sp, #(FLOAT_ARG_OFFSET )]
+ ldp d2, d3, [sp, #(FLOAT_ARG_OFFSET + 0x10)]
+ ldp d4, d5, [sp, #(FLOAT_ARG_OFFSET + 0x20)]
+ ldp d6, d7, [sp, #(FLOAT_ARG_OFFSET + 0x30)]
+
+ ;; Restore the argument registers
+ ldp x0, x1, [sp, #(ARGUMENT_REGISTERS_OFFSET )]
+ ldp x2, x3, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x10)]
+ ldp x4, x5, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x20)]
+ ldp x6, x7, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x30)]
+
+ ;; Restore FP and LR registers, and free the allocated stack block
+ EPILOG_RESTORE_REG_PAIR fp, lr, #STACK_SIZE!
+
+ ;; Tailcall to the target address.
+ EPILOG_NOP ret x12
NESTED_END Rhp$FunctionName
- MEND
+ MEND
; To enable proper step-in behavior in the debugger, we need to have two instances
; of the thunk. For the first one, the debugger steps into the call in the function,
diff --git a/src/Native/Runtime/arm64/WriteBarriers.asm b/src/Native/Runtime/arm64/WriteBarriers.asm
index a307b6f62..5a41808a2 100644
--- a/src/Native/Runtime/arm64/WriteBarriers.asm
+++ b/src/Native/Runtime/arm64/WriteBarriers.asm
@@ -11,27 +11,114 @@
#include "AsmMacros.h"
+ MACRO
+ ;; On entry:
+ ;; $DESTREG: location to be updated
+ ;; $REFREG: objectref to be stored
+ ;; On exit:
+ ;; $DESTREG: trashed
+ ;; x9: trashed
+ INSERT_UNCHECKED_WRITE_BARRIER_CORE $DESTREG, $REFREG
+ ;; we can skip the card table write if the reference is to
+ ;; an object not on the ephemeral segment.
+ adrp x9, g_ephemeral_low
+ ldr x9, [x9, g_ephemeral_low]
+ cmp $REFREG, x9
+ blt %ft0
+
+ adrp x9, g_ephemeral_high
+ ldr x9, [x9, g_ephemeral_high]
+ cmp $REFREG, x9
+ bge %ft0
+
+ ;; set this object's card, if it hasn't already been set.
+ adrp x9, g_card_table
+ ldr x9, [x9, g_card_table]
+ add $DESTREG, x9, $DESTREG, lsr #11
+ ldrb w9, [$DESTREG]
+ cmp x9, 0xFF
+ beq %ft0
+
+ mov x9, 0xFF
+ strb w9, [$DESTREG]
+
+0
+ ;; exit label
+ MEND ;; INSERT_UNCHECKED_WRITE_BARRIER_CORE
+
+ MACRO
+ ;; On entry:
+ ;; $DESTREG: location to be updated
+ ;; $REFREG: objectref to be stored
+ ;; On exit:
+ ;; $DESTREG: trashed
+ ;; x9: trashed
+ INSERT_CHECKED_WRITE_BARRIER_CORE $DESTREG, $REFREG
+ ;; the "check" of this checked write barrier - is $DESTREG
+ ;; within the heap? if no, early out.
+ adrp x9, g_lowest_address
+ ldr x9, [x9, g_lowest_address]
+ cmp $DESTREG, x9
+ blt %ft0
+
+ adrp x9, g_highest_address
+ ldr x9, [x9, g_highest_address]
+ cmp $DESTREG, x9
+ bgt %ft0
+
+ INSERT_UNCHECKED_WRITE_BARRIER_CORE $DESTREG, $REFREG
+
+0
+ ;; exit label
+ MEND ;; INSERT_CHECKED_WRITE_BARRIER_CORE
+
+
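In C-like terms, the two macros above amount to the following (a sketch; the g_* globals stand in for the symbols loaded via the adrp/ldr pairs, and the shift by 11 reflects one card byte per 2 KB of heap):

    #include <cstdint>

    // Assumed shapes for the GC globals referenced by the assembly above.
    extern uint8_t*  g_card_table;
    extern uintptr_t g_ephemeral_low, g_ephemeral_high;
    extern uintptr_t g_lowest_address, g_highest_address;

    // Sketch of INSERT_UNCHECKED_WRITE_BARRIER_CORE: mark the card for dst
    // when ref points into the ephemeral segment.
    inline void UncheckedWriteBarrierCore(uintptr_t dst, uintptr_t ref)
    {
        if (ref < g_ephemeral_low || ref >= g_ephemeral_high)
            return;                                  // not ephemeral: no card needed
        uint8_t* pCard = g_card_table + (dst >> 11); // one card byte per 2 KB
        if (*pCard != 0xFF)
            *pCard = 0xFF;                           // set the card only if not already set
    }

    // Sketch of INSERT_CHECKED_WRITE_BARRIER_CORE: first check that dst is
    // inside the GC heap range at all.
    inline void CheckedWriteBarrierCore(uintptr_t dst, uintptr_t ref)
    {
        if (dst < g_lowest_address || dst > g_highest_address)
            return;                                  // destination is not in the heap
        UncheckedWriteBarrierCore(dst, ref);
    }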
TEXTAREA
- LEAF_ENTRY RhpCheckedAssignRefXXX
- brk 0xf000
- ALTERNATE_ENTRY RhpCheckedAssignRef
- brk 0xf000
- ALTERNATE_ENTRY RhpCheckedAssignRefAvLocation
- brk 0xf000
+ ;; RhpCheckedAssignRef(Object** dst, Object* src)
+ ;;
+ ;; write barrier for writes to objects that may reside
+ ;; on the managed heap.
+ ;;
+ ;; On entry:
+ ;; x0 : the destination address (LHS of the assignment).
+ ;; May not be an object reference (hence the checked).
+ ;; x1 : the object reference (RHS of the assignment).
+ ;; On exit:
+ ;; x1 : trashed
+ ;; x9 : trashed
+ LEAF_ENTRY RhpCheckedAssignRef
ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
- brk 0xf000
- LEAF_END RhpCheckedAssignRefXXX
-
- LEAF_ENTRY RhpAssignRefXXX
- brk 0xf000
- ALTERNATE_ENTRY RhpAssignRef
- brk 0xf000
- ALTERNATE_ENTRY RhpAssignRefAvLocationXXX
- brk 0xf000
+ ALTERNATE_ENTRY RhpCheckedAssignRefX1
+ ALTERNATE_ENTRY RhpCheckedAssignRefX1AVLocation
+ stlr x1, [x0]
+
+ INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1
+
+ ret lr
+ LEAF_END RhpCheckedAssignRef
+
+ ;; RhpAssignRef(Object** dst, Object* src)
+ ;;
+ ;; write barrier for writes to objects that are known to
+ ;; reside on the managed heap.
+ ;;
+ ;; On entry:
+ ;; x0 : the destination address (LHS of the assignment).
+ ;; x1 : the object reference (RHS of the assignment).
+ ;; On exit:
+ ;; x1 : trashed
+ ;; x9 : trashed
+ LEAF_ENTRY RhpAssignRef
ALTERNATE_ENTRY RhpAssignRefAVLocation
- brk 0xf000
- LEAF_END RhpAssignRefXXX
+ ALTERNATE_ENTRY RhpAssignRefX1
+ ALTERNATE_ENTRY RhpAssignRefX1AVLocation
+ stlr x1, [x0]
+
+ INSERT_UNCHECKED_WRITE_BARRIER_CORE x0, x1
+
+ ret lr
+ LEAF_END RhpAssignRef
;; Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon
;; successful updates.
@@ -40,42 +127,74 @@
;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation
;; - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address
+ ;; RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand)
+ ;;
;; Interlocked compare exchange on objectref.
;;
;; On entry:
- ;; r0: pointer to objectref
- ;; r1: exchange value
- ;; r2: comparand
+ ;; x0: pointer to objectref
+ ;; x1: exchange value
+ ;; x2: comparand
;;
;; On exit:
- ;; r0: original value of objectref
- ;; r1,r2,r3,r12: trashed
+ ;; x0: original value of objectref
+ ;; x9: trashed
+ ;; x10: trashed
;;
LEAF_ENTRY RhpCheckedLockCmpXchg
- brk 0xf000
ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
- brk 0xf000
+CmpXchgRetry
+ ldaxr x10, [x0]
+ cmp x10, x2
+ bne NoUpdate
+
+ stlxr w9, x1, [x0]
+ cbnz w9, CmpXchgRetry
+
+ ;; write was successful.
+ INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1
+
+NoUpdate
+ ;; x10 still contains the original value.
+ mov x0, x10
+ ret lr
+
LEAF_END RhpCheckedLockCmpXchg
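The ldaxr/stlxr retry loop above has the semantics of a plain compare-and-swap followed by the write barrier on success; a sketch (ObjectRef and CheckedWriteBarrierCore are the illustrative names from the earlier write barrier sketch):

    #include <atomic>
    #include <cstdint>

    using ObjectRef = uintptr_t;  // illustrative stand-in for Object*
    void CheckedWriteBarrierCore(uintptr_t dst, uintptr_t ref);  // from the sketch above

    // Sketch of RhpCheckedLockCmpXchg: CAS on an objectref location, running
    // the checked write barrier only when the store actually happened.
    ObjectRef CheckedLockCmpXchgSketch(std::atomic<ObjectRef>* dest,
                                       ObjectRef value, ObjectRef comparand)
    {
        ObjectRef observed = comparand;
        if (dest->compare_exchange_strong(observed, value))
            CheckedWriteBarrierCore(reinterpret_cast<uintptr_t>(dest), value);
        return observed;  // the original value, matching x0 on exit
    }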
+ ;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+ ;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation
+ ;; - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address
+
+ ;; RhpCheckedXchg(Object** destination, Object* value)
+ ;;
;; Interlocked exchange on objectref.
;;
;; On entry:
- ;; r0: pointer to objectref
- ;; r1: exchange value
+ ;; x0: pointer to objectref
+ ;; x1: exchange value
;;
;; On exit:
- ;; r0: original value of objectref
- ;; r1,r2,r3,r12: trashed
+ ;; x0: original value of objectref
+ ;; x9: trashed
+ ;; x10: trashed
;;
-
- ;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
- ;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation
- ;; - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address
-
LEAF_ENTRY RhpCheckedXchg
- brk 0xf000
ALTERNATE_ENTRY RhpCheckedXchgAVLocation
- brk 0xf000
+ExchangeRetry
+ ;; read the existing memory location.
+ ldaxr x10, [x0]
+
+ ;; INSERT_CHECKED_WRITE_BARRIER_CORE trashes x9,
+ ;; so we'll use it for the short-lifetime variable here.
+ stlxr w9, x1, [x0]
+ cbnz w9, ExchangeRetry
+
+ ;; write was successful.
+ INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1
+
+ ;; x10 still contains the original value.
+ mov x0, x10
+ ret lr
LEAF_END RhpCheckedXchg
end
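For RhpCheckedXchg the store is unconditional, so the barrier always runs; the equivalent sketch (reusing the illustrative names from the sketches above):

    // Sketch of RhpCheckedXchg: atomic exchange on an objectref, then the
    // checked write barrier for the newly stored reference.
    ObjectRef CheckedXchgSketch(std::atomic<ObjectRef>* dest, ObjectRef value)
    {
        ObjectRef original = dest->exchange(value);
        CheckedWriteBarrierCore(reinterpret_cast<uintptr_t>(dest), value);
        return original;  // matches x0 on exit
    }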
diff --git a/src/Native/Runtime/inc/gcinfo.h b/src/Native/Runtime/inc/gcinfo.h
index 21e7a7ff0..405973aa4 100644
--- a/src/Native/Runtime/inc/gcinfo.h
+++ b/src/Native/Runtime/inc/gcinfo.h
@@ -7,6 +7,8 @@
#define _GCINFO_H_
/*****************************************************************************/
+// Keep definitions in this file in sync with Nutc\UTC\gcinfo.h
+
#ifdef _TARGET_ARM_
#define NUM_PRESERVED_REGS 9
@@ -113,7 +115,6 @@ enum ScratchRegMask
};
#elif defined(_TARGET_ARM64_)
-// ARM64TODO: add all arm64-related changes in this file to gcinfo.h in E:\ProjNDev3X\src\Nutc\UTC
enum RegMask
{
@@ -152,21 +153,25 @@ enum RegMask
RBM_X28 = 0x10000000, // RA_CALLEESAVE
RBM_FP = 0x20000000,
- RBM_LR = 0x40000000, // ARM64TODO: check to which lists it should be added
+ RBM_LR = 0x40000000,
RBM_SP = 0x80000000,
-
RBM_RETVAL = RBM_X8,
- RBM_CALLEE_SAVED_REGS = (RBM_X19 | RBM_X20 | RBM_X21 | RBM_X22 | RBM_X23 | RBM_X24 | RBM_X25 | RBM_X26 | RBM_X27 | RBM_X28),
- RBM_CALLEE_SAVED_REG_COUNT = 10,
+ RBM_CALLEE_SAVED_REGS = (RBM_X19 | RBM_X20 | RBM_X21 | RBM_X22 | RBM_X23 | RBM_X24 | RBM_X25 | RBM_X26 | RBM_X27 | RBM_X28 |
+ RBM_FP | RBM_LR),
+ RBM_CALLEE_SAVED_REG_COUNT = 12,
RBM_SCRATCH_REGS = (RBM_X0 | RBM_X1 | RBM_X2 | RBM_X3 | RBM_X4 | RBM_X5 | RBM_X6 | RBM_X7 | RBM_X8 | RBM_X9 |
- RBM_X10 | RBM_X11 | RBM_X12 | RBM_X13 | RBM_X14 | RBM_X15 | RBM_XIP0| RBM_XIP1),
- RBM_SCRATCH_REG_COUNT = 18,
+ RBM_X10 | RBM_X11 | RBM_X12 | RBM_X13 | RBM_X14 | RBM_X15 | RBM_XIP0 | RBM_XIP1 | RBM_LR),
+ RBM_SCRATCH_REG_COUNT = 19,
};
#define NUM_PRESERVED_REGS RBM_CALLEE_SAVED_REG_COUNT
+// Number of the callee-saved registers stored in the fixed header
+#define NUM_PRESERVED_REGS_LOW 10
+#define MASK_PRESERVED_REGS_LOW ((1 << NUM_PRESERVED_REGS_LOW) - 1)
+
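The intent of the split is that common masks fit in the 10 low bits of the fixed header, and the high bits (X28 and FP in the mask layout below) cost an extra byte only when set; a sketch of the encoding (names other than the two macros are hypothetical):

    #include <cstdint>

    // Sketch: the 12-bit ARM64 callee-saved mask is stored as 10 low bits in
    // the fixed header plus one optional trailing byte for the high bits,
    // present only when longCsrMask == 1.
    const int      kNumPreservedRegsLow  = 10;                               // NUM_PRESERVED_REGS_LOW
    const uint16_t kMaskPreservedRegsLow = (1 << kNumPreservedRegsLow) - 1;  // MASK_PRESERVED_REGS_LOW

    struct EncodedCsrMask
    {
        uint16_t low;      // goes into calleeSavedRegMaskLow
        bool     hasHigh;  // goes into longCsrMask
        uint8_t  high;     // encoded as an extra byte only if hasHigh
    };

    inline EncodedCsrMask EncodeCsrMask(uint16_t mask)  // mask <= CSR_MASK_ALL
    {
        EncodedCsrMask e;
        e.low     = mask & kMaskPreservedRegsLow;
        e.high    = (uint8_t)(mask >> kNumPreservedRegsLow);
        e.hasHigh = (e.high != 0);
        return e;
    }

    inline uint16_t DecodeCsrMask(const EncodedCsrMask& e)  // mirrors the decode added later in this diff
    {
        uint16_t mask = e.low;
        if (e.hasHigh)
            mask |= (uint16_t)(e.high << kNumPreservedRegsLow);
        return mask;
    }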
enum RegNumber
{
RN_X0 = 0,
@@ -210,35 +215,40 @@ enum RegNumber
enum CalleeSavedRegNum
{
- CSR_NUM_X19 = 0,
- CSR_NUM_X20 = 1,
- CSR_NUM_X21 = 2,
- CSR_NUM_X22 = 3,
- CSR_NUM_X23 = 4,
- CSR_NUM_X24 = 5,
- CSR_NUM_X25 = 6,
- CSR_NUM_X26 = 7,
- CSR_NUM_X27 = 8,
- CSR_NUM_X28 = 9,
- CSR_NUM_NONE = 10,
+ // NOTE: LR is omitted because it may not be live except as a 'scratch' reg
+ CSR_NUM_X19 = 1,
+ CSR_NUM_X20 = 2,
+ CSR_NUM_X21 = 3,
+ CSR_NUM_X22 = 4,
+ CSR_NUM_X23 = 5,
+ CSR_NUM_X24 = 6,
+ CSR_NUM_X25 = 7,
+ CSR_NUM_X26 = 8,
+ CSR_NUM_X27 = 9,
+ CSR_NUM_X28 = 10,
+ CSR_NUM_FP = 11,
+ CSR_NUM_NONE = 12,
};
enum CalleeSavedRegMask
{
CSR_MASK_NONE = 0x00,
- CSR_MASK_X19 = 0x001,
- CSR_MASK_X20 = 0x002,
- CSR_MASK_X21 = 0x004,
- CSR_MASK_X22 = 0x008,
- CSR_MASK_X23 = 0x010,
- CSR_MASK_X24 = 0x020,
- CSR_MASK_X25 = 0x040,
- CSR_MASK_X26 = 0x080,
- CSR_MASK_X27 = 0x100,
- CSR_MASK_X28 = 0x200,
-
- CSR_MASK_ALL = 0x3ff,
- CSR_MASK_HIGHEST = 0x200,
+ // LR is placed here to reduce the frequency of the long encoding
+ CSR_MASK_LR = 0x001,
+ CSR_MASK_X19 = 0x002,
+ CSR_MASK_X20 = 0x004,
+ CSR_MASK_X21 = 0x008,
+ CSR_MASK_X22 = 0x010,
+ CSR_MASK_X23 = 0x020,
+ CSR_MASK_X24 = 0x040,
+ CSR_MASK_X25 = 0x080,
+ CSR_MASK_X26 = 0x100,
+ CSR_MASK_X27 = 0x200,
+ CSR_MASK_X28 = 0x400,
+ CSR_MASK_FP = 0x800,
+
+ CSR_MASK_ALL = 0xfff,
+ CSR_MASK_HIGHEST = 0x800,
};
enum ScratchRegNum
@@ -262,8 +272,9 @@ enum ScratchRegNum
SR_NUM_XIP0 = 16,
SR_NUM_XIP1 = 17,
+ SR_NUM_LR = 18,
- SR_NUM_NONE = 18,
+ SR_NUM_NONE = 19,
};
enum ScratchRegMask
@@ -288,6 +299,7 @@ enum ScratchRegMask
SR_MASK_XIP0 = 0x10000,
SR_MASK_XIP1 = 0x20000,
+ SR_MASK_LR = 0x40000,
};
#else // _TARGET_ARM_
@@ -434,12 +446,12 @@ private:
UInt16 calleeSavedRegMask : NUM_PRESERVED_REGS; // 2 [5:7] 3 [0:5]
UInt16 arm_areParmOrVfpRegsPushed:1; // 1: pushed parm register set from R0-R3 and pushed fp reg start and count is encoded below, 0: no pushed parm or fp registers
#elif defined (_TARGET_ARM64_)
- // ARM64TODO: check
UInt16 returnKind : 2; // 2 [0:1] one of: MethodReturnKind enum
UInt16 ebpFrame : 1; // 2 [2] on x64, this means "has frame pointer and it is RBP", on ARM64 FP
UInt16 epilogAtEnd : 1; // 2 [3]
UInt16 hasFrameSize : 1; // 2 [4] 1: frame size is encoded below, 0: frame size is 0
- UInt16 calleeSavedRegMask : NUM_PRESERVED_REGS; // 2 [5:7] + 3 [0:7]
+ UInt16 longCsrMask : 1; // 2 [5] 1: high bits of calleeSavedRegMask are encoded below
+ UInt16 calleeSavedRegMaskLow : NUM_PRESERVED_REGS_LOW; // 2 [6:7] 3 [0:7]
#else // _TARGET_ARM_
UInt8 returnKind : 2; // 2 [0:1] one of: MethodReturnKind enum
UInt8 ebpFrame : 1; // 2 [2] on x64, this means "has frame pointer and it is RBP", on ARM R7
@@ -473,7 +485,6 @@ private:
//
UInt32 frameSize; // expressed in pointer-sized units, only encoded if hasFrameSize==1
-
// OPTIONAL: only encoded if returnKind = MRK_ReturnsToNative
UInt32 reversePinvokeFrameOffset; // expressed in pointer-sized units away from the frame pointer
@@ -504,12 +515,13 @@ private:
// that can be expressed by a 'ret NNNN' instruction. Therefore, with 6 in the 'low' field and 8 in the
// 'high' field, we are not losing any range here. (Although the need for that full range is debatable.)
UInt8 x86_argCountHigh;
-#endif
-
-#ifdef _TARGET_ARM_
+#elif defined(_TARGET_ARM_)
UInt8 arm_parmRegsPushedSet;
UInt8 arm_vfpRegFirstPushed;
UInt8 arm_vfpRegPushedCount;
+#elif defined(_TARGET_ARM64_)
+ // OPTIONAL: high bits of calleeSavedRegMask are encoded only if longCsrMask = 1; low bits equal to calleeSavedRegMaskLow
+ UInt16 calleeSavedRegMask;
#endif
//
// OPTIONAL: only encoded if hasExtraData = 1
@@ -747,7 +759,7 @@ public:
void SetSavedRegs(CalleeSavedRegMask regMask)
{
- calleeSavedRegMask = regMask;
+ calleeSavedRegMask = (UInt16)regMask;
}
void SetRegSaved(CalleeSavedRegMask regMask)
@@ -1297,6 +1309,12 @@ public:
else
arm_vfpRegFirstPushed = (UInt8)(vfpRegFirstPushed - 1);
}
+#elif defined(_TARGET_ARM64_)
+ calleeSavedRegMask = calleeSavedRegMaskLow;
+ if (longCsrMask)
+ {
+ calleeSavedRegMask |= (*pbDecode++ << NUM_PRESERVED_REGS_LOW);
+ }
#endif
extraDataHeader = hasExtraData ? ToUInt8(VarInt::ReadUnsigned(pbDecode)) : 0;
diff --git a/src/Native/Runtime/inc/rhbinder.h b/src/Native/Runtime/inc/rhbinder.h
index d612fd8fb..8a932e406 100644
--- a/src/Native/Runtime/inc/rhbinder.h
+++ b/src/Native/Runtime/inc/rhbinder.h
@@ -563,6 +563,11 @@ struct InterfaceDispatchCell
// a single instruction within our stubs.
enum PInvokeTransitionFrameFlags
{
+ // NOTE: Keep in sync with ndp\FxCore\CoreRT\src\Native\Runtime\arm\AsmMacros.h
+
+ // NOTE: The order in which registers get pushed in the PInvokeTransitionFrame's m_PreservedRegs list has
+ // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp
+
// standard preserved registers
PTFF_SAVE_R4 = 0x00000001,
PTFF_SAVE_R5 = 0x00000002,
@@ -591,9 +596,62 @@ enum PInvokeTransitionFrameFlags
PTFF_THREAD_ABORT = 0x00010000, // indicates that ThreadAbortException should be thrown when returning from the transition
};
+#elif defined(_TARGET_ARM64_)
+enum PInvokeTransitionFrameFlags
+{
+ // NOTE: Keep in sync with ndp\FxCore\CoreRT\src\Native\Runtime\arm64\AsmMacros.h
+
+ // NOTE: The order in which registers get pushed in the PInvokeTransitionFrame's m_PreservedRegs list has
+ // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp).
+
+ // standard preserved registers
+ PTFF_SAVE_X19 = 0x00000001,
+ PTFF_SAVE_X20 = 0x00000002,
+ PTFF_SAVE_X21 = 0x00000004,
+ PTFF_SAVE_X22 = 0x00000008,
+ PTFF_SAVE_X23 = 0x00000010,
+ PTFF_SAVE_X24 = 0x00000020,
+ PTFF_SAVE_X25 = 0x00000040,
+ PTFF_SAVE_X26 = 0x00000080,
+ PTFF_SAVE_X27 = 0x00000100,
+ PTFF_SAVE_X28 = 0x00000200,
+
+ PTFF_SAVE_SP = 0x00000400, // Used for 'coop pinvokes' in runtime helper routines. Methods with
+ // PInvokes are required to have a frame pointer, but methods that
+ // call runtime helpers are not. Therefore, methods that call runtime
+ // helpers may need SP to seed the stackwalk.
+
+ // Scratch registers
+ PTFF_SAVE_X0 = 0x00000800,
+ PTFF_SAVE_X1 = 0x00001000,
+ PTFF_SAVE_X2 = 0x00002000,
+ PTFF_SAVE_X3 = 0x00004000,
+ PTFF_SAVE_X4 = 0x00008000,
+ PTFF_SAVE_X5 = 0x00010000,
+ PTFF_SAVE_X6 = 0x00020000,
+ PTFF_SAVE_X7 = 0x00040000,
+
+ PTFF_SAVE_FP = 0x00080000, // should never be used; we require FP frames for methods with
+ // PInvoke, and FP is saved into the frame pointer field instead
+
+ PTFF_SAVE_LR = 0x00100000, // this is useful for the case of loop hijacking, where we need both
+ // a return address pointing into the hijacked method and that method's
+ // LR register, which may hold a GC pointer
+
+ // Other flags
+ PTFF_X0_IS_GCREF = 0x00200000, // used by hijack handler to report return value of hijacked method
+ PTFF_X0_IS_BYREF = 0x00400000, // used by hijack handler to report return value of hijacked method
+
+ PTFF_THREAD_ABORT = 0x00800000, // indicates that ThreadAbortException should be thrown when returning from the transition
+};
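Because the push order must match the flag order, a frame reader can consume m_PreservedRegs positionally; a sketch of the pattern (the function name and output layout are illustrative, the flag values are the ones defined above):

    #include <cstdint>

    // Sketch: walk the ARM64 flag bits in ascending order, consuming one
    // preserved slot per set flag; PTFF_SAVE_X19 == bit 0 .. PTFF_SAVE_X28 == bit 9.
    inline void ReadPreservedRegsSketch(uint32_t dwFlags,
                                        const uintptr_t* pPreservedRegs,
                                        uintptr_t xRegs[10] /* X19..X28 */)
    {
        const uintptr_t* pSlot = pPreservedRegs;
        for (int i = 0; i < 10; i++)
        {
            if (dwFlags & (1u << i))
                xRegs[i] = *pSlot++;   // slots appear in the same order as the flags
        }
        // PTFF_SAVE_SP, X0-X7, and LR would be consumed the same way, in flag order.
    }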
#else // _TARGET_ARM_
enum PInvokeTransitionFrameFlags
{
+ // NOTE: Keep in sync with ndp\FxCore\CoreRT\src\Native\Runtime\[amd64|i386]\AsmMacros.inc
+
+ // NOTE: The order in which registers get pushed in the PInvokeTransitionFrame's m_PreservedRegs list has
+ // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp).
+
// standard preserved registers
PTFF_SAVE_RBX = 0x00000001,
PTFF_SAVE_RSI = 0x00000002,
@@ -648,12 +706,18 @@ struct PInvokeTransitionFrame
#ifdef _TARGET_ARM_
TgtPTR_Void m_ChainPointer; // R11, used by OS to walk stack quickly
#endif
+#ifdef _TARGET_ARM64_
+ // On arm64, the FP and LR registers are pushed in that order when setting up frames
+ TgtPTR_Void m_FramePointer;
+ TgtPTR_Void m_RIP;
+#else
TgtPTR_Void m_RIP;
TgtPTR_Void m_FramePointer;
+#endif
TgtPTR_Thread m_pThread; // unused by stack crawler, this is so GetThread is only called once per method
// can be an invalid pointer in universal transition cases (which never need to call GetThread)
UInt32 m_dwFlags; // PInvokeTransitionFrameFlags
-#ifdef _TARGET_AMD64_
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
UInt32 m_dwAlignPad2;
#endif
UIntTarget m_PreservedRegs[];
@@ -670,6 +734,10 @@ struct PInvokeTransitionFrame
#elif defined(_TARGET_ARM_)
// R4-R6,R8-R10, R0, SP
#define PInvokeTransitionFrame_SaveRegs_count 8
+#elif defined(_TARGET_ARM64_)
+// X19-X29, X0, SP
+// ARM64TODO: do we need more here? Verify count when you start using this define.
+#define PInvokeTransitionFrame_SaveRegs_count 12
#endif
#define PInvokeTransitionFrame_MAX_SIZE (sizeof(PInvokeTransitionFrame) + (POINTER_SIZE * PInvokeTransitionFrame_SaveRegs_count))
diff --git a/src/Native/Runtime/threadstore.cpp b/src/Native/Runtime/threadstore.cpp
index c12f95678..180e3f199 100644
--- a/src/Native/Runtime/threadstore.cpp
+++ b/src/Native/Runtime/threadstore.cpp
@@ -433,7 +433,10 @@ volatile UInt32 * p_tls_index;
volatile UInt32 SECTIONREL__tls_CurrentThread;
EXTERN_C UInt32 _tls_index;
-
+#if defined(_TARGET_ARM64_)
+// ARM64TODO: Re-enable optimization
+#pragma optimize("", off)
+#endif
void ThreadStore::SaveCurrentThreadOffsetForDAC()
{
p_tls_index = &_tls_index;
@@ -444,7 +447,9 @@ void ThreadStore::SaveCurrentThreadOffsetForDAC()
SECTIONREL__tls_CurrentThread = (UInt32)((UInt8 *)&tls_CurrentThread - pOurTls);
}
-
+#if defined(_TARGET_ARM64_)
+#pragma optimize("", on)
+#endif
#else // DACCESS_COMPILE
GPTR_IMPL(UInt32, p_tls_index);
diff --git a/src/Native/Runtime/windows/PalRedhawkMinWin.cpp b/src/Native/Runtime/windows/PalRedhawkMinWin.cpp
index b57f59a90..c70ab884e 100644
--- a/src/Native/Runtime/windows/PalRedhawkMinWin.cpp
+++ b/src/Native/Runtime/windows/PalRedhawkMinWin.cpp
@@ -331,13 +331,23 @@ REDHAWK_PALEXPORT _Success_(return) bool REDHAWK_PALAPI PalGetThreadContext(HAND
pCtx->SP = win32ctx.Sp;
pCtx->LR = win32ctx.Lr;
#elif defined(_ARM64_)
- for (int i = 0; i < GEN_REG_COUNT; ++i) {
- pCtx->X[i] = win32ctx.X[i];
- }
+ pCtx->IP = win32ctx.Pc;
+ pCtx->X0 = win32ctx.X0;
+ pCtx->X1 = win32ctx.X1;
+ // TODO: Copy X2-X7 when we start supporting HVAs
+ pCtx->X19 = win32ctx.X19;
+ pCtx->X20 = win32ctx.X20;
+ pCtx->X21 = win32ctx.X21;
+ pCtx->X22 = win32ctx.X22;
+ pCtx->X23 = win32ctx.X23;
+ pCtx->X24 = win32ctx.X24;
+ pCtx->X25 = win32ctx.X25;
+ pCtx->X26 = win32ctx.X26;
+ pCtx->X27 = win32ctx.X27;
+ pCtx->X28 = win32ctx.X28;
pCtx->SP = win32ctx.Sp;
pCtx->LR = win32ctx.Lr;
pCtx->FP = win32ctx.Fp;
- pCtx->IP = win32ctx.Pc;
#else
#error Unsupported platform
#endif