diff options
author | Jeremy Koritzinsky <jekoritz@microsoft.com> | 2021-01-20 10:58:03 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-20 10:58:03 +0300 |
commit | 5ec0c7a3e8db50143b6afdeb672c1aeb8eb32d31 (patch) | |
tree | 878129405c74a3d7942b6b4d929de052f83fbcdf /src/coreclr/jit | |
parent | 5aef85ae3da23dfd49f95be1c9336f1bad2e0123 (diff) |
Remove extra UnmanagedCallersOnly overhead on x86 (#46238)
* Implement emitting an unmanaged calling convention entry point with the correct argument order and register usage on x86.
* Move Unix x86 to the UnmanagedCallersOnly plan now that we don't need to do argument shuffling.
* Add SEH hookup and profiler/debugger hooks to Reverse P/Invoke entry helper to match custom x86 thunk.
Fixes #46177
* Remove the Windows x86 assembly stub for individual Reverse P/Invokes. Move Windows x86 UnmanagedCallersOnly methods off the extra-overhead path, and put Reverse P/Invoke stubs for Windows x86 on the UnmanagedCallersOnly plan.
* Further cleanup
* Remove extraneous UnmanagedCallersOnly block now that x86 UnmanagedCallersOnly has been simplified.
* Undo ArgOrder size specifier since it isn't needed and it doesn't work.
* Fix copy constructor reverse marshalling. Now that we don't have the emitted unmanaged thunk stub, we need to handle the x86 differences for copy-constructed parameters in the IL stub.
* Fix version guid syntax.
* Remove FastNExportHandler.
* Revert "Remove FastNExportHandler."
This reverts commit 423f70ee4d564147dc0ce370d38b3a38404f8e22.
* Fix setting up entry frame for new thread.
* Allow the NExportSEH record to live below ESP so we don't need to create a new stack frame.
* Fix formatting.
* Assign an offset for the return buffer on x86 since it might come in on the stack.
* Make sure we use the TC block we just put in on x86 as well.
* Shrink the ReversePInvokeFrame on non-x86 back to master's size.
* Fix arch-specific R2R constant.
* Pass the return address of the ReversePInvokeEnter helper to TraceCall instead of the entry point and call TraceCall from all JIT_ReversePInvokeEnter* helpers.
* Fix ILVerification and ILVerify
* Fix R2R constants for crossgen1.
* Don't assert ReversePInvokeFrame size for cross-bitness scenarios.
Diffstat (limited to 'src/coreclr/jit')
-rw-r--r-- | src/coreclr/jit/codegencommon.cpp | 2 | ||||
-rw-r--r-- | src/coreclr/jit/compiler.cpp | 2 | ||||
-rw-r--r-- | src/coreclr/jit/compiler.h | 10 | ||||
-rw-r--r-- | src/coreclr/jit/flowgraph.cpp | 24 | ||||
-rw-r--r-- | src/coreclr/jit/importer.cpp | 6 | ||||
-rw-r--r-- | src/coreclr/jit/lclvars.cpp | 59 | ||||
-rw-r--r-- | src/coreclr/jit/register_arg_convention.h | 6 | ||||
-rw-r--r-- | src/coreclr/jit/target.h | 3 | ||||
-rw-r--r-- | src/coreclr/jit/targetamd64.cpp | 5 | ||||
-rw-r--r-- | src/coreclr/jit/targetarm.cpp | 5 | ||||
-rw-r--r-- | src/coreclr/jit/targetarm64.cpp | 5 | ||||
-rw-r--r-- | src/coreclr/jit/targetx86.cpp | 5 |
12 files changed, 97 insertions, 35 deletions
diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 8c4572dcec4..3933ace5f34 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -8896,10 +8896,8 @@ void CodeGen::genFnEpilog(BasicBlock* block) if (compiler->info.compIsVarArgs) fCalleePop = false; -#ifdef UNIX_X86_ABI if (IsCallerPop(compiler->info.compCallConv)) fCalleePop = false; -#endif // UNIX_X86_ABI if (fCalleePop) { diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 2843a57c4ad..39c80a92df1 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -6184,10 +6184,12 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr, { bool unused; info.compCallConv = info.compCompHnd->getUnmanagedCallConv(methodInfo->ftn, nullptr, &unused); + info.compArgOrder = Target::g_tgtUnmanagedArgOrder; } else { info.compCallConv = CorInfoCallConvExtension::Managed; + info.compArgOrder = Target::g_tgtArgOrder; } info.compIsVarArgs = false; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 68b1d343aa4..3feabf558b9 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9379,6 +9379,8 @@ public: // current number of EH clauses (after additions like synchronized // methods and funclets, and removals like unreachable code deletion). + Target::ArgOrder compArgOrder; + bool compMatchedVM; // true if the VM is "matched": either the JIT is a cross-compiler // and the VM expects that, or the JIT is a "self-host" compiler // (e.g., x86 hosted targeting x86) and the VM expects that. @@ -9458,6 +9460,14 @@ public: return (info.compRetBuffArg != BAD_VAR_NUM); } #endif // TARGET_WINDOWS && TARGET_ARM64 + // 4. x86 unmanaged calling conventions require the address of RetBuff to be returned in eax. 
+ CLANG_FORMAT_COMMENT_ANCHOR; +#if defined(TARGET_X86) + if (info.compCallConv != CorInfoCallConvExtension::Managed) + { + return (info.compRetBuffArg != BAD_VAR_NUM); + } +#endif return false; #endif // TARGET_AMD64 diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 950988adca1..e9cdd48899e 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -8683,13 +8683,29 @@ void Compiler::fgAddReversePInvokeEnterExit() varDsc->lvType = TYP_BLK; varDsc->lvExactSize = eeGetEEInfo()->sizeOfReversePInvokeFrame; - GenTree* tree; - // Add enter pinvoke exit callout at the start of prolog - tree = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(lvaReversePInvokeFrameVar, TYP_BLK)); + GenTree* pInvokeFrameVar = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(lvaReversePInvokeFrameVar, TYP_BLK)); + + GenTree* stubArgument; + + if (info.compPublishStubParam) + { + // If we have a secret param for a Reverse P/Invoke, that means that we are in an IL stub. + // In this case, the method handle we pass down to the Reverse P/Invoke helper should be + // the target method, which is passed in the secret parameter. 
+ stubArgument = gtNewLclvNode(lvaStubArgumentVar, TYP_I_IMPL); + } + else + { + stubArgument = gtNewIconNode(0, TYP_I_IMPL); + } + + GenTree* tree; + + GenTreeCall::Use* args = gtNewCallArgs(pInvokeFrameVar, gtNewIconEmbMethHndNode(info.compMethodHnd), stubArgument); - tree = gtNewHelperCallNode(CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER, TYP_VOID, gtNewCallArgs(tree)); + tree = gtNewHelperCallNode(CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER, TYP_VOID, args); fgEnsureFirstBBisScratch(); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 078a745f39f..37d93fb9a54 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -17335,6 +17335,12 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) op1 = gtNewOperNode(GT_RETURN, TYP_BYREF, gtNewLclvNode(info.compRetBuffArg, TYP_BYREF)); } #endif +#if defined(TARGET_X86) + else if (info.compCallConv != CorInfoCallConvExtension::Managed) + { + op1 = gtNewOperNode(GT_RETURN, TYP_BYREF, gtNewLclvNode(info.compRetBuffArg, TYP_BYREF)); + } +#endif else { // return void diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index be0f5ccfe05..a9abde124fc 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -235,7 +235,29 @@ void Compiler::lvaInitTypeRef() //------------------------------------------------------------------------- InitVarDscInfo varDscInfo; - varDscInfo.Init(lvaTable, hasRetBuffArg); +#ifdef TARGET_X86 + // x86 unmanaged calling conventions limit the number of registers supported + // for accepting arguments. As a result, we need to modify the number of registers + // when we emit a method with an unmanaged calling convention. + switch (info.compCallConv) + { + case CorInfoCallConvExtension::Thiscall: + // In thiscall the this parameter goes into a register. 
+ varDscInfo.Init(lvaTable, hasRetBuffArg, 1, 0); + break; + case CorInfoCallConvExtension::C: + case CorInfoCallConvExtension::Stdcall: + varDscInfo.Init(lvaTable, hasRetBuffArg, 0, 0); + break; + case CorInfoCallConvExtension::Managed: + case CorInfoCallConvExtension::Fastcall: + default: + varDscInfo.Init(lvaTable, hasRetBuffArg, MAX_REG_ARG, MAX_FLOAT_REG_ARG); + break; + } +#else + varDscInfo.Init(lvaTable, hasRetBuffArg, MAX_REG_ARG, MAX_FLOAT_REG_ARG); +#endif lvaInitArgs(&varDscInfo); @@ -513,14 +535,16 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo, bool useFixedRetBuf info.compRetBuffArg = varDscInfo->varNum; varDsc->lvType = TYP_BYREF; varDsc->lvIsParam = 1; - varDsc->lvIsRegArg = 1; + varDsc->lvIsRegArg = 0; if (useFixedRetBufReg && hasFixedRetBuffReg()) { + varDsc->lvIsRegArg = 1; varDsc->SetArgReg(theFixedRetBuffReg()); } - else + else if (varDscInfo->canEnreg(TYP_INT)) { + varDsc->lvIsRegArg = 1; unsigned retBuffArgNum = varDscInfo->allocRegArg(TYP_INT); varDsc->SetArgReg(genMapIntRegArgNumToRegNum(retBuffArgNum)); } @@ -557,10 +581,10 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo, bool useFixedRetBuf } #endif // FEATURE_SIMD - assert(isValidIntArgReg(varDsc->GetArgReg())); + assert(!varDsc->lvIsRegArg || isValidIntArgReg(varDsc->GetArgReg())); #ifdef DEBUG - if (verbose) + if (varDsc->lvIsRegArg && verbose) { printf("'__retBuf' passed in register %s\n", getRegName(varDsc->GetArgReg())); } @@ -591,7 +615,10 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un #if defined(TARGET_X86) // Only (some of) the implicit args are enregistered for varargs - varDscInfo->maxIntRegArgNum = info.compIsVarArgs ? varDscInfo->intRegArgNum : MAX_REG_ARG; + if (info.compIsVarArgs) + { + varDscInfo->maxIntRegArgNum = varDscInfo->intRegArgNum; + } #elif defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) // On System V type environment the float registers are not indexed together with the int ones. 
varDscInfo->floatRegArgNum = varDscInfo->intRegArgNum; @@ -5345,7 +5372,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() This is all relative to our Virtual '0' */ - if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R) + if (info.compArgOrder == Target::ARG_ORDER_L2R) { argOffs = compArgSize; } @@ -5357,9 +5384,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() noway_assert(compArgSize >= codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES); #endif -#ifdef TARGET_X86 - argOffs -= codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES; -#endif + if (info.compArgOrder == Target::ARG_ORDER_L2R) + { + argOffs -= codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES; + } // Update the arg initial register locations. lvaUpdateArgsWithInitialReg(); @@ -5398,11 +5426,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() if (info.compRetBuffArg != BAD_VAR_NUM) { noway_assert(lclNum == info.compRetBuffArg); - noway_assert(lvaTable[lclNum].lvIsRegArg); -#ifndef TARGET_X86 argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); -#endif // TARGET_X86 lclNum++; } @@ -5553,7 +5578,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, noway_assert(lclNum < info.compArgsCount); noway_assert(argSize); - if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R) + if (info.compArgOrder == Target::ARG_ORDER_L2R) { argOffs -= argSize; } @@ -5621,7 +5646,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, } } - if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg) + if (info.compArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg) { argOffs += argSize; } @@ -5646,7 +5671,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, noway_assert(lclNum < info.compArgsCount); noway_assert(argSize); - if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R) + if (info.compArgOrder == Target::ARG_ORDER_L2R) { argOffs -= argSize; } @@ -5925,7 +5950,7 
@@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, } } - if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg) + if (info.compArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg) { argOffs += argSize; } diff --git a/src/coreclr/jit/register_arg_convention.h b/src/coreclr/jit/register_arg_convention.h index 7b3199b03af..a1816ba897e 100644 --- a/src/coreclr/jit/register_arg_convention.h +++ b/src/coreclr/jit/register_arg_convention.h @@ -33,15 +33,15 @@ struct InitVarDscInfo public: // set to initial values - void Init(LclVarDsc* lvaTable, bool _hasRetBufArg) + void Init(LclVarDsc* lvaTable, bool _hasRetBufArg, unsigned _maxIntRegArgNum, unsigned _maxFloatRegArgNum) { hasRetBufArg = _hasRetBufArg; varDsc = &lvaTable[0]; // the first argument LclVar 0 varNum = 0; // the first argument varNum 0 intRegArgNum = 0; floatRegArgNum = 0; - maxIntRegArgNum = MAX_REG_ARG; - maxFloatRegArgNum = MAX_FLOAT_REG_ARG; + maxIntRegArgNum = _maxIntRegArgNum; + maxFloatRegArgNum = _maxFloatRegArgNum; #ifdef TARGET_ARM fltArgSkippedRegMask = RBM_NONE; diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 633f5dc34d2..d4d501e5fd7 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -436,7 +436,7 @@ typedef unsigned char regNumberSmall; #define FIRST_ARG_STACK_OFFS (2*REGSIZE_BYTES) // Caller's saved EBP and return address #define MAX_REG_ARG 2 - + #define MAX_FLOAT_REG_ARG 0 #define REG_ARG_FIRST REG_ECX #define REG_ARG_LAST REG_EDX @@ -1620,6 +1620,7 @@ public: ARG_ORDER_L2R }; static const enum ArgOrder g_tgtArgOrder; + static const enum ArgOrder g_tgtUnmanagedArgOrder; }; #if defined(DEBUG) || defined(LATE_DISASM) || DUMP_GC_TABLES diff --git a/src/coreclr/jit/targetamd64.cpp b/src/coreclr/jit/targetamd64.cpp index 372c4dffc27..4ac48cb229f 100644 --- a/src/coreclr/jit/targetamd64.cpp +++ b/src/coreclr/jit/targetamd64.cpp @@ -12,8 +12,9 @@ #include "target.h" -const char* Target::g_tgtCPUName = "x64"; -const 
Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +const char* Target::g_tgtCPUName = "x64"; +const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L; // clang-format off #ifdef UNIX_AMD64_ABI diff --git a/src/coreclr/jit/targetarm.cpp b/src/coreclr/jit/targetarm.cpp index da125cbb436..dbb986a0e05 100644 --- a/src/coreclr/jit/targetarm.cpp +++ b/src/coreclr/jit/targetarm.cpp @@ -12,8 +12,9 @@ #include "target.h" -const char* Target::g_tgtCPUName = "arm"; -const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +const char* Target::g_tgtCPUName = "arm"; +const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L; // clang-format off const regNumber intArgRegs [] = {REG_R0, REG_R1, REG_R2, REG_R3}; diff --git a/src/coreclr/jit/targetarm64.cpp b/src/coreclr/jit/targetarm64.cpp index 8f5481a83e0..dcec1db6c52 100644 --- a/src/coreclr/jit/targetarm64.cpp +++ b/src/coreclr/jit/targetarm64.cpp @@ -12,8 +12,9 @@ #include "target.h" -const char* Target::g_tgtCPUName = "arm64"; -const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +const char* Target::g_tgtCPUName = "arm64"; +const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L; // clang-format off const regNumber intArgRegs [] = {REG_R0, REG_R1, REG_R2, REG_R3, REG_R4, REG_R5, REG_R6, REG_R7}; diff --git a/src/coreclr/jit/targetx86.cpp b/src/coreclr/jit/targetx86.cpp index fab7286782a..d5ed8b0bbf6 100644 --- a/src/coreclr/jit/targetx86.cpp +++ b/src/coreclr/jit/targetx86.cpp @@ -12,8 +12,9 @@ #include "target.h" -const char* Target::g_tgtCPUName = "x86"; -const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_L2R; +const char* Target::g_tgtCPUName = "x86"; +const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_L2R; +const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = 
ARG_ORDER_R2L; // clang-format off const regNumber intArgRegs [] = {REG_ECX, REG_EDX}; |