
github.com/asmjit/asmjit.git
author     kobalicek <kobalicek.petr@gmail.com>  2022-04-10 01:32:57 +0300
committer  kobalicek <kobalicek.petr@gmail.com>  2022-04-10 01:32:57 +0300
commit     a4cb51b532af0f8137c4182914244c3b05d7745f (patch)
tree       c47114cb375ca70fba63fb5081189f0f8f02a394
parent     8fdee13aeae1cd4d427cad7dc261c0e19b660c10 (diff)
[Bug] Fixed not clobbering YMM|ZMM registers in function calls that preserve only the low 128 bits of vector registers (dead_movs)
-rw-r--r--  src/asmjit/core/ralocal.cpp         28
-rw-r--r--  test/asmjit_test_compiler_x86.cpp  119
2 files changed, 147 insertions(+), 0 deletions(-)
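
Background for this fix: under the Windows x64 calling convention XMM6..XMM15 are callee-saved, but only their low 128 bits; the upper bits of the corresponding YMM/ZMM registers are volatile across a call. Any virtual vector register wider than 16 bytes that is live across such a call therefore has to be spilled even though it sits in a "preserved" physical register. A minimal standalone sketch of that decision rule (plain C++ for illustration, not asmjit API; names and values are hypothetical):

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical values: a Win64-like convention preserves only 16 bytes of
  // each callee-saved vector register (the saveRestoreRegSize of the vector group).
  const uint32_t saveRestoreRegSize = 16;
  const uint32_t virtSizes[] = { 16, 32, 64 };  // XMM-, YMM-, ZMM-sized virtual registers

  for (uint32_t virtSize : virtSizes) {
    bool mustSpill = virtSize > saveRestoreRegSize;
    std::printf("virtSize=%2u bytes -> %s\n", virtSize,
                mustSpill ? "spill before the call" : "preserved by the callee");
  }
  return 0;
}

The register allocator change below applies exactly this rule to each assigned vector register before an invoke.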
diff --git a/src/asmjit/core/ralocal.cpp b/src/asmjit/core/ralocal.cpp
index 38cf341..b4d9244 100644
--- a/src/asmjit/core/ralocal.cpp
+++ b/src/asmjit/core/ralocal.cpp
@@ -836,6 +836,34 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
// STEP 9
// ------
//
+  // Vector registers can be clobbered partially by invoke - find if that's the case and spill when necessary.
+
+  if (node->isInvoke() && group == RegGroup::kVec) {
+    const InvokeNode* invokeNode = node->as<InvokeNode>();
+
+    RegMask maybeClobberedRegs = invokeNode->detail().callConv().preservedRegs(group) & _curAssignment.assigned(group);
+    if (maybeClobberedRegs) {
+      uint32_t saveRestoreVecSize = invokeNode->detail().callConv().saveRestoreRegSize(group);
+      Support::BitWordIterator<RegMask> it(maybeClobberedRegs);
+
+      do {
+        uint32_t physId = it.next();
+        uint32_t workId = _curAssignment.physToWorkId(group, physId);
+
+        RAWorkReg* workReg = workRegById(workId);
+        uint32_t virtSize = workReg->virtReg()->virtSize();
+
+        if (virtSize > saveRestoreVecSize) {
+          ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
+        }
+
+      } while (it.hasNext());
+    }
+  }
+
+  // STEP 10
+  // -------
+  //
// Assign OUT registers.
if (outPending) {
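
For readers without the asmjit types at hand, here is a rough standalone model of the STEP 9 walk added above (the masks and sizes are made-up example data; __builtin_ctz stands in for Support::BitWordIterator and requires GCC/Clang):

#include <cstdint>
#include <cstdio>

int main() {
  // Example data only: bits 6..15 model xmm6..xmm15 being callee-saved
  // (Win64-like), and two of those physical registers currently hold vregs.
  uint32_t preservedRegs = 0xFFC0u;  // preserved by the callee
  uint32_t assigned      = 0x00C0u;  // currently assigned to virtual registers
  uint32_t virtSize[16]  = {};       // virtual register size per physical id
  virtSize[6] = 32;                  // YMM-sized vreg: upper 128 bits would be lost
  virtSize[7] = 16;                  // XMM-sized vreg: fully preserved

  const uint32_t saveRestoreRegSize = 16;
  uint32_t maybeClobbered = preservedRegs & assigned;

  while (maybeClobbered) {
    uint32_t physId = (uint32_t)__builtin_ctz(maybeClobbered);  // lowest set bit
    maybeClobbered &= maybeClobbered - 1u;                      // clear it
    if (virtSize[physId] > saveRestoreRegSize)
      std::printf("spill vreg assigned to physical reg %u\n", physId);
  }
  return 0;
}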
diff --git a/test/asmjit_test_compiler_x86.cpp b/test/asmjit_test_compiler_x86.cpp
index 7dbe955..a3a067d 100644
--- a/test/asmjit_test_compiler_x86.cpp
+++ b/test/asmjit_test_compiler_x86.cpp
@@ -3819,6 +3819,124 @@ public:
static uint32_t calledFunc(uint32_t x) { return x + 1; }
};
+// x86::Compiler - X86Test_FuncCallAVXClobber
+// ==========================================
+
+class X86Test_FuncCallAVXClobber : public X86TestCase {
+public:
+  X86Test_FuncCallAVXClobber() : X86TestCase("FuncCallAVXClobber") {}
+
+  static void add(TestApp& app) {
+    const CpuInfo& cpuInfo = CpuInfo::host();
+
+    if (cpuInfo.features().x86().hasAVX2() && sizeof(void*) == 8)
+      app.add(new X86Test_FuncCallAVXClobber());
+  }
+
+  virtual void compile(x86::Compiler& cc) {
+    FuncNode* mainFunc = cc.addFunc(FuncSignatureT<void, void*, const void*, const void*>(CallConvId::kHost));
+    mainFunc->frame().setAvxEnabled();
+    mainFunc->frame().setAvxCleanup();
+
+    // We need a Windows calling convention to test this properly also on a non-Windows machine.
+    FuncNode* helperFunc = cc.newFunc(FuncSignatureT<void, void*, const void*>(CallConvId::kX64Windows));
+    helperFunc->frame().setAvxEnabled();
+    helperFunc->frame().setAvxCleanup();
+
+    {
+      size_t i;
+
+      x86::Gp dPtr = cc.newIntPtr("dPtr");
+      x86::Gp aPtr = cc.newIntPtr("aPtr");
+      x86::Gp bPtr = cc.newIntPtr("bPtr");
+      x86::Gp tPtr = cc.newIntPtr("tPtr");
+      x86::Ymm acc[8];
+      x86::Mem stack = cc.newStack(32, 1, "stack");
+
+      mainFunc->setArg(0, dPtr);
+      mainFunc->setArg(1, aPtr);
+      mainFunc->setArg(2, bPtr);
+
+      cc.lea(tPtr, stack);
+      for (i = 0; i < 8; i++) {
+        acc[i] = cc.newYmm("acc%zu", i);
+        cc.vmovdqu(acc[i], x86::ptr(aPtr));
+      }
+
+      InvokeNode* invokeNode;
+      cc.invoke(&invokeNode,
+        helperFunc->label(),
+        FuncSignatureT<void, void*, const void*>(CallConvId::kX64Windows));
+      invokeNode->setArg(0, tPtr);
+      invokeNode->setArg(1, bPtr);
+
+      for (i = 1; i < 8; i++) {
+        cc.vpaddd(acc[0], acc[0], acc[i]);
+      }
+
+      cc.vpaddd(acc[0], acc[0], x86::ptr(tPtr));
+      cc.vmovdqu(x86::ptr(dPtr), acc[0]);
+
+      cc.endFunc();
+    }
+
+    {
+      cc.addFunc(helperFunc);
+
+      x86::Gp dPtr = cc.newIntPtr("dPtr");
+      x86::Gp aPtr = cc.newIntPtr("aPtr");
+
+      helperFunc->setArg(0, dPtr);
+      helperFunc->setArg(1, aPtr);
+
+      x86::Gp tmp = cc.newIntPtr("tmp");
+      x86::Ymm acc = cc.newYmm("acc");
+
+      cc.mov(tmp, 1);
+      cc.vmovd(acc.xmm(), tmp);
+      cc.vpbroadcastd(acc, acc.xmm());
+      cc.vpaddd(acc, acc, x86::ptr(aPtr));
+      cc.vmovdqu(x86::ptr(dPtr), acc);
+
+      cc.endFunc();
+    }
+  }
+
+  virtual bool run(void* _func, String& result, String& expect) {
+    typedef void (*Func)(void*, const void*, const void*);
+    Func func = ptr_as_func<Func>(_func);
+
+    size_t i;
+
+    static const uint32_t aData[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+    static const uint32_t bData[8] = { 6, 3, 5, 9, 1, 8, 7, 2 };
+
+    uint32_t resultData[8];
+    uint32_t expectData[8];
+
+    for (i = 0; i < 8; i++)
+      expectData[i] = aData[i] * 8 + bData[i] + 1;
+
+    func(resultData, aData, bData);
+
+    result.assign("{");
+    expect.assign("{");
+
+    for (i = 0; i < 8; i++) {
+      result.appendFormat("%u", resultData[i]);
+      expect.appendFormat("%u", expectData[i]);
+
+      if (i != 7) result.append(", ");
+      if (i != 7) expect.append(", ");
+    }
+
+    result.append("}");
+    expect.append("}");
+
+    return result == expect;
+  }
+};
+
// x86::Compiler - X86Test_MiscLocalConstPool
// ==========================================
@@ -4186,6 +4304,7 @@ void compiler_add_x86_tests(TestApp& app) {
app.addT<X86Test_FuncCallMisc4>();
app.addT<X86Test_FuncCallMisc5>();
app.addT<X86Test_FuncCallMisc6>();
+  app.addT<X86Test_FuncCallAVXClobber>();
// Miscellaneous tests.
app.addT<X86Test_MiscLocalConstPool>();
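
For reference, the expected values in the new test come straight from the generated code: the main function sums eight YMM copies of aData (8 * aData[i]) and then adds the helper's output, which is bData[i] + 1, hence expectData[i] = aData[i] * 8 + bData[i] + 1. A worked check of the first lane, using the test's own aData/bData values:

#include <cstdint>
#include <cstdio>

int main() {
  // First lane of the test vectors: aData[0] = 1, bData[0] = 6.
  uint32_t a = 1, b = 6;
  uint32_t expected = a * 8 + b + 1;              // eight accumulators + helper's (b + 1)
  std::printf("expectData[0] = %u\n", expected);  // prints 15
  return 0;
}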