diff options
author | kobalicek <kobalicek.petr@gmail.com> | 2022-02-16 14:32:50 +0300 |
---|---|---|
committer | kobalicek <kobalicek.petr@gmail.com> | 2022-02-16 18:42:32 +0300 |
commit | 28c4d8c528527141955006f09124ce672ddfbe3f (patch) | |
tree | 338282fc8769f5c90422d2e32fd38277c413bc12 | |
parent | 23ddf56b00f47d8aa0c82ad225e4b3a92661da7e (diff) |
[Opt] Optimized memory footprint of Compiler when generating huge code
-rw-r--r-- | src/asmjit/core/raassignment_p.h | 28 | ||||
-rw-r--r-- | src/asmjit/core/radefs_p.h | 8 | ||||
-rw-r--r-- | src/asmjit/core/ralocal.cpp | 70 | ||||
-rw-r--r-- | src/asmjit/core/ralocal_p.h | 14 | ||||
-rw-r--r-- | src/asmjit/core/rapass.cpp | 81 | ||||
-rw-r--r-- | src/asmjit/core/rapass_p.h | 29 |
6 files changed, 101 insertions, 129 deletions
diff --git a/src/asmjit/core/raassignment_p.h b/src/asmjit/core/raassignment_p.h index 22a97e2..5418329 100644 --- a/src/asmjit/core/raassignment_p.h +++ b/src/asmjit/core/raassignment_p.h @@ -82,6 +82,12 @@ public: size_t size = sizeOf(count); memcpy(this, other, size); } + + inline void unassign(RegGroup group, uint32_t physId, uint32_t indexInWorkIds) noexcept { + assigned.clear(group, Support::bitMask(physId)); + dirty.clear(group, Support::bitMask(physId)); + workIds[indexInWorkIds] = kWorkNone; + } }; struct WorkToPhysMap { @@ -304,6 +310,28 @@ public: _physToWorkIds.swap(other._physToWorkIds); } + inline void assignWorkIdsFromPhysIds() noexcept { + memset(_workToPhysMap, uint8_t(BaseReg::kIdBad), WorkToPhysMap::sizeOf(_layout.workCount)); + + for (RegGroup group : RegGroupVirtValues{}) { + uint32_t physBaseIndex = _layout.physIndex[group]; + Support::BitWordIterator<RegMask> it(_physToWorkMap->assigned[group]); + + while (it.hasNext()) { + uint32_t physId = it.next(); + uint32_t workId = _physToWorkMap->workIds[physBaseIndex + physId]; + + ASMJIT_ASSERT(workId != kWorkNone); + _workToPhysMap->physIds[workId] = uint8_t(physId); + } + } + } + + inline void copyFrom(const PhysToWorkMap* physToWorkMap) noexcept { + memcpy(_physToWorkMap, physToWorkMap, PhysToWorkMap::sizeOf(_layout.physTotal)); + assignWorkIdsFromPhysIds(); + } + inline void copyFrom(const PhysToWorkMap* physToWorkMap, const WorkToPhysMap* workToPhysMap) noexcept { memcpy(_physToWorkMap, physToWorkMap, PhysToWorkMap::sizeOf(_layout.physTotal)); memcpy(_workToPhysMap, workToPhysMap, WorkToPhysMap::sizeOf(_layout.workCount)); diff --git a/src/asmjit/core/radefs_p.h b/src/asmjit/core/radefs_p.h index 426ac29..15c50ff 100644 --- a/src/asmjit/core/radefs_p.h +++ b/src/asmjit/core/radefs_p.h @@ -271,8 +271,12 @@ struct RARegMask { } template<class Operator> - inline void op(RegGroup group, uint32_t input) noexcept { - _masks[group] = Operator::op(_masks[group], input); + inline void op(RegGroup group, RegMask mask) noexcept { + _masks[group] = Operator::op(_masks[group], mask); + } + + inline void clear(RegGroup group, RegMask mask) noexcept { + _masks[group] = _masks[group] & ~mask; } //! \} diff --git a/src/asmjit/core/ralocal.cpp b/src/asmjit/core/ralocal.cpp index 54bc524..38cf341 100644 --- a/src/asmjit/core/ralocal.cpp +++ b/src/asmjit/core/ralocal.cpp @@ -38,7 +38,9 @@ Error RALocalAllocator::init() noexcept { physToWorkMap = _pass->newPhysToWorkMap(); workToPhysMap = _pass->newWorkToPhysMap(); - if (!physToWorkMap || !workToPhysMap) + _tmpWorkToPhysMap = _pass->newWorkToPhysMap(); + + if (!physToWorkMap || !workToPhysMap || !_tmpWorkToPhysMap) return DebugUtils::errored(kErrorOutOfMemory); _tmpAssignment.initLayout(_pass->_physRegCount, _pass->workRegs()); @@ -122,26 +124,18 @@ Error RALocalAllocator::makeInitialAssignment() noexcept { return kErrorOk; } -Error RALocalAllocator::replaceAssignment( - const PhysToWorkMap* physToWorkMap, - const WorkToPhysMap* workToPhysMap) noexcept { - - _curAssignment.copyFrom(physToWorkMap, workToPhysMap); +Error RALocalAllocator::replaceAssignment(const PhysToWorkMap* physToWorkMap) noexcept { + _curAssignment.copyFrom(physToWorkMap); return kErrorOk; } -Error RALocalAllocator::switchToAssignment( - PhysToWorkMap* dstPhysToWorkMap, - WorkToPhysMap* dstWorkToPhysMap, - const ZoneBitVector& liveIn, - bool dstReadOnly, - bool tryMode) noexcept { - +Error RALocalAllocator::switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, const ZoneBitVector& liveIn, bool dstReadOnly, bool tryMode) noexcept { RAAssignment dst; RAAssignment& cur = _curAssignment; dst.initLayout(_pass->_physRegCount, _pass->workRegs()); - dst.initMaps(dstPhysToWorkMap, dstWorkToPhysMap); + dst.initMaps(dstPhysToWorkMap, _tmpWorkToPhysMap); + dst.assignWorkIdsFromPhysIds(); if (tryMode) return kErrorOk; @@ -329,24 +323,27 @@ Cleared: if (!tryMode) { // Here is a code that dumps the conflicting part if something fails here: - // if (!dst.equals(cur)) { - // uint32_t physTotal = dst._layout.physTotal; - // uint32_t workCount = dst._layout.workCount; + // if (!dst.equals(cur)) { + // uint32_t physTotal = dst._layout.physTotal; + // uint32_t workCount = dst._layout.workCount; // - // for (uint32_t physId = 0; physId < physTotal; physId++) { - // uint32_t dstWorkId = dst._physToWorkMap->workIds[physId]; - // uint32_t curWorkId = cur._physToWorkMap->workIds[physId]; - // if (dstWorkId != curWorkId) - // fprintf(stderr, "[PhysIdWork] PhysId=%u WorkId[DST(%u) != CUR(%u)]\n", physId, dstWorkId, curWorkId); - // } + // fprintf(stderr, "Dirty DST=0x%08X CUR=0x%08X\n", dst.dirty(RegGroup::kGp), cur.dirty(RegGroup::kGp)); + // fprintf(stderr, "Assigned DST=0x%08X CUR=0x%08X\n", dst.assigned(RegGroup::kGp), cur.assigned(RegGroup::kGp)); // - // for (uint32_t workId = 0; workId < workCount; workId++) { - // uint32_t dstPhysId = dst._workToPhysMap->physIds[workId]; - // uint32_t curPhysId = cur._workToPhysMap->physIds[workId]; - // if (dstPhysId != curPhysId) - // fprintf(stderr, "[WorkToPhys] WorkId=%u PhysId[DST(%u) != CUR(%u)]\n", workId, dstPhysId, curPhysId); - // } + // for (uint32_t physId = 0; physId < physTotal; physId++) { + // uint32_t dstWorkId = dst._physToWorkMap->workIds[physId]; + // uint32_t curWorkId = cur._physToWorkMap->workIds[physId]; + // if (dstWorkId != curWorkId) + // fprintf(stderr, "[PhysIdWork] PhysId=%u WorkId[DST(%u) != CUR(%u)]\n", physId, dstWorkId, curWorkId); // } + // + // for (uint32_t workId = 0; workId < workCount; workId++) { + // uint32_t dstPhysId = dst._workToPhysMap->physIds[workId]; + // uint32_t curPhysId = cur._workToPhysMap->physIds[workId]; + // if (dstPhysId != curPhysId) + // fprintf(stderr, "[WorkToPhys] WorkId=%u PhysId[DST(%u) != CUR(%u)]\n", workId, dstPhysId, curPhysId); + // } + // } ASMJIT_ASSERT(dst.equals(cur)); } @@ -981,12 +978,7 @@ Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* co // Use TryMode of `switchToAssignment()` if possible. if (target->hasEntryAssignment()) { - ASMJIT_PROPAGATE(switchToAssignment( - target->entryPhysToWorkMap(), - target->entryWorkToPhysMap(), - target->liveIn(), - target->isAllocated(), - true)); + ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), true)); } ASMJIT_PROPAGATE(allocInst(node)); @@ -997,12 +989,7 @@ Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* co BaseNode* prevCursor = _cc->setCursor(injectionPoint); _tmpAssignment.copyFrom(_curAssignment); - ASMJIT_PROPAGATE(switchToAssignment( - target->entryPhysToWorkMap(), - target->entryWorkToPhysMap(), - target->liveIn(), - target->isAllocated(), - false)); + ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), false)); BaseNode* curCursor = _cc->cursor(); if (curCursor != injectionPoint) { @@ -1060,7 +1047,6 @@ Error RALocalAllocator::allocJumpTable(InstNode* node, const RABlocks& targets, if (!sharedAssignment.empty()) { ASMJIT_PROPAGATE(switchToAssignment( sharedAssignment.physToWorkMap(), - sharedAssignment.workToPhysMap(), sharedAssignment.liveIn(), true, // Read-only. false // Try-mode. diff --git a/src/asmjit/core/ralocal_p.h b/src/asmjit/core/ralocal_p.h index 05467c5..b40e867 100644 --- a/src/asmjit/core/ralocal_p.h +++ b/src/asmjit/core/ralocal_p.h @@ -57,6 +57,9 @@ public: //! TiedReg's total counter. RARegCount _tiedCount; + //! Temporary workToPhysMap that can be used freely by the allocator. + WorkToPhysMap* _tmpWorkToPhysMap; + //! \name Construction & Destruction //! \{ @@ -113,9 +116,7 @@ public: Error makeInitialAssignment() noexcept; - Error replaceAssignment( - const PhysToWorkMap* physToWorkMap, - const WorkToPhysMap* workToPhysMap) noexcept; + Error replaceAssignment(const PhysToWorkMap* physToWorkMap) noexcept; //! Switch to the given assignment by reassigning all register and emitting code that reassigns them. //! This is always used to switch to a previously stored assignment. @@ -123,12 +124,7 @@ public: //! If `tryMode` is true then the final assignment doesn't have to be exactly same as specified by `dstPhysToWorkMap` //! and `dstWorkToPhysMap`. This mode is only used before conditional jumps that already have assignment to generate //! a code sequence that is always executed regardless of the flow. - Error switchToAssignment( - PhysToWorkMap* dstPhysToWorkMap, - WorkToPhysMap* dstWorkToPhysMap, - const ZoneBitVector& liveIn, - bool dstReadOnly, - bool tryMode) noexcept; + Error switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, const ZoneBitVector& liveIn, bool dstReadOnly, bool tryMode) noexcept; inline Error spillRegsBeforeEntry(RABlock* block) noexcept { return spillScratchGpRegsBeforeEntry(block->entryScratchGpRegs()); diff --git a/src/asmjit/core/rapass.cpp b/src/asmjit/core/rapass.cpp index 79709f6..0e87ab6 100644 --- a/src/asmjit/core/rapass.cpp +++ b/src/asmjit/core/rapass.cpp @@ -1483,18 +1483,12 @@ Error BaseRAPass::runLocalAllocator() noexcept { cc()->_setCursor(unconditionalJump ? prev->prev() : prev); if (consecutive->hasEntryAssignment()) { - ASMJIT_PROPAGATE( - lra.switchToAssignment( - consecutive->entryPhysToWorkMap(), - consecutive->entryWorkToPhysMap(), - consecutive->liveIn(), - consecutive->isAllocated(), - false)); + ASMJIT_PROPAGATE(lra.switchToAssignment(consecutive->entryPhysToWorkMap(), consecutive->liveIn(), consecutive->isAllocated(), false)); } else { ASMJIT_PROPAGATE(lra.spillRegsBeforeEntry(consecutive)); ASMJIT_PROPAGATE(setBlockEntryAssignment(consecutive, block, lra._curAssignment)); - lra._curAssignment.copyFrom(consecutive->entryPhysToWorkMap(), consecutive->entryWorkToPhysMap()); + lra._curAssignment.copyFrom(consecutive->entryPhysToWorkMap()); } } @@ -1526,7 +1520,7 @@ Error BaseRAPass::runLocalAllocator() noexcept { } // If we switched to some block we have to update the local allocator. - lra.replaceAssignment(block->entryPhysToWorkMap(), block->entryWorkToPhysMap()); + lra.replaceAssignment(block->entryPhysToWorkMap()); } _clobberedRegs.op<Support::Or>(lra._clobberedRegs); @@ -1546,12 +1540,10 @@ Error BaseRAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlo } PhysToWorkMap* physToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap()); - WorkToPhysMap* workToPhysMap = cloneWorkToPhysMap(fromAssignment.workToPhysMap()); - - if (ASMJIT_UNLIKELY(!physToWorkMap || !workToPhysMap)) + if (ASMJIT_UNLIKELY(!physToWorkMap)) return DebugUtils::errored(kErrorOutOfMemory); - block->setEntryAssignment(physToWorkMap, workToPhysMap); + block->setEntryAssignment(physToWorkMap); // True if this is the first (entry) block, nothing to do in this case. if (block == fromBlock) { @@ -1562,10 +1554,6 @@ Error BaseRAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlo return kErrorOk; } - RAAssignment as; - as.initLayout(_physRegCount, workRegs()); - as.initMaps(physToWorkMap, workToPhysMap); - const ZoneBitVector& liveOut = fromBlock->liveOut(); const ZoneBitVector& liveIn = block->liveIn(); @@ -1578,94 +1566,85 @@ Error BaseRAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlo RAWorkReg* workReg = workRegById(workId); RegGroup group = workReg->group(); - uint32_t physId = as.workToPhysId(group, workId); + uint32_t physId = fromAssignment.workToPhysId(group, workId); if (physId != RAAssignment::kPhysNone) - as.unassign(group, workId, physId); + physToWorkMap->unassign(group, physId, _physRegIndex.get(group) + physId); } } - return blockEntryAssigned(as); + return blockEntryAssigned(physToWorkMap); } Error BaseRAPass::setSharedAssignment(uint32_t sharedAssignmentId, const RAAssignment& fromAssignment) noexcept { ASMJIT_ASSERT(_sharedAssignments[sharedAssignmentId].empty()); PhysToWorkMap* physToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap()); - WorkToPhysMap* workToPhysMap = cloneWorkToPhysMap(fromAssignment.workToPhysMap()); - - if (ASMJIT_UNLIKELY(!physToWorkMap || !workToPhysMap)) + if (ASMJIT_UNLIKELY(!physToWorkMap)) return DebugUtils::errored(kErrorOutOfMemory); - _sharedAssignments[sharedAssignmentId].assignMaps(physToWorkMap, workToPhysMap); + _sharedAssignments[sharedAssignmentId].assignPhysToWorkMap(physToWorkMap); + ZoneBitVector& sharedLiveIn = _sharedAssignments[sharedAssignmentId]._liveIn; ASMJIT_PROPAGATE(sharedLiveIn.resize(allocator(), workRegCount())); - RAAssignment as; - as.initLayout(_physRegCount, workRegs()); - Support::Array<uint32_t, Globals::kNumVirtGroups> sharedAssigned {}; - for (RABlock* block : blocks()) { if (block->sharedAssignmentId() == sharedAssignmentId) { ASMJIT_ASSERT(!block->hasEntryAssignment()); PhysToWorkMap* entryPhysToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap()); - WorkToPhysMap* entryWorkToPhysMap = cloneWorkToPhysMap(fromAssignment.workToPhysMap()); - - if (ASMJIT_UNLIKELY(!entryPhysToWorkMap || !entryWorkToPhysMap)) + if (ASMJIT_UNLIKELY(!entryPhysToWorkMap)) return DebugUtils::errored(kErrorOutOfMemory); - block->setEntryAssignment(entryPhysToWorkMap, entryWorkToPhysMap); - as.initMaps(entryPhysToWorkMap, entryWorkToPhysMap); + block->setEntryAssignment(entryPhysToWorkMap); const ZoneBitVector& liveIn = block->liveIn(); sharedLiveIn.or_(liveIn); for (RegGroup group : RegGroupVirtValues{}) { sharedAssigned[group] |= entryPhysToWorkMap->assigned[group]; + + uint32_t physBaseIndex = _physRegIndex.get(group); Support::BitWordIterator<RegMask> it(entryPhysToWorkMap->assigned[group]); while (it.hasNext()) { uint32_t physId = it.next(); - uint32_t workId = as.physToWorkId(group, physId); + uint32_t workId = entryPhysToWorkMap->workIds[physBaseIndex + physId]; if (!liveIn.bitAt(workId)) - as.unassign(group, workId, physId); + entryPhysToWorkMap->unassign(group, physId, physBaseIndex + physId); } } } } - { - as.initMaps(physToWorkMap, workToPhysMap); - - for (RegGroup group : RegGroupVirtValues{}) { - Support::BitWordIterator<RegMask> it(_availableRegs[group] & ~sharedAssigned[group]); + for (RegGroup group : RegGroupVirtValues{}) { + uint32_t physBaseIndex = _physRegIndex.get(group); + Support::BitWordIterator<RegMask> it(_availableRegs[group] & ~sharedAssigned[group]); - while (it.hasNext()) { - uint32_t physId = it.next(); - if (as.isPhysAssigned(group, physId)) { - uint32_t workId = as.physToWorkId(group, physId); - as.unassign(group, workId, physId); - } - } + while (it.hasNext()) { + uint32_t physId = it.next(); + if (Support::bitTest(physToWorkMap->assigned[group], physId)) + physToWorkMap->unassign(group, physId, physBaseIndex + physId); } } - return blockEntryAssigned(as); + return blockEntryAssigned(physToWorkMap); } -Error BaseRAPass::blockEntryAssigned(const RAAssignment& as) noexcept { +Error BaseRAPass::blockEntryAssigned(const PhysToWorkMap* physToWorkMap) noexcept { // Complex allocation strategy requires to record register assignments upon block entry (or per shared state). for (RegGroup group : RegGroupVirtValues{}) { if (!_strategy[group].isComplex()) continue; - Support::BitWordIterator<RegMask> it(as.assigned(group)); + uint32_t physBaseIndex = _physRegIndex[group]; + Support::BitWordIterator<RegMask> it(physToWorkMap->assigned[group]); + while (it.hasNext()) { uint32_t physId = it.next(); - uint32_t workId = as.physToWorkId(group, physId); + uint32_t workId = physToWorkMap->workIds[physBaseIndex + physId]; RAWorkReg* workReg = workRegById(workId); workReg->addAllocatedMask(Support::bitMask(physId)); diff --git a/src/asmjit/core/rapass_p.h b/src/asmjit/core/rapass_p.h index 098c5c9..52c3ec4 100644 --- a/src/asmjit/core/rapass_p.h +++ b/src/asmjit/core/rapass_p.h @@ -129,10 +129,8 @@ public: //! Scratch registers used at exit, by a terminator instruction. RegMask _exitScratchGpRegs = 0; - //! Register assignment (PhysToWork) on entry. + //! Register assignment on entry. PhysToWorkMap* _entryPhysToWorkMap = nullptr; - //! Register assignment (WorkToPhys) on entry. - WorkToPhysMap* _entryWorkToPhysMap = nullptr; //! \} @@ -247,13 +245,8 @@ public: } inline bool hasEntryAssignment() const noexcept { return _entryPhysToWorkMap != nullptr; } - inline WorkToPhysMap* entryWorkToPhysMap() const noexcept { return _entryWorkToPhysMap; } inline PhysToWorkMap* entryPhysToWorkMap() const noexcept { return _entryPhysToWorkMap; } - - inline void setEntryAssignment(PhysToWorkMap* physToWorkMap, WorkToPhysMap* workToPhysMap) noexcept { - _entryPhysToWorkMap = physToWorkMap; - _entryWorkToPhysMap = workToPhysMap; - } + inline void setEntryAssignment(PhysToWorkMap* physToWorkMap) noexcept { _entryPhysToWorkMap = physToWorkMap; } //! \} @@ -616,8 +609,6 @@ public: ZoneBitVector _liveIn {}; //! Register assignment (PhysToWork). PhysToWorkMap* _physToWorkMap = nullptr; - //! Register assignment (WorkToPhys). - WorkToPhysMap* _workToPhysMap = nullptr; //! \} @@ -632,12 +623,7 @@ public: inline const ZoneBitVector& liveIn() const noexcept { return _liveIn; } inline PhysToWorkMap* physToWorkMap() const noexcept { return _physToWorkMap; } - inline WorkToPhysMap* workToPhysMap() const noexcept { return _workToPhysMap; } - - inline void assignMaps(PhysToWorkMap* physToWorkMap, WorkToPhysMap* workToPhysMap) noexcept { - _physToWorkMap = physToWorkMap; - _workToPhysMap = workToPhysMap; - } + inline void assignPhysToWorkMap(PhysToWorkMap* physToWorkMap) noexcept { _physToWorkMap = physToWorkMap; } //! \} }; @@ -1066,13 +1052,6 @@ public: return static_cast<PhysToWorkMap*>(zone()->dupAligned(map, size, sizeof(uint32_t))); } - inline WorkToPhysMap* cloneWorkToPhysMap(const WorkToPhysMap* map) noexcept { - size_t size = WorkToPhysMap::sizeOf(_workRegs.size()); - if (ASMJIT_UNLIKELY(size == 0)) - return const_cast<WorkToPhysMap*>(map); - return static_cast<WorkToPhysMap*>(zone()->dup(map, size)); - } - //! \name Liveness Analysis & Statistics //! \{ @@ -1110,7 +1089,7 @@ public: //! Called after the RA assignment has been assigned to a block. //! //! This cannot change the assignment, but can examine it. - Error blockEntryAssigned(const RAAssignment& as) noexcept; + Error blockEntryAssigned(const PhysToWorkMap* physToWorkMap) noexcept; //! \} |