From 77b75ca53f464d4400d0b86458f97037b35a4999 Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Wed, 13 Apr 2022 19:39:39 -0700 Subject: [BOLT][perf2bolt] Fix base address calculation for shared objects When processing profile data for a shared object or PIE, perf2bolt needs to calculate the base address of the binary based on the map info reported by the perf tool. When the mapping data provided is for the second (or any other than the first) segment and the segment's file offset does not match its memory offset, perf2bolt makes a wrong assumption about the binary's base address. Add a function to calculate the binary's base address using the reported memory mapping and use the returned base for further address adjustments. Reviewed By: yota9 Differential Revision: https://reviews.llvm.org/D123755 --- bolt/include/bolt/Core/BinaryContext.h | 9 ++++ bolt/include/bolt/Profile/DataAggregator.h | 23 ++++---- bolt/lib/Core/BinaryContext.cpp | 16 ++++++ bolt/lib/Profile/DataAggregator.cpp | 34 ++++++++---- bolt/unittests/Core/BinaryContext.cpp | 85 ++++++++++++++++++++++++++++++ bolt/unittests/Core/CMakeLists.txt | 1 + 6 files changed, 146 insertions(+), 22 deletions(-) create mode 100644 bolt/unittests/Core/BinaryContext.cpp (limited to 'bolt') diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 0257c907f588..c1b6887a55c8 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -970,6 +970,15 @@ public: Sections.end())); } + /// Return base address for the shared object or PIE based on the segment + /// mapping information. \p MMapAddress is an address where one of the + /// segments was mapped. \p FileOffset is the offset in the file of the + /// mapping. Note that \p FileOffset should be page-aligned and could be + /// different from the file offset of the segment which could be unaligned. + /// If no segment is found that matches \p FileOffset, return NoneType(). 
+ Optional<uint64_t> getBaseAddressForMapping(uint64_t MMapAddress, + uint64_t FileOffset) const; + /// Check if the address belongs to this binary's static allocation space. bool containsAddress(uint64_t Address) const { return Address >= FirstAllocAddress && Address < LayoutStartAddress; diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 5dc9273dfa32..48755c3261c0 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -168,14 +168,15 @@ private: /// from the file name in BC. std::string BuildIDBinaryName; - /// Memory map info for a single file + /// Memory map info for a single file as recorded in perf.data struct MMapInfo { - uint64_t BaseAddress; - uint64_t Size; - uint64_t Offset; - int32_t PID{-1}; - bool Forked{false}; - uint64_t Time{0ULL}; // time in micro seconds + uint64_t BaseAddress{0}; /// Base address of the mapped binary. + uint64_t MMapAddress{0}; /// Address of the executable segment. + uint64_t Size{0}; /// Size of the mapping. + uint64_t Offset{0}; /// File offset of the mapped segment. + int32_t PID{-1}; /// Process ID. + bool Forked{false}; /// Was the process forked? + uint64_t Time{0ULL}; /// Time in micro seconds. }; /// Per-PID map info for the binary @@ -420,12 +421,8 @@ private: /// correspond to the binary allocated address space, are adjusted to avoid /// conflicts. void adjustAddress(uint64_t &Address, const MMapInfo &MMI) const { - if (Address >= MMI.BaseAddress && Address < MMI.BaseAddress + MMI.Size) { - // NOTE: Assumptions about the binary segment load table (PH for ELF) - // Segment file offset equals virtual address (which is true for .so) - // There aren't multiple executable segments loaded because MMapInfo - // doesn't support them. 
- Address -= MMI.BaseAddress - MMI.Offset; + if (Address >= MMI.MMapAddress && Address < MMI.MMapAddress + MMI.Size) { + Address -= MMI.BaseAddress; } else if (Address < MMI.Size) { // Make sure the address is not treated as belonging to the binary. Address = (-1ULL); diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index a848fdcd7dbe..64d43ec6c8e0 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1690,6 +1690,22 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, } } +Optional<uint64_t> +BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, + uint64_t FileOffset) const { + // Find a segment with a matching file offset. + for (auto &KV : SegmentMapInfo) { + const SegmentInfo &SegInfo = KV.second; + if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) { + // Use segment's aligned memory offset to calculate the base address. + const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment); + return MMapAddress - MemOffset; + } + } + + return NoneType(); +} + ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { auto SI = AddressToSection.upper_bound(Address); if (SI != AddressToSection.begin()) { diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 37e0aa08dddf..5b7853ee2b25 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -1943,7 +1943,7 @@ DataAggregator::parseMMapEvent() { } const StringRef BaseAddressStr = Line.split('[').second.split('(').first; - if (BaseAddressStr.getAsInteger(0, ParsedInfo.BaseAddress)) { + if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) { reportError("expected base address"); Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n"; return make_error_code(llvm::errc::io_error); @@ -2003,7 +2003,7 @@ std::error_code DataAggregator::parseMMapEvents() { dbgs() << "FileName -> mmap info:\n"; for (const std::pair<const StringRef, MMapInfo> 
&Pair : GlobalMMapInfo) dbgs() << " " << Pair.first << " : " << Pair.second.PID << " [0x" - << Twine::utohexstr(Pair.second.BaseAddress) << ", " + << Twine::utohexstr(Pair.second.MMapAddress) << ", " << Twine::utohexstr(Pair.second.Size) << " @ " << Twine::utohexstr(Pair.second.Offset) << "]\n"; }); @@ -2017,29 +2017,45 @@ std::error_code DataAggregator::parseMMapEvents() { auto Range = GlobalMMapInfo.equal_range(NameToUse); for (auto I = Range.first; I != Range.second; ++I) { - const MMapInfo &MMapInfo = I->second; - if (BC->HasFixedLoadAddress && MMapInfo.BaseAddress) { + MMapInfo &MMapInfo = I->second; + if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) { // Check that the binary mapping matches one of the segments. bool MatchFound = false; for (auto &KV : BC->SegmentMapInfo) { SegmentInfo &SegInfo = KV.second; - // The mapping is page-aligned and hence the BaseAddress could be + // The mapping is page-aligned and hence the MMapAddress could be // different from the segment start address. We cannot know the page // size of the mapping, but we know it should not exceed the segment // alignment value. Hence we are performing an approximate check. - if (SegInfo.Address >= MMapInfo.BaseAddress && - SegInfo.Address - MMapInfo.BaseAddress < SegInfo.Alignment) { + if (SegInfo.Address >= MMapInfo.MMapAddress && + SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment) { MatchFound = true; break; } } if (!MatchFound) { errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse - << " at 0x" << Twine::utohexstr(MMapInfo.BaseAddress) << '\n'; + << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n'; continue; } } + // Set base address for shared objects. 
+ if (!BC->HasFixedLoadAddress) { + Optional<uint64_t> BaseAddress = + BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset); + if (!BaseAddress) { + errs() << "PERF2BOLT-WARNING: unable to find base address of the " + "binary when memory mapped at 0x" + << Twine::utohexstr(MMapInfo.MMapAddress) + << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset) + << ". Ignoring profile data for this mapping\n"; + continue; + } else { + MMapInfo.BaseAddress = *BaseAddress; + } + } + BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); } @@ -2110,7 +2126,7 @@ std::error_code DataAggregator::parseTaskEvents() { LLVM_DEBUG({ for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) outs() << " " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "") - << ": (0x" << Twine::utohexstr(MMI.second.BaseAddress) << ": 0x" + << ": (0x" << Twine::utohexstr(MMI.second.MMapAddress) << ": 0x" << Twine::utohexstr(MMI.second.Size) << ")\n"; }); diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp new file mode 100644 index 000000000000..6e5c6c4bcd6b --- /dev/null +++ b/bolt/unittests/Core/BinaryContext.cpp @@ -0,0 +1,85 @@ +#include "bolt/Core/BinaryContext.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/TargetSelect.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::ELF; +using namespace bolt; + +namespace { +struct BinaryContextTester : public testing::TestWithParam<Triple::ArchType> { + void SetUp() override { + initalizeLLVM(); + prepareElf(); + initializeBOLT(); + } + +protected: + void initalizeLLVM() { + llvm::InitializeAllTargetInfos(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllAsmParsers(); + llvm::InitializeAllDisassemblers(); + llvm::InitializeAllTargets(); + llvm::InitializeAllAsmPrinters(); + } + + void prepareElf() { + memcpy(ElfBuf, "\177ELF", 4); + ELF64LE::Ehdr *EHdr = 
reinterpret_cast<typename ELF64LE::Ehdr *>(ElfBuf); + EHdr->e_ident[llvm::ELF::EI_CLASS] = llvm::ELF::ELFCLASS64; + EHdr->e_ident[llvm::ELF::EI_DATA] = llvm::ELF::ELFDATA2LSB; + EHdr->e_machine = GetParam() == Triple::aarch64 ? EM_AARCH64 : EM_X86_64; + MemoryBufferRef Source(StringRef(ElfBuf, sizeof(ElfBuf)), "ELF"); + ObjFile = cantFail(ObjectFile::createObjectFile(Source)); + } + + void initializeBOLT() { + BC = cantFail(BinaryContext::createBinaryContext( + ObjFile.get(), true, DWARFContext::create(*ObjFile.get()))); + ASSERT_FALSE(!BC); + } + + char ElfBuf[sizeof(typename ELF64LE::Ehdr)] = {}; + std::unique_ptr<ObjectFile> ObjFile; + std::unique_ptr<BinaryContext> BC; +}; +} // namespace + +#ifdef X86_AVAILABLE + +INSTANTIATE_TEST_SUITE_P(X86, BinaryContextTester, + ::testing::Values(Triple::x86_64)); + +#endif + +#ifdef AARCH64_AVAILABLE + +INSTANTIATE_TEST_SUITE_P(AArch64, BinaryContextTester, + ::testing::Values(Triple::aarch64)); + +#endif + +TEST_P(BinaryContextTester, BaseAddress) { + // Check that base address calculation is correct for a binary with the + // following segment layout: + BC->SegmentMapInfo[0] = SegmentInfo{0, 0x10e8c2b4, 0, 0x10e8c2b4, 0x1000}; + BC->SegmentMapInfo[0x10e8d2b4] = + SegmentInfo{0x10e8d2b4, 0x3952faec, 0x10e8c2b4, 0x3952faec, 0x1000}; + BC->SegmentMapInfo[0x4a3bddc0] = + SegmentInfo{0x4a3bddc0, 0x148e828, 0x4a3bbdc0, 0x148e828, 0x1000}; + BC->SegmentMapInfo[0x4b84d5e8] = + SegmentInfo{0x4b84d5e8, 0x294f830, 0x4b84a5e8, 0x3d3820, 0x1000}; + + Optional<uint64_t> BaseAddress = + BC->getBaseAddressForMapping(0x7f13f5556000, 0x10e8c000); + ASSERT_TRUE(BaseAddress.hasValue()); + ASSERT_EQ(*BaseAddress, 0x7f13e46c9000ULL); + + BaseAddress = BC->getBaseAddressForMapping(0x7f13f5556000, 0x137a000); + ASSERT_FALSE(BaseAddress.hasValue()); +} diff --git a/bolt/unittests/Core/CMakeLists.txt b/bolt/unittests/Core/CMakeLists.txt index 7abb0bea041a..926e65b874ca 100644 --- a/bolt/unittests/Core/CMakeLists.txt +++ b/bolt/unittests/Core/CMakeLists.txt @@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS ) 
add_bolt_unittest(CoreTests + BinaryContext.cpp MCPlusBuilder.cpp ) -- cgit v1.2.3