//===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the BinaryContext class. // //===----------------------------------------------------------------------===// #include "bolt/Core/BinaryContext.h" #include "bolt/Core/BinaryEmitter.h" #include "bolt/Core/BinaryFunction.h" #include "bolt/Utils/CommandLineOpts.h" #include "bolt/Utils/NameResolver.h" #include "bolt/Utils/Utils.h" #include "llvm/ADT/Twine.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/Regex.h" #include #include #include #include #include using namespace llvm; #undef DEBUG_TYPE #define DEBUG_TYPE "bolt" namespace opts { cl::opt NoHugePages("no-huge-pages", cl::desc("use regular size pages for code alignment"), cl::Hidden, cl::cat(BoltCategory)); static cl::opt PrintDebugInfo("print-debug-info", cl::desc("print debug info when printing functions"), cl::Hidden, cl::ZeroOrMore, cl::cat(BoltCategory)); cl::opt PrintRelocations( "print-relocations", cl::desc("print relocations when printing functions/objects"), cl::Hidden, cl::cat(BoltCategory)); static cl::opt PrintMemData("print-mem-data", cl::desc("print memory data annotations when printing functions"), cl::Hidden, cl::ZeroOrMore, cl::cat(BoltCategory)); } // namespace opts namespace llvm { namespace bolt { BinaryContext::BinaryContext(std::unique_ptr Ctx, std::unique_ptr DwCtx, std::unique_ptr TheTriple, const Target *TheTarget, std::string TripleName, std::unique_ptr MCE, std::unique_ptr MOFI, std::unique_ptr AsmInfo, std::unique_ptr MII, std::unique_ptr STI, std::unique_ptr InstPrinter, std::unique_ptr MIA, std::unique_ptr MIB, std::unique_ptr MRI, std::unique_ptr DisAsm) : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), TheTriple(std::move(TheTriple)), TheTarget(TheTarget), TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { Relocation::Arch = this->TheTriple->getArch(); RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; } BinaryContext::~BinaryContext() { for (BinarySection *Section : Sections) delete Section; for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) delete InjectedFunction; for (std::pair JTI : JumpTables) delete JTI.second; clearBinaryData(); } /// Create BinaryContext for a given architecture \p ArchName and /// triple \p TripleName. Expected> BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, std::unique_ptr DwCtx) { StringRef ArchName = ""; StringRef FeaturesStr = ""; switch (File->getArch()) { case llvm::Triple::x86_64: ArchName = "x86-64"; FeaturesStr = "+nopl"; break; case llvm::Triple::aarch64: ArchName = "aarch64"; FeaturesStr = "+all"; break; default: return createStringError(std::errc::not_supported, "BOLT-ERROR: Unrecognized machine in ELF file"); } auto TheTriple = std::make_unique(File->makeTriple()); const std::string TripleName = TheTriple->str(); std::string Error; const Target *TheTarget = TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); if (!TheTarget) return createStringError(make_error_code(std::errc::not_supported), Twine("BOLT-ERROR: ", Error)); std::unique_ptr MRI( TheTarget->createMCRegInfo(TripleName)); if (!MRI) return createStringError( make_error_code(std::errc::not_supported), Twine("BOLT-ERROR: no register info for target ", TripleName)); // Set up disassembler. std::unique_ptr AsmInfo( TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); if (!AsmInfo) return createStringError( make_error_code(std::errc::not_supported), Twine("BOLT-ERROR: no assembly info for target ", TripleName)); // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump // we want to emit such names as using @PLT without double quotes to convey // variant kind to the assembler. BOLT doesn't rely on the linker so we can // override the default AsmInfo behavior to emit names the way we want. AsmInfo->setAllowAtInName(true); std::unique_ptr STI( TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); if (!STI) return createStringError( make_error_code(std::errc::not_supported), Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); std::unique_ptr MII(TheTarget->createMCInstrInfo()); if (!MII) return createStringError( make_error_code(std::errc::not_supported), Twine("BOLT-ERROR: no instruction info for target ", TripleName)); std::unique_ptr Ctx( new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); std::unique_ptr MOFI( TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); Ctx->setObjectFileInfo(MOFI.get()); // We do not support X86 Large code model. Change this in the future. bool Large = false; if (TheTriple->getArch() == llvm::Triple::aarch64) Large = true; unsigned LSDAEncoding = Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; if (IsPIC) { LSDAEncoding = dwarf::DW_EH_PE_pcrel | (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); } std::unique_ptr DisAsm( TheTarget->createMCDisassembler(*STI, *Ctx)); if (!DisAsm) return createStringError( make_error_code(std::errc::not_supported), Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); std::unique_ptr MIA( TheTarget->createMCInstrAnalysis(MII.get())); if (!MIA) return createStringError( make_error_code(std::errc::not_supported), Twine("BOLT-ERROR: failed to create instruction analysis for target ", TripleName)); int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); std::unique_ptr InstructionPrinter( TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, *MII, *MRI)); if (!InstructionPrinter) return createStringError( make_error_code(std::errc::not_supported), Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); InstructionPrinter->setPrintImmHex(true); std::unique_ptr MCE( TheTarget->createMCCodeEmitter(*MII, *Ctx)); // Make sure we don't miss any output on core dumps. outs().SetUnbuffered(); errs().SetUnbuffered(); dbgs().SetUnbuffered(); auto BC = std::make_unique( std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, std::string(TripleName), std::move(MCE), std::move(MOFI), std::move(AsmInfo), std::move(MII), std::move(STI), std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), std::move(DisAsm)); BC->LSDAEncoding = LSDAEncoding; BC->MAB = std::unique_ptr( BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); BC->setFilename(File->getFileName()); BC->HasFixedLoadAddress = !IsPIC; BC->SymbolicDisAsm = std::unique_ptr( BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); if (!BC->SymbolicDisAsm) return createStringError( make_error_code(std::errc::not_supported), Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); return std::move(BC); } bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { if (opts::HotText && (SymbolName == "__hot_start" || SymbolName == "__hot_end")) return true; if (opts::HotData && (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) return true; if (SymbolName == "_end") return true; return false; } std::unique_ptr BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { return MAB->createObjectWriter(OS); } bool BinaryContext::validateObjectNesting() const { auto Itr = BinaryDataMap.begin(); auto End = BinaryDataMap.end(); bool Valid = true; while (Itr != End) { auto Next = std::next(Itr); while (Next != End && Itr->second->getSection() == Next->second->getSection() && Itr->second->containsRange(Next->second->getAddress(), Next->second->getSize())) { if (Next->second->Parent != Itr->second) { errs() << "BOLT-WARNING: object nesting incorrect for:\n" << "BOLT-WARNING: " << *Itr->second << "\n" << "BOLT-WARNING: " << *Next->second << "\n"; Valid = false; } ++Next; } Itr = Next; } return Valid; } bool BinaryContext::validateHoles() const { bool Valid = true; for (BinarySection &Section : sections()) { for (const Relocation &Rel : Section.relocations()) { uint64_t RelAddr = Rel.Offset + Section.getAddress(); const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); if (!BD) { errs() << "BOLT-WARNING: no BinaryData found for relocation at address" << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName() << "\n"; Valid = false; } else if (!BD->getAtomicRoot()) { errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " << "address 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName() << "\n"; Valid = false; } } } return Valid; } void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { const uint64_t Address = GAI->second->getAddress(); const uint64_t Size = GAI->second->getSize(); auto fixParents = [&](BinaryDataMapType::iterator Itr, BinaryData *NewParent) { BinaryData *OldParent = Itr->second->Parent; Itr->second->Parent = NewParent; ++Itr; while (Itr != BinaryDataMap.end() && OldParent && Itr->second->Parent == OldParent) { Itr->second->Parent = NewParent; ++Itr; } }; // Check if the previous symbol contains the newly added symbol. if (GAI != BinaryDataMap.begin()) { BinaryData *Prev = std::prev(GAI)->second; while (Prev) { if (Prev->getSection() == GAI->second->getSection() && Prev->containsRange(Address, Size)) { fixParents(GAI, Prev); } else { fixParents(GAI, nullptr); } Prev = Prev->Parent; } } // Check if the newly added symbol contains any subsequent symbols. if (Size != 0) { BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; auto Itr = std::next(GAI); while ( Itr != BinaryDataMap.end() && BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { Itr->second->Parent = BD; ++Itr; } } } iterator_range BinaryContext::getSubBinaryData(BinaryData *BD) { auto Start = std::next(BinaryDataMap.find(BD->getAddress())); auto End = Start; while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) ++End; return make_range(Start, End); } std::pair BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, bool IsPCRel) { if (isAArch64()) { // Check if this is an access to a constant island and create bookkeeping // to keep track of it and emit it later as part of this function. if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) return std::make_pair(IslandSym, 0); // Detect custom code written in assembly that refers to arbitrary // constant islands from other functions. Write this reference so we // can pull this constant island and emit it as part of this function // too. auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); if (IslandIter != AddressToConstantIslandMap.begin() && (IslandIter == AddressToConstantIslandMap.end() || IslandIter->first > Address)) --IslandIter; if (IslandIter != AddressToConstantIslandMap.end()) { if (MCSymbol *IslandSym = IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) { BF.createIslandDependency(IslandSym, IslandIter->second); return std::make_pair(IslandSym, 0); } } } // Note that the address does not necessarily have to reside inside // a section, it could be an absolute address too. ErrorOr Section = getSectionForAddress(Address); if (Section && Section->isText()) { if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { if (Address != BF.getAddress()) { // The address could potentially escape. Mark it as another entry // point into the function. if (opts::Verbosity >= 1) { outs() << "BOLT-INFO: potentially escaped address 0x" << Twine::utohexstr(Address) << " in function " << BF << '\n'; } BF.HasInternalLabelReference = true; return std::make_pair( BF.addEntryPointAtOffset(Address - BF.getAddress()), 0); } } else { addInterproceduralReference(&BF, Address); } } // With relocations, catch jump table references outside of the basic block // containing the indirect jump. if (HasRelocations) { const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { const MCSymbol *Symbol = getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); return std::make_pair(Symbol, 0); } } if (BinaryData *BD = getBinaryDataContainingAddress(Address)) return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); // TODO: use DWARF info to get size/alignment here? MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); return std::make_pair(TargetSymbol, 0); } MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, BinaryFunction &BF) { if (!isX86()) return MemoryContentsType::UNKNOWN; ErrorOr Section = getSectionForAddress(Address); if (!Section) { // No section - possibly an absolute address. Since we don't allow // internal function addresses to escape the function scope - we // consider it a tail call. if (opts::Verbosity > 1) { errs() << "BOLT-WARNING: no section for address 0x" << Twine::utohexstr(Address) << " referenced from function " << BF << '\n'; } return MemoryContentsType::UNKNOWN; } if (Section->isVirtual()) { // The contents are filled at runtime. return MemoryContentsType::UNKNOWN; } // No support for jump tables in code yet. if (Section->isText()) return MemoryContentsType::UNKNOWN; // Start with checking for PIC jump table. We expect non-PIC jump tables // to have high 32 bits set to 0. if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) return MemoryContentsType::POSSIBLE_JUMP_TABLE; return MemoryContentsType::UNKNOWN; } /// Check if == .cold(.\d+)? bool isPotentialFragmentByName(BinaryFunction &Fragment, BinaryFunction &Parent) { for (StringRef Name : Parent.getNames()) { std::string NamePrefix = Regex::escape(NameResolver::restore(Name)); std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str(); if (Fragment.hasRestoredNameRegex(NameRegex)) return true; } return false; } bool BinaryContext::analyzeJumpTable( const uint64_t Address, const JumpTable::JumpTableType Type, BinaryFunction &BF, const uint64_t NextJTAddress, JumpTable::AddressesType *EntriesAsAddress) { // Is one of the targets __builtin_unreachable? bool HasUnreachable = false; // Number of targets other than __builtin_unreachable. uint64_t NumRealEntries = 0; auto addEntryAddress = [&](uint64_t EntryAddress) { if (EntriesAsAddress) EntriesAsAddress->emplace_back(EntryAddress); }; auto doesBelongToFunction = [&](const uint64_t Addr, BinaryFunction *TargetBF) -> bool { if (BF.containsAddress(Addr)) return true; // Nothing to do if we failed to identify the containing function. if (!TargetBF) return false; // Case 1: check if BF is a fragment and TargetBF is its parent. if (BF.isFragment()) { // Parent function may or may not be already registered. // Set parent link based on function name matching heuristic. return registerFragment(BF, *TargetBF); } // Case 2: check if TargetBF is a fragment and BF is its parent. return TargetBF->isFragment() && registerFragment(*TargetBF, BF); }; ErrorOr Section = getSectionForAddress(Address); if (!Section) return false; // The upper bound is defined by containing object, section limits, and // the next jump table in memory. uint64_t UpperBound = Section->getEndAddress(); const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); if (JumpTableBD && JumpTableBD->getSize()) { assert(JumpTableBD->getEndAddress() <= UpperBound && "data object cannot cross a section boundary"); UpperBound = JumpTableBD->getEndAddress(); } if (NextJTAddress) UpperBound = std::min(NextJTAddress, UpperBound); LLVM_DEBUG({ using JTT = JumpTable::JumpTableType; dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", Address, BF.getPrintName(), Type == JTT::JTT_PIC ? "PIC" : "Normal"); }); const uint64_t EntrySize = getJumpTableEntrySize(Type); for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; EntryAddress += EntrySize) { LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) << " -> "); // Check if there's a proper relocation against the jump table entry. if (HasRelocations) { if (Type == JumpTable::JTT_PIC && !DataPCRelocations.count(EntryAddress)) { LLVM_DEBUG( dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); break; } if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { LLVM_DEBUG( dbgs() << "FAIL: JTT_NORMAL table, no relocation for this address\n"); break; } } const uint64_t Value = (Type == JumpTable::JTT_PIC) ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) : *getPointerAtAddress(EntryAddress); // __builtin_unreachable() case. if (Value == BF.getAddress() + BF.getSize()) { addEntryAddress(Value); HasUnreachable = true; LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); continue; } // Function or one of its fragments. BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); // We assume that a jump table cannot have function start as an entry. if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) { LLVM_DEBUG({ if (!BF.containsAddress(Value)) { dbgs() << "FAIL: function doesn't contain this address\n"; if (TargetBF) { dbgs() << " ! function containing this address: " << TargetBF->getPrintName() << '\n'; if (TargetBF->isFragment()) { dbgs() << " ! is a fragment"; for (BinaryFunction *Parent : TargetBF->ParentFragments) dbgs() << ", parent: " << Parent->getPrintName(); dbgs() << '\n'; } } } if (Value == BF.getAddress()) dbgs() << "FAIL: jump table cannot have function start as an entry\n"; }); break; } // Check there's an instruction at this offset. if (TargetBF->getState() == BinaryFunction::State::Disassembled && !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); break; } ++NumRealEntries; LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); if (TargetBF != &BF) BF.setHasIndirectTargetToSplitFragment(true); addEntryAddress(Value); } // It's a jump table if the number of real entries is more than 1, or there's // one real entry and "unreachable" targets. If there are only multiple // "unreachable" targets, then it's not a jump table. return NumRealEntries + HasUnreachable >= 2; } void BinaryContext::populateJumpTables() { LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() << '\n'); for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; ++JTI) { JumpTable *JT = JTI->second; bool NonSimpleParent = false; for (BinaryFunction *BF : JT->Parents) NonSimpleParent |= !BF->isSimple(); if (NonSimpleParent) continue; uint64_t NextJTAddress = 0; auto NextJTI = std::next(JTI); if (NextJTI != JTE) NextJTAddress = NextJTI->second->getAddress(); const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), NextJTAddress, &JT->EntriesAsAddress); if (!Success) { LLVM_DEBUG({ dbgs() << "failed to analyze "; JT->print(dbgs()); if (NextJTI != JTE) { dbgs() << "next "; NextJTI->second->print(dbgs()); } }); llvm_unreachable("jump table heuristic failure"); } for (BinaryFunction *Frag : JT->Parents) { for (uint64_t EntryAddress : JT->EntriesAsAddress) // if target is builtin_unreachable if (EntryAddress == Frag->getAddress() + Frag->getSize()) { Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), Frag->getSize()); } else if (EntryAddress >= Frag->getAddress() && EntryAddress < Frag->getAddress() + Frag->getSize()) { Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); } } // In strict mode, erase PC-relative relocation record. Later we check that // all such records are erased and thus have been accounted for. if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { for (uint64_t Address = JT->getAddress(); Address < JT->getAddress() + JT->getSize(); Address += JT->EntrySize) { DataPCRelocations.erase(DataPCRelocations.find(Address)); } } // Mark to skip the function and all its fragments. for (BinaryFunction *Frag : JT->Parents) if (Frag->hasIndirectTargetToSplitFragment()) addFragmentsToSkip(Frag); } if (opts::StrictMode && DataPCRelocations.size()) { LLVM_DEBUG({ dbgs() << DataPCRelocations.size() << " unclaimed PC-relative relocations left in data:\n"; for (uint64_t Reloc : DataPCRelocations) dbgs() << Twine::utohexstr(Reloc) << '\n'; }); assert(0 && "unclaimed PC-relative relocations left in data\n"); } clearList(DataPCRelocations); } void BinaryContext::skipMarkedFragments() { std::vector FragmentQueue; // Copy the functions to FragmentQueue. FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); auto addToWorklist = [&](BinaryFunction *Function) -> void { if (FragmentsToSkip.count(Function)) return; FragmentQueue.push_back(Function); addFragmentsToSkip(Function); }; // Functions containing split jump tables need to be skipped with all // fragments (transitively). for (size_t I = 0; I != FragmentQueue.size(); I++) { BinaryFunction *BF = FragmentQueue[I]; assert(FragmentsToSkip.count(BF) && "internal error in traversing function fragments"); if (opts::Verbosity >= 1) errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; BF->setSimple(false); BF->setHasIndirectTargetToSplitFragment(true); llvm::for_each(BF->Fragments, addToWorklist); llvm::for_each(BF->ParentFragments, addToWorklist); } if (!FragmentsToSkip.empty()) errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function" << (FragmentsToSkip.size() == 1 ? "" : "s") << " due to cold fragments\n"; } MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, uint64_t Size, uint16_t Alignment, unsigned Flags) { auto Itr = BinaryDataMap.find(Address); if (Itr != BinaryDataMap.end()) { assert(Itr->second->getSize() == Size || !Size); return Itr->second->getSymbol(); } std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); assert(!GlobalSymbols.count(Name) && "created name is not unique"); return registerNameAtAddress(Name, Address, Size, Alignment, Flags); } MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { return Ctx->getOrCreateSymbol(Name); } BinaryFunction *BinaryContext::createBinaryFunction( const std::string &Name, BinarySection &Section, uint64_t Address, uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { auto Result = BinaryFunctions.emplace( Address, BinaryFunction(Name, Section, Address, Size, *this)); assert(Result.second == true && "unexpected duplicate function"); BinaryFunction *BF = &Result.first->second; registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, Alignment); setSymbolToFunctionMap(BF->getSymbol(), BF); return BF; } const MCSymbol * BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, JumpTable::JumpTableType Type) { auto isFragmentOf = [](BinaryFunction *Fragment, BinaryFunction *Parent) { return (Fragment->isFragment() && Fragment->isParentFragment(Parent)); }; (void)isFragmentOf; // Two fragments of same function access same jump table if (JumpTable *JT = getJumpTableContainingAddress(Address)) { assert(JT->Type == Type && "jump table types have to match"); assert(Address == JT->getAddress() && "unexpected non-empty jump table"); // Prevent associating a jump table to a specific fragment twice. // This simple check arises from the assumption: no more than 2 fragments. if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) { assert((isFragmentOf(JT->Parents[0], &Function) || isFragmentOf(&Function, JT->Parents[0])) && "cannot re-use jump table of a different function"); // Duplicate the entry for the parent function for easy access JT->Parents.push_back(&Function); if (opts::Verbosity > 2) { outs() << "BOLT-INFO: Multiple fragments access same jump table: " << JT->Parents[0]->getPrintName() << "; " << Function.getPrintName() << "\n"; JT->print(outs()); } Function.JumpTables.emplace(Address, JT); JT->Parents[0]->setHasIndirectTargetToSplitFragment(true); JT->Parents[1]->setHasIndirectTargetToSplitFragment(true); } bool IsJumpTableParent = false; (void)IsJumpTableParent; for (BinaryFunction *Frag : JT->Parents) if (Frag == &Function) IsJumpTableParent = true; assert(IsJumpTableParent && "cannot re-use jump table of a different function"); return JT->getFirstLabel(); } // Re-use the existing symbol if possible. MCSymbol *JTLabel = nullptr; if (BinaryData *Object = getBinaryDataAtAddress(Address)) { if (!isInternalSymbolName(Object->getSymbol()->getName())) JTLabel = Object->getSymbol(); } const uint64_t EntrySize = getJumpTableEntrySize(Type); if (!JTLabel) { const std::string JumpTableName = generateJumpTableName(Function, Address); JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); } LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() << " in function " << Function << '\n'); JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, JumpTable::LabelMapType{{0, JTLabel}}, *getSectionForAddress(Address)); JT->Parents.push_back(&Function); if (opts::Verbosity > 2) JT->print(outs()); JumpTables.emplace(Address, JT); // Duplicate the entry for the parent function for easy access. Function.JumpTables.emplace(Address, JT); return JTLabel; } std::pair BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, const MCSymbol *OldLabel) { auto L = scopeLock(); unsigned Offset = 0; bool Found = false; for (std::pair Elmt : JT->Labels) { if (Elmt.second != OldLabel) continue; Offset = Elmt.first; Found = true; break; } assert(Found && "Label not found"); (void)Found; MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); JumpTable *NewJT = new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, JumpTable::LabelMapType{{Offset, NewLabel}}, *getSectionForAddress(JT->getAddress())); NewJT->Parents = JT->Parents; NewJT->Entries = JT->Entries; NewJT->Counts = JT->Counts; uint64_t JumpTableID = ++DuplicatedJumpTables; // Invert it to differentiate from regular jump tables whose IDs are their // addresses in the input binary memory space JumpTableID = ~JumpTableID; JumpTables.emplace(JumpTableID, NewJT); Function.JumpTables.emplace(JumpTableID, NewJT); return std::make_pair(JumpTableID, NewLabel); } std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, uint64_t Address) { size_t Id; uint64_t Offset = 0; if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { Offset = Address - JT->getAddress(); auto Itr = JT->Labels.find(Offset); if (Itr != JT->Labels.end()) return std::string(Itr->second->getName()); Id = JumpTableIds.at(JT->getAddress()); } else { Id = JumpTableIds[Address] = BF.JumpTables.size(); } return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + (Offset ? ("." + std::to_string(Offset)) : "")); } bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { // FIXME: aarch64 support is missing. if (!isX86()) return true; if (BF.getSize() == BF.getMaxSize()) return true; ErrorOr> FunctionData = BF.getData(); assert(FunctionData && "cannot get function as data"); uint64_t Offset = BF.getSize(); MCInst Instr; uint64_t InstrSize = 0; uint64_t InstrAddress = BF.getAddress() + Offset; using std::placeholders::_1; // Skip instructions that satisfy the predicate condition. auto skipInstructions = [&](std::function Predicate) { const uint64_t StartOffset = Offset; for (; Offset < BF.getMaxSize(); Offset += InstrSize, InstrAddress += InstrSize) { if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), InstrAddress, nulls())) break; if (!Predicate(Instr)) break; } return Offset - StartOffset; }; // Skip a sequence of zero bytes. auto skipZeros = [&]() { const uint64_t StartOffset = Offset; for (; Offset < BF.getMaxSize(); ++Offset) if ((*FunctionData)[Offset] != 0) break; return Offset - StartOffset; }; // Accept the whole padding area filled with breakpoints. auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) return true; auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); // Some functions have a jump to the next function or to the padding area // inserted after the body. auto isSkipJump = [&](const MCInst &Instr) { uint64_t TargetAddress = 0; if (MIB->isUnconditionalBranch(Instr) && MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { if (TargetAddress >= InstrAddress + InstrSize && TargetAddress <= BF.getAddress() + BF.getMaxSize()) { return true; } } return false; }; // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || skipZeros()) ; if (Offset == BF.getMaxSize()) return true; if (opts::Verbosity >= 1) { errs() << "BOLT-WARNING: bad padding at address 0x" << Twine::utohexstr(BF.getAddress() + BF.getSize()) << " starting at offset " << (Offset - BF.getSize()) << " in function " << BF << '\n' << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) << '\n'; } return false; } void BinaryContext::adjustCodePadding() { for (auto &BFI : BinaryFunctions) { BinaryFunction &BF = BFI.second; if (!shouldEmit(BF)) continue; if (!hasValidCodePadding(BF)) { if (HasRelocations) { if (opts::Verbosity >= 1) { outs() << "BOLT-INFO: function " << BF << " has invalid padding. Ignoring the function.\n"; } BF.setIgnored(); } else { BF.setMaxSize(BF.getSize()); } } } } MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, uint64_t Size, uint16_t Alignment, unsigned Flags) { // Register the name with MCContext. MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); auto GAI = BinaryDataMap.find(Address); BinaryData *BD; if (GAI == BinaryDataMap.end()) { ErrorOr SectionOrErr = getSectionForAddress(Address); BinarySection &Section = SectionOrErr ? SectionOrErr.get() : absoluteSection(); BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, Section, Flags); GAI = BinaryDataMap.emplace(Address, BD).first; GlobalSymbols[Name] = BD; updateObjectNesting(GAI); } else { BD = GAI->second; if (!BD->hasName(Name)) { GlobalSymbols[Name] = BD; BD->Symbols.push_back(Symbol); } } return Symbol; } const BinaryData * BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { auto NI = BinaryDataMap.lower_bound(Address); auto End = BinaryDataMap.end(); if ((NI != End && Address == NI->first) || ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { if (NI->second->containsAddress(Address)) return NI->second; // If this is a sub-symbol, see if a parent data contains the address. const BinaryData *BD = NI->second->getParent(); while (BD) { if (BD->containsAddress(Address)) return BD; BD = BD->getParent(); } } return nullptr; } bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { auto NI = BinaryDataMap.find(Address); assert(NI != BinaryDataMap.end()); if (NI == BinaryDataMap.end()) return false; // TODO: it's possible that a jump table starts at the same address // as a larger blob of private data. When we set the size of the // jump table, it might be smaller than the total blob size. In this // case we just leave the original size since (currently) it won't really // affect anything. assert((!NI->second->Size || NI->second->Size == Size || (NI->second->isJumpTable() && NI->second->Size > Size)) && "can't change the size of a symbol that has already had its " "size set"); if (!NI->second->Size) { NI->second->Size = Size; updateObjectNesting(NI); return true; } return false; } void BinaryContext::generateSymbolHashes() { auto isPadding = [](const BinaryData &BD) { StringRef Contents = BD.getSection().getContents(); StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); return (BD.getName().startswith("HOLEat") || SymData.find_first_not_of(0) == StringRef::npos); }; uint64_t NumCollisions = 0; for (auto &Entry : BinaryDataMap) { BinaryData &BD = *Entry.second; StringRef Name = BD.getName(); if (!isInternalSymbolName(Name)) continue; // First check if a non-anonymous alias exists and move it to the front. if (BD.getSymbols().size() > 1) { auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { return !isInternalSymbolName(Symbol->getName()); }); if (Itr != BD.getSymbols().end()) { size_t Idx = std::distance(BD.getSymbols().begin(), Itr); std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); continue; } } // We have to skip 0 size symbols since they will all collide. if (BD.getSize() == 0) { continue; } const uint64_t Hash = BD.getSection().hash(BD); const size_t Idx = Name.find("0x"); std::string NewName = (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); if (getBinaryDataByName(NewName)) { // Ignore collisions for symbols that appear to be padding // (i.e. all zeros or a "hole") if (!isPadding(BD)) { if (opts::Verbosity) { errs() << "BOLT-WARNING: collision detected when hashing " << BD << " with new name (" << NewName << "), skipping.\n"; } ++NumCollisions; } continue; } BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); GlobalSymbols[NewName] = &BD; } if (NumCollisions) { errs() << "BOLT-WARNING: " << NumCollisions << " collisions detected while hashing binary objects"; if (!opts::Verbosity) errs() << ". Use -v=1 to see the list."; errs() << '\n'; } } bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, BinaryFunction &Function) const { if (!isPotentialFragmentByName(TargetFunction, Function)) return false; assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); if (TargetFunction.isParentFragment(&Function)) return true; TargetFunction.addParentFragment(Function); Function.addFragment(TargetFunction); if (!HasRelocations) { TargetFunction.setSimple(false); Function.setSimple(false); } if (opts::Verbosity >= 1) { outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " << Function << '\n'; } return true; } void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, MCInst &LoadLowBits, MCInst &LoadHiBits, uint64_t Target) { const MCSymbol *TargetSymbol; uint64_t Addend = 0; std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, /*IsPCRel*/ true); int64_t Val; MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, ELF::R_AARCH64_ADR_PREL_PG_HI21); MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); } bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); if (TargetFunction) return false; ErrorOr Section = getSectionForAddress(Address); assert(Section && "cannot get section for referenced address"); if (!Section->isText()) return false; bool Ret = false; StringRef SectionContents = Section->getContents(); uint64_t Offset = Address - Section->getAddress(); const uint64_t MaxSize = SectionContents.size() - Offset; const uint8_t *Bytes = reinterpret_cast(SectionContents.data()); ArrayRef Data(Bytes + Offset, MaxSize); auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, MCInst &Instruction, uint64_t Offset, uint64_t AbsoluteInstrAddr, uint64_t TotalSize) -> bool { MCInst *TargetHiBits, *TargetLowBits; uint64_t TargetAddress, Count; Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), AbsoluteInstrAddr, Instruction, TargetHiBits, TargetLowBits, TargetAddress); if (!Count) return false; if (MatchOnly) return true; // NOTE The target symbol was created during disassemble's // handleExternalReference const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), *Section, Address, TotalSize); addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, TargetAddress); MIB->addAnnotation(Instruction, "AArch64Veneer", true); Veneer->addInstruction(Offset, std::move(Instruction)); --Count; for (auto It = std::prev(Instructions.end()); Count != 0; It = std::prev(It), --Count) { MIB->addAnnotation(It->second, "AArch64Veneer", true); Veneer->addInstruction(It->first, std::move(It->second)); } Veneer->getOrCreateLocalLabel(Address); Veneer->setMaxSize(TotalSize); Veneer->updateState(BinaryFunction::State::Disassembled); LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address << "\n"); return true; }; uint64_t Size = 0, TotalSize = 0; BinaryFunction::InstrMapType VeneerInstructions; for (Offset = 0; Offset < MaxSize; Offset += Size) { MCInst Instruction; const uint64_t AbsoluteInstrAddr = Address + Offset; if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), AbsoluteInstrAddr, nulls())) break; TotalSize += Size; if (MIB->isBranch(Instruction)) { Ret = matchVeneer(VeneerInstructions, Instruction, Offset, AbsoluteInstrAddr, TotalSize); break; } VeneerInstructions.emplace(Offset, std::move(Instruction)); } return Ret; } void BinaryContext::processInterproceduralReferences() { for (const std::pair &It : InterproceduralReferences) { BinaryFunction &Function = *It.first; uint64_t Address = It.second; if (!Address || Function.isIgnored()) continue; BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); if (&Function == TargetFunction) continue; if (TargetFunction) { if (TargetFunction->isFragment() && !registerFragment(*TargetFunction, Function)) { errs() << "BOLT-WARNING: interprocedural reference between unrelated " "fragments: " << Function.getPrintName() << " and " << TargetFunction->getPrintName() << '\n'; } if (uint64_t Offset = Address - TargetFunction->getAddress()) TargetFunction->addEntryPointAtOffset(Offset); continue; } // Check if address falls in function padding space - this could be // unmarked data in code. In this case adjust the padding space size. ErrorOr Section = getSectionForAddress(Address); assert(Section && "cannot get section for referenced address"); if (!Section->isText()) continue; // PLT requires special handling and could be ignored in this context. StringRef SectionName = Section->getName(); if (SectionName == ".plt" || SectionName == ".plt.got") continue; // Check if it is aarch64 veneer written at Address if (isAArch64() && handleAArch64Veneer(Address)) continue; if (opts::processAllFunctions()) { errs() << "BOLT-ERROR: cannot process binaries with unmarked " << "object in code at address 0x" << Twine::utohexstr(Address) << " belonging to section " << SectionName << " in current mode\n"; exit(1); } TargetFunction = getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false, /*UseMaxSize=*/true); // We are not going to overwrite non-simple functions, but for simple // ones - adjust the padding size. if (TargetFunction && TargetFunction->isSimple()) { errs() << "BOLT-WARNING: function " << *TargetFunction << " has an object detected in a padding region at address 0x" << Twine::utohexstr(Address) << '\n'; TargetFunction->setMaxSize(TargetFunction->getSize()); } } InterproceduralReferences.clear(); } void BinaryContext::postProcessSymbolTable() { fixBinaryDataHoles(); bool Valid = true; for (auto &Entry : BinaryDataMap) { BinaryData *BD = Entry.second; if ((BD->getName().startswith("SYMBOLat") || BD->getName().startswith("DATAat")) && !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && BD->getSection()) { errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; Valid = false; } } assert(Valid); (void)Valid; generateSymbolHashes(); } void BinaryContext::foldFunction(BinaryFunction &ChildBF, BinaryFunction &ParentBF) { assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && "cannot merge functions with multiple entry points"); std::unique_lock WriteCtxLock(CtxMutex, std::defer_lock); std::unique_lock WriteSymbolMapLock( SymbolToFunctionMapMutex, std::defer_lock); const StringRef ChildName = ChildBF.getOneName(); // Move symbols over and update bookkeeping info. for (MCSymbol *Symbol : ChildBF.getSymbols()) { ParentBF.getSymbols().push_back(Symbol); WriteSymbolMapLock.lock(); SymbolToFunctionMap[Symbol] = &ParentBF; WriteSymbolMapLock.unlock(); // NB: there's no need to update BinaryDataMap and GlobalSymbols. } ChildBF.getSymbols().clear(); // Move other names the child function is known under. llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); ChildBF.Aliases.clear(); if (HasRelocations) { // Merge execution counts of ChildBF into those of ParentBF. // Without relocations, we cannot reliably merge profiles as both functions // continue to exist and either one can be executed. ChildBF.mergeProfileDataInto(ParentBF); std::shared_lock ReadBfsLock(BinaryFunctionsMutex, std::defer_lock); std::unique_lock WriteBfsLock(BinaryFunctionsMutex, std::defer_lock); // Remove ChildBF from the global set of functions in relocs mode. ReadBfsLock.lock(); auto FI = BinaryFunctions.find(ChildBF.getAddress()); ReadBfsLock.unlock(); assert(FI != BinaryFunctions.end() && "function not found"); assert(&ChildBF == &FI->second && "function mismatch"); WriteBfsLock.lock(); ChildBF.clearDisasmState(); FI = BinaryFunctions.erase(FI); WriteBfsLock.unlock(); } else { // In non-relocation mode we keep the function, but rename it. std::string NewName = "__ICF_" + ChildName.str(); WriteCtxLock.lock(); ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); WriteCtxLock.unlock(); ChildBF.setFolded(&ParentBF); } } void BinaryContext::fixBinaryDataHoles() { assert(validateObjectNesting() && "object nesting inconsitency detected"); for (BinarySection &Section : allocatableSections()) { std::vector> Holes; auto isNotHole = [&Section](const binary_data_iterator &Itr) { BinaryData *BD = Itr->second; bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && (BD->getName().startswith("SYMBOLat0x") || BD->getName().startswith("DATAat0x") || BD->getName().startswith("ANONYMOUS"))); return !isHole && BD->getSection() == Section && !BD->getParent(); }; auto BDStart = BinaryDataMap.begin(); auto BDEnd = BinaryDataMap.end(); auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); uint64_t EndAddress = Section.getAddress(); while (Itr != End) { if (Itr->second->getAddress() > EndAddress) { uint64_t Gap = Itr->second->getAddress() - EndAddress; Holes.emplace_back(EndAddress, Gap); } EndAddress = Itr->second->getEndAddress(); ++Itr; } if (EndAddress < Section.getEndAddress()) Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); // If there is already a symbol at the start of the hole, grow that symbol // to cover the rest. Otherwise, create a new symbol to cover the hole. for (std::pair &Hole : Holes) { BinaryData *BD = getBinaryDataAtAddress(Hole.first); if (BD) { // BD->getSection() can be != Section if there are sections that // overlap. In this case it is probably safe to just skip the holes // since the overlapping section will not(?) have any symbols in it. if (BD->getSection() == Section) setBinaryDataSize(Hole.first, Hole.second); } else { getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); } } } assert(validateObjectNesting() && "object nesting inconsitency detected"); assert(validateHoles() && "top level hole detected in object map"); } void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { const BinarySection *CurrentSection = nullptr; bool FirstSection = true; for (auto &Entry : BinaryDataMap) { const BinaryData *BD = Entry.second; const BinarySection &Section = BD->getSection(); if (FirstSection || Section != *CurrentSection) { uint64_t Address, Size; StringRef Name = Section.getName(); if (Section) { Address = Section.getAddress(); Size = Section.getSize(); } else { Address = BD->getAddress(); Size = BD->getSize(); } OS << "BOLT-INFO: Section " << Name << ", " << "0x" + Twine::utohexstr(Address) << ":" << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; CurrentSection = &Section; FirstSection = false; } OS << "BOLT-INFO: "; const BinaryData *P = BD->getParent(); while (P) { OS << " "; P = P->getParent(); } OS << *BD << "\n"; } } Expected BinaryContext::getDwarfFile( StringRef Directory, StringRef FileName, unsigned FileNumber, Optional Checksum, Optional Source, unsigned CUID, unsigned DWARFVersion) { DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, FileNumber); } unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, const uint32_t SrcCUID, unsigned FileIndex) { DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit(SrcUnit); const std::vector &FileNames = LineTable->Prologue.FileNames; // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 // means empty dir. assert(FileIndex > 0 && FileIndex <= FileNames.size() && "FileIndex out of range for the compilation unit."); StringRef Dir = ""; if (FileNames[FileIndex - 1].DirIdx != 0) { if (Optional DirName = dwarf::toString( LineTable->Prologue .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { Dir = *DirName; } } StringRef FileName = ""; if (Optional FName = dwarf::toString(FileNames[FileIndex - 1].Name)) FileName = *FName; assert(FileName != ""); DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID, DstUnit->getVersion())); } std::vector BinaryContext::getSortedFunctions() { std::vector SortedFunctions(BinaryFunctions.size()); llvm::transform(BinaryFunctions, SortedFunctions.begin(), [](std::pair &BFI) { return &BFI.second; }); llvm::stable_sort(SortedFunctions, [](const BinaryFunction *A, const BinaryFunction *B) { if (A->hasValidIndex() && B->hasValidIndex()) { return A->getIndex() < B->getIndex(); } return A->hasValidIndex(); }); return SortedFunctions; } std::vector BinaryContext::getAllBinaryFunctions() { std::vector AllFunctions; AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); llvm::transform(BinaryFunctions, std::back_inserter(AllFunctions), [](std::pair &BFI) { return &BFI.second; }); llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); return AllFunctions; } Optional BinaryContext::getDWOCU(uint64_t DWOId) { auto Iter = DWOCUs.find(DWOId); if (Iter == DWOCUs.end()) return None; return Iter->second; } DWARFContext *BinaryContext::getDWOContext() const { if (DWOCUs.empty()) return nullptr; return &DWOCUs.begin()->second->getContext(); } /// Handles DWO sections that can either be in .o, .dwo or .dwp files. void BinaryContext::preprocessDWODebugInfo() { for (const std::unique_ptr &CU : DwCtx->compile_units()) { DWARFUnit *const DwarfUnit = CU.get(); if (llvm::Optional DWOId = DwarfUnit->getDWOId()) { DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); if (!DWOCU->isDWOUnit()) { std::string DWOName = dwarf::toString( DwarfUnit->getUnitDIE().find( {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), ""); outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " << DWOName << " was not retrieved and won't be updated. Please check " "relative path.\n"; continue; } DWOCUs[*DWOId] = DWOCU; } } } void BinaryContext::preprocessDebugInfo() { struct CURange { uint64_t LowPC; uint64_t HighPC; DWARFUnit *Unit; bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } }; // Building a map of address ranges to CUs similar to .debug_aranges and use // it to assign CU to functions. std::vector AllRanges; AllRanges.reserve(DwCtx->getNumCompileUnits()); for (const std::unique_ptr &CU : DwCtx->compile_units()) { Expected RangesOrError = CU->getUnitDIE().getAddressRanges(); if (!RangesOrError) { consumeError(RangesOrError.takeError()); continue; } for (DWARFAddressRange &Range : *RangesOrError) { // Parts of the debug info could be invalidated due to corresponding code // being removed from the binary by the linker. Hence we check if the // address is a valid one. if (containsAddress(Range.LowPC)) AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); } ContainsDwarf5 |= CU->getVersion() >= 5; ContainsDwarfLegacy |= CU->getVersion() < 5; } llvm::sort(AllRanges); for (auto &KV : BinaryFunctions) { const uint64_t FunctionAddress = KV.first; BinaryFunction &Function = KV.second; auto It = llvm::partition_point( AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); if (It != AllRanges.end() && It->LowPC <= FunctionAddress) Function.setDWARFUnit(It->Unit); } // Discover units with debug info that needs to be updated. for (const auto &KV : BinaryFunctions) { const BinaryFunction &BF = KV.second; if (shouldEmit(BF) && BF.getDWARFUnit()) ProcessedCUs.insert(BF.getDWARFUnit()); } // Clear debug info for functions from units that we are not going to process. for (auto &KV : BinaryFunctions) { BinaryFunction &BF = KV.second; if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) BF.setDWARFUnit(nullptr); } if (opts::Verbosity >= 1) { outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; } preprocessDWODebugInfo(); // Populate MCContext with DWARF files from all units. StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); for (const std::unique_ptr &CU : DwCtx->compile_units()) { const uint64_t CUID = CU->getOffset(); DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( GlobalPrefix + "line_table_start" + Twine(CUID))); if (!ProcessedCUs.count(CU.get())) continue; const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit(CU.get()); const std::vector &FileNames = LineTable->Prologue.FileNames; uint16_t DwarfVersion = LineTable->Prologue.getVersion(); if (DwarfVersion >= 5) { Optional Checksum; if (LineTable->Prologue.ContentTypes.HasMD5) Checksum = LineTable->Prologue.FileNames[0].Checksum; Optional Name = dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); if (Optional DWOID = CU->getDWOId()) { auto Iter = DWOCUs.find(*DWOID); assert(Iter != DWOCUs.end() && "DWO CU was not found."); Name = dwarf::toString( Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); } BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, None); } BinaryLineTable.setDwarfVersion(DwarfVersion); // Assign a unique label to every line table, one per CU. // Make sure empty debug line tables are registered too. if (FileNames.empty()) { cantFail( getDwarfFile("", "", 0, None, None, CUID, DwarfVersion)); continue; } const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 // means empty dir. StringRef Dir = ""; if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) if (Optional DirName = dwarf::toString( LineTable->Prologue .IncludeDirectories[FileNames[I].DirIdx - Offset])) Dir = *DirName; StringRef FileName = ""; if (Optional FName = dwarf::toString(FileNames[I].Name)) FileName = *FName; assert(FileName != ""); Optional Checksum; if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) Checksum = LineTable->Prologue.FileNames[I].Checksum; cantFail( getDwarfFile(Dir, FileName, 0, Checksum, None, CUID, DwarfVersion)); } } } bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { if (Function.isPseudo()) return false; if (opts::processAllFunctions()) return true; if (Function.isIgnored()) return false; // In relocation mode we will emit non-simple functions with CFG. // If the function does not have a CFG it should be marked as ignored. return HasRelocations || Function.isSimple(); } void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { uint32_t Operation = Inst.getOperation(); switch (Operation) { case MCCFIInstruction::OpSameValue: OS << "OpSameValue Reg" << Inst.getRegister(); break; case MCCFIInstruction::OpRememberState: OS << "OpRememberState"; break; case MCCFIInstruction::OpRestoreState: OS << "OpRestoreState"; break; case MCCFIInstruction::OpOffset: OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); break; case MCCFIInstruction::OpDefCfaRegister: OS << "OpDefCfaRegister Reg" << Inst.getRegister(); break; case MCCFIInstruction::OpDefCfaOffset: OS << "OpDefCfaOffset " << Inst.getOffset(); break; case MCCFIInstruction::OpDefCfa: OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); break; case MCCFIInstruction::OpRelOffset: OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); break; case MCCFIInstruction::OpAdjustCfaOffset: OS << "OfAdjustCfaOffset " << Inst.getOffset(); break; case MCCFIInstruction::OpEscape: OS << "OpEscape"; break; case MCCFIInstruction::OpRestore: OS << "OpRestore Reg" << Inst.getRegister(); break; case MCCFIInstruction::OpUndefined: OS << "OpUndefined Reg" << Inst.getRegister(); break; case MCCFIInstruction::OpRegister: OS << "OpRegister Reg" << Inst.getRegister() << " Reg" << Inst.getRegister2(); break; case MCCFIInstruction::OpWindowSave: OS << "OpWindowSave"; break; case MCCFIInstruction::OpGnuArgsSize: OS << "OpGnuArgsSize"; break; default: OS << "Op#" << Operation; break; } } MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { // For aarch64, the ABI defines mapping symbols so we identify data in the // code section (see IHI0056B). $x identifies a symbol starting code or the // end of a data chunk inside code, $d indentifies start of data. if (!isAArch64() || ELFSymbolRef(Symbol).getSize()) return MarkerSymType::NONE; Expected NameOrError = Symbol.getName(); Expected TypeOrError = Symbol.getType(); if (!TypeOrError || !NameOrError) return MarkerSymType::NONE; if (*TypeOrError != SymbolRef::ST_Unknown) return MarkerSymType::NONE; if (*NameOrError == "$x" || NameOrError->startswith("$x.")) return MarkerSymType::CODE; if (*NameOrError == "$d" || NameOrError->startswith("$d.")) return MarkerSymType::DATA; return MarkerSymType::NONE; } bool BinaryContext::isMarker(const SymbolRef &Symbol) const { return getMarkerType(Symbol) != MarkerSymType::NONE; } static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, const BinaryFunction *Function, DWARFContext *DwCtx) { DebugLineTableRowRef RowRef = DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); if (RowRef == DebugLineTableRowRef::NULL_ROW) return; const DWARFDebugLine::LineTable *LineTable; if (Function && Function->getDWARFUnit() && Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { LineTable = Function->getDWARFLineTable(); } else { LineTable = DwCtx->getLineTableForUnit( DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); } assert(LineTable && "line table expected for instruction with debug info"); const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; StringRef FileName = ""; if (Optional FName = dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) FileName = *FName; OS << " # debug line " << FileName << ":" << Row.Line; if (Row.Column) OS << ":" << Row.Column; if (Row.Discriminator) OS << " discriminator:" << Row.Discriminator; } void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, uint64_t Offset, const BinaryFunction *Function, bool PrintMCInst, bool PrintMemData, bool PrintRelocations, StringRef Endl) const { if (MIB->isEHLabel(Instruction)) { OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl; return; } OS << format(" %08" PRIx64 ": ", Offset); if (MIB->isCFI(Instruction)) { uint32_t Offset = Instruction.getOperand(0).getImm(); OS << "\t!CFI\t$" << Offset << "\t; "; if (Function) printCFI(OS, *Function->getCFIFor(Instruction)); OS << Endl; return; } InstPrinter->printInst(&Instruction, 0, "", *STI, OS); if (MIB->isCall(Instruction)) { if (MIB->isTailCall(Instruction)) OS << " # TAILCALL "; if (MIB->isInvoke(Instruction)) { const Optional EHInfo = MIB->getEHInfo(Instruction); OS << " # handler: "; if (EHInfo->first) OS << *EHInfo->first; else OS << '0'; OS << "; action: " << EHInfo->second; const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); if (GnuArgsSize >= 0) OS << "; GNU_args_size = " << GnuArgsSize; } } else if (MIB->isIndirectBranch(Instruction)) { if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); } else { OS << " # UNKNOWN CONTROL FLOW"; } } if (Optional Offset = MIB->getOffset(Instruction)) OS << " # Offset: " << *Offset; MIB->printAnnotations(Instruction, OS); if (opts::PrintDebugInfo) printDebugInfo(OS, Instruction, Function, DwCtx.get()); if ((opts::PrintRelocations || PrintRelocations) && Function) { const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); Function->printRelocations(OS, Offset, Size); } OS << Endl; if (PrintMCInst) { Instruction.dump_pretty(OS, InstPrinter.get()); OS << Endl; } } Optional BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, uint64_t FileOffset) const { // Find a segment with a matching file offset. for (auto &KV : SegmentMapInfo) { const SegmentInfo &SegInfo = KV.second; if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) { // Use segment's aligned memory offset to calculate the base address. const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment); return MMapAddress - MemOffset; } } return NoneType(); } ErrorOr BinaryContext::getSectionForAddress(uint64_t Address) { auto SI = AddressToSection.upper_bound(Address); if (SI != AddressToSection.begin()) { --SI; uint64_t UpperBound = SI->first + SI->second->getSize(); if (!SI->second->getSize()) UpperBound += 1; if (UpperBound > Address) return *SI->second; } return std::make_error_code(std::errc::bad_address); } ErrorOr BinaryContext::getSectionNameForAddress(uint64_t Address) const { if (ErrorOr Section = getSectionForAddress(Address)) return Section->getName(); return std::make_error_code(std::errc::bad_address); } BinarySection &BinaryContext::registerSection(BinarySection *Section) { auto Res = Sections.insert(Section); (void)Res; assert(Res.second && "can't register the same section twice."); // Only register allocatable sections in the AddressToSection map. if (Section->isAllocatable() && Section->getAddress()) AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); NameToSection.insert( std::make_pair(std::string(Section->getName()), Section)); if (Section->hasSectionRef()) SectionRefToBinarySection.insert( std::make_pair(Section->getSectionRef(), Section)); LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); return *Section; } BinarySection &BinaryContext::registerSection(SectionRef Section) { return registerSection(new BinarySection(*this, Section)); } BinarySection & BinaryContext::registerSection(const Twine &SectionName, const BinarySection &OriginalSection) { return registerSection( new BinarySection(*this, SectionName, OriginalSection)); } BinarySection & BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, unsigned ELFFlags, uint8_t *Data, uint64_t Size, unsigned Alignment) { auto NamedSections = getSectionByName(Name); if (NamedSections.begin() != NamedSections.end()) { assert(std::next(NamedSections.begin()) == NamedSections.end() && "can only update unique sections"); BinarySection *Section = NamedSections.begin()->second; LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); const bool Flag = Section->isAllocatable(); (void)Flag; Section->update(Data, Size, Alignment, ELFType, ELFFlags); LLVM_DEBUG(dbgs() << *Section << "\n"); // FIXME: Fix section flags/attributes for MachO. if (isELF()) assert(Flag == Section->isAllocatable() && "can't change section allocation status"); return *Section; } return registerSection( new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); } void BinaryContext::deregisterSectionName(const BinarySection &Section) { auto NameRange = NameToSection.equal_range(Section.getName().str()); while (NameRange.first != NameRange.second) { if (NameRange.first->second == &Section) { NameToSection.erase(NameRange.first); break; } ++NameRange.first; } } void BinaryContext::deregisterUnusedSections() { ErrorOr AbsSection = getUniqueSectionByName(""); for (auto SI = Sections.begin(); SI != Sections.end();) { BinarySection *Section = *SI; if (Section->hasSectionRef() || Section->getOutputSize() || (AbsSection && Section == &AbsSection.get())) { ++SI; continue; } LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName() << '\n';); deregisterSectionName(*Section); SI = Sections.erase(SI); delete Section; } } bool BinaryContext::deregisterSection(BinarySection &Section) { BinarySection *SectionPtr = &Section; auto Itr = Sections.find(SectionPtr); if (Itr != Sections.end()) { auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); while (Range.first != Range.second) { if (Range.first->second == SectionPtr) { AddressToSection.erase(Range.first); break; } ++Range.first; } deregisterSectionName(*SectionPtr); Sections.erase(Itr); delete SectionPtr; return true; } return false; } void BinaryContext::renameSection(BinarySection &Section, const Twine &NewName) { auto Itr = Sections.find(&Section); assert(Itr != Sections.end() && "Section must exist to be renamed."); Sections.erase(Itr); deregisterSectionName(Section); Section.Name = NewName.str(); Section.setOutputName(NewName); NameToSection.insert(std::make_pair(NewName.str(), &Section)); // Reinsert with the new name. Sections.insert(&Section); } void BinaryContext::printSections(raw_ostream &OS) const { for (BinarySection *const &Section : Sections) OS << "BOLT-INFO: " << *Section << "\n"; } BinarySection &BinaryContext::absoluteSection() { if (ErrorOr Section = getUniqueSectionByName("")) return *Section; return registerOrUpdateSection("", ELF::SHT_NULL, 0u); } ErrorOr BinaryContext::getUnsignedValueAtAddress(uint64_t Address, size_t Size) const { const ErrorOr Section = getSectionForAddress(Address); if (!Section) return std::make_error_code(std::errc::bad_address); if (Section->isVirtual()) return 0; DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), AsmInfo->getCodePointerSize()); auto ValueOffset = static_cast(Address - Section->getAddress()); return DE.getUnsigned(&ValueOffset, Size); } ErrorOr BinaryContext::getSignedValueAtAddress(uint64_t Address, size_t Size) const { const ErrorOr Section = getSectionForAddress(Address); if (!Section) return std::make_error_code(std::errc::bad_address); if (Section->isVirtual()) return 0; DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), AsmInfo->getCodePointerSize()); auto ValueOffset = static_cast(Address - Section->getAddress()); return DE.getSigned(&ValueOffset, Size); } void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t Type, uint64_t Addend, uint64_t Value) { ErrorOr Section = getSectionForAddress(Address); assert(Section && "cannot find section for address"); Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, Value); } void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t Type, uint64_t Addend, uint64_t Value) { ErrorOr Section = getSectionForAddress(Address); assert(Section && "cannot find section for address"); Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, Addend, Value); } bool BinaryContext::removeRelocationAt(uint64_t Address) { ErrorOr Section = getSectionForAddress(Address); assert(Section && "cannot find section for address"); return Section->removeRelocationAt(Address - Section->getAddress()); } const Relocation *BinaryContext::getRelocationAt(uint64_t Address) { ErrorOr Section = getSectionForAddress(Address); if (!Section) return nullptr; return Section->getRelocationAt(Address - Section->getAddress()); } const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) { ErrorOr Section = getSectionForAddress(Address); if (!Section) return nullptr; return Section->getDynamicRelocationAt(Address - Section->getAddress()); } void BinaryContext::markAmbiguousRelocations(BinaryData &BD, const uint64_t Address) { auto setImmovable = [&](BinaryData &BD) { BinaryData *Root = BD.getAtomicRoot(); LLVM_DEBUG(if (Root->isMoveable()) { dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " << "due to ambiguous relocation referencing 0x" << Twine::utohexstr(Address) << '\n'; }); Root->setIsMoveable(false); }; if (Address == BD.getAddress()) { setImmovable(BD); // Set previous symbol as immovable BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); if (Prev && Prev->getEndAddress() == BD.getAddress()) setImmovable(*Prev); } if (Address == BD.getEndAddress()) { setImmovable(BD); // Set next symbol as immovable BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); if (Next && Next->getAddress() == BD.getEndAddress()) setImmovable(*Next); } } BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, uint64_t *EntryDesc) { std::shared_lock Lock(SymbolToFunctionMapMutex); auto BFI = SymbolToFunctionMap.find(Symbol); if (BFI == SymbolToFunctionMap.end()) return nullptr; BinaryFunction *BF = BFI->second; if (EntryDesc) *EntryDesc = BF->getEntryIDForSymbol(Symbol); return BF; } void BinaryContext::exitWithBugReport(StringRef Message, const BinaryFunction &Function) const { errs() << "=======================================\n"; errs() << "BOLT is unable to proceed because it couldn't properly understand " "this function.\n"; errs() << "If you are running the most recent version of BOLT, you may " "want to " "report this and paste this dump.\nPlease check that there is no " "sensitive contents being shared in this dump.\n"; errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; ScopedPrinter SP(errs()); SP.printBinaryBlock("Function contents", *Function.getData()); errs() << "\n"; Function.dump(); errs() << "ERROR: " << Message; errs() << "\n=======================================\n"; exit(1); } BinaryFunction * BinaryContext::createInjectedBinaryFunction(const std::string &Name, bool IsSimple) { InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); BinaryFunction *BF = InjectedBinaryFunctions.back(); setSymbolToFunctionMap(BF->getSymbol(), BF); BF->CurrentState = BinaryFunction::State::CFG; return BF; } std::pair BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { // Adjust branch instruction to match the current layout. if (FixBranches) BF.fixBranches(); // Create local MC context to isolate the effect of ephemeral code emission. IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); MCContext *LocalCtx = MCEInstance.LocalCtx.get(); MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); SmallString<256> Code; raw_svector_ostream VecOS(Code); std::unique_ptr OW = MAB->createObjectWriter(VecOS); std::unique_ptr Streamer(TheTarget->createMCObjectStreamer( *TheTriple, *LocalCtx, std::unique_ptr(MAB), std::move(OW), std::unique_ptr(MCEInstance.MCE.release()), *STI, /*RelaxAll=*/false, /*IncrementalLinkerCompatible=*/false, /*DWARFMustBeAtTheEnd=*/false)); Streamer->initSections(false, *STI); MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); Section->setHasInstructions(true); // Create symbols in the LocalCtx so that they get destroyed with it. MCSymbol *StartLabel = LocalCtx->createTempSymbol(); MCSymbol *EndLabel = LocalCtx->createTempSymbol(); Streamer->switchSection(Section); Streamer->emitLabel(StartLabel); emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(), /*EmitCodeOnly=*/true); Streamer->emitLabel(EndLabel); using LabelRange = std::pair; SmallVector SplitLabels; for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel); MCSectionELF *const SplitSection = LocalCtx->getELFSection( BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); SplitSection->setHasInstructions(true); Streamer->switchSection(SplitSection); Streamer->emitLabel(SplitStartLabel); emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true); Streamer->emitLabel(SplitEndLabel); // To avoid calling MCObjectStreamer::flushPendingLabels() which is // private Streamer->emitBytes(StringRef("")); Streamer->switchSection(Section); } // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or // MCStreamer::Finish(), which does more than we want Streamer->emitBytes(StringRef("")); MCAssembler &Assembler = static_cast(Streamer.get())->getAssembler(); MCAsmLayout Layout(Assembler); Assembler.layout(Layout); const uint64_t HotSize = Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); const uint64_t ColdSize = std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL, [&](const uint64_t Accu, const LabelRange &Labels) { return Accu + Layout.getSymbolOffset(*Labels.second) - Layout.getSymbolOffset(*Labels.first); }); // Clean-up the effect of the code emission. for (const MCSymbol &Symbol : Assembler.symbols()) { MCSymbol *MutableSymbol = const_cast(&Symbol); MutableSymbol->setUndefined(); MutableSymbol->setIsRegistered(false); } return std::make_pair(HotSize, ColdSize); } bool BinaryContext::validateInstructionEncoding( ArrayRef InputSequence) const { MCInst Inst; uint64_t InstSize; DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls()); assert(InstSize == InputSequence.size() && "Disassembled instruction size does not match the sequence."); SmallString<256> Code; SmallVector Fixups; raw_svector_ostream VecOS(Code); MCE->encodeInstruction(Inst, VecOS, Fixups, *STI); auto OutputSequence = ArrayRef((uint8_t *)Code.data(), Code.size()); if (InputSequence != OutputSequence) { if (opts::Verbosity > 1) { errs() << "BOLT-WARNING: mismatched encoding detected\n" << " input: " << InputSequence << '\n' << " output: " << OutputSequence << '\n'; } return false; } return true; } uint64_t BinaryContext::getHotThreshold() const { static uint64_t Threshold = 0; if (Threshold == 0) { Threshold = std::max( (uint64_t)opts::ExecutionCountThreshold, NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); } return Threshold; } BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { auto FI = BinaryFunctions.upper_bound(Address); if (FI == BinaryFunctions.begin()) return nullptr; --FI; const uint64_t UsedSize = UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) return nullptr; return &FI->second; } BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { // First, try to find a function starting at the given address. If the // function was folded, this will get us the original folded function if it // wasn't removed from the list, e.g. in non-relocation mode. auto BFI = BinaryFunctions.find(Address); if (BFI != BinaryFunctions.end()) return &BFI->second; // We might have folded the function matching the object at the given // address. In such case, we look for a function matching the symbol // registered at the original address. The new function (the one that the // original was folded into) will hold the symbol. if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { uint64_t EntryID = 0; BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); if (BF && EntryID == 0) return BF; } return nullptr; } DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( const DWARFAddressRangesVector &InputRanges) const { DebugAddressRangesVector OutputRanges; for (const DWARFAddressRange Range : InputRanges) { auto BFI = BinaryFunctions.lower_bound(Range.LowPC); while (BFI != BinaryFunctions.end()) { const BinaryFunction &Function = BFI->second; if (Function.getAddress() >= Range.HighPC) break; const DebugAddressRangesVector FunctionRanges = Function.getOutputAddressRanges(); llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); std::advance(BFI, 1); } } return OutputRanges; } } // namespace bolt } // namespace llvm