[BOLT][perf2bolt] Fix base address calculation for shared objects

When processing profile data for a shared object or PIE, perf2bolt needs
to calculate the base address of the binary based on the map info reported
by the perf tool. When the mapping data provided is for the second
(or any other than the first) segment and the segment's file offset
does not match its memory offset, perf2bolt makes a wrong assumption
about the binary base address.

Add a function to calculate the binary base address using the reported
memory mapping, and use the returned base for further address
adjustments.

Reviewed By: yota9

Differential Revision: https://reviews.llvm.org/D123755
This commit is contained in:
Maksim Panchenko 2022-04-13 19:39:39 -07:00
parent 48fbcedb38
commit 77b75ca53f
6 changed files with 146 additions and 22 deletions

View File

@ -970,6 +970,15 @@ public:
Sections.end()));
}
/// Return base address for the shared object or PIE based on the segment
/// mapping information.
///
/// \p MMapAddress is the address where one of the segments was mapped.
/// \p FileOffset is the offset in the file of that mapping. Note that
/// \p FileOffset should be page-aligned and could be different from the
/// file offset of the segment itself, which could be unaligned.
///
/// \returns the base address of the binary, or an empty Optional if no
/// segment is found that matches \p FileOffset.
Optional<uint64_t> getBaseAddressForMapping(uint64_t MMapAddress,
uint64_t FileOffset) const;
/// Check if the address belongs to this binary's static allocation space.
bool containsAddress(uint64_t Address) const {
return Address >= FirstAllocAddress && Address < LayoutStartAddress;

View File

@ -168,14 +168,15 @@ private:
/// from the file name in BC.
std::string BuildIDBinaryName;
/// Memory map info for a single file
/// Memory map info for a single file as recorded in perf.data
struct MMapInfo {
uint64_t BaseAddress;
uint64_t Size;
uint64_t Offset;
int32_t PID{-1};
bool Forked{false};
uint64_t Time{0ULL}; // time in micro seconds
uint64_t BaseAddress{0}; /// Base address of the mapped binary.
uint64_t MMapAddress{0}; /// Address of the executable segment.
uint64_t Size{0}; /// Size of the mapping.
uint64_t Offset{0}; /// File offset of the mapped segment.
int32_t PID{-1}; /// Process ID.
bool Forked{false}; /// Was the process forked?
uint64_t Time{0ULL}; /// Time in micro seconds.
};
/// Per-PID map info for the binary
@ -420,12 +421,8 @@ private:
/// correspond to the binary allocated address space, are adjusted to avoid
/// conflicts.
void adjustAddress(uint64_t &Address, const MMapInfo &MMI) const {
if (Address >= MMI.BaseAddress && Address < MMI.BaseAddress + MMI.Size) {
// NOTE: Assumptions about the binary segment load table (PH for ELF)
// Segment file offset equals virtual address (which is true for .so)
// There aren't multiple executable segments loaded because MMapInfo
// doesn't support them.
Address -= MMI.BaseAddress - MMI.Offset;
if (Address >= MMI.MMapAddress && Address < MMI.MMapAddress + MMI.Size) {
Address -= MMI.BaseAddress;
} else if (Address < MMI.Size) {
// Make sure the address is not treated as belonging to the binary.
Address = (-1ULL);

View File

@ -1690,6 +1690,22 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
}
}
/// Compute the base address of the binary from one of its memory mappings.
///
/// Scans SegmentMapInfo for a segment whose file offset, rounded down to the
/// segment alignment, equals \p FileOffset. For the first such segment the
/// base address is \p MMapAddress minus the segment's memory address rounded
/// down to the same alignment.
///
/// \returns the computed base address, or an empty Optional if no segment
/// matches \p FileOffset.
Optional<uint64_t>
BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
                                        uint64_t FileOffset) const {
  // alignDown() requires a non-zero alignment. A segment alignment of 0 is
  // legal in ELF and means "no alignment constraint" (presumably
  // SegInfo.Alignment mirrors p_align -- confirm against where SegmentMapInfo
  // is populated), so leave the value untouched in that case instead of
  // tripping the assertion / dividing by zero.
  auto AlignDownOrSelf = [](uint64_t Value, uint64_t Alignment) -> uint64_t {
    return Alignment ? alignDown(Value, Alignment) : Value;
  };

  // Find a segment with a matching file offset.
  for (const auto &KV : SegmentMapInfo) {
    const SegmentInfo &SegInfo = KV.second;
    if (AlignDownOrSelf(SegInfo.FileOffset, SegInfo.Alignment) != FileOffset)
      continue;

    // Use segment's aligned memory offset to calculate the base address.
    const uint64_t MemOffset =
        AlignDownOrSelf(SegInfo.Address, SegInfo.Alignment);
    return MMapAddress - MemOffset;
  }

  return NoneType();
}
ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
auto SI = AddressToSection.upper_bound(Address);
if (SI != AddressToSection.begin()) {

View File

@ -1943,7 +1943,7 @@ DataAggregator::parseMMapEvent() {
}
const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
if (BaseAddressStr.getAsInteger(0, ParsedInfo.BaseAddress)) {
if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) {
reportError("expected base address");
Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
return make_error_code(llvm::errc::io_error);
@ -2003,7 +2003,7 @@ std::error_code DataAggregator::parseMMapEvents() {
dbgs() << "FileName -> mmap info:\n";
for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo)
dbgs() << " " << Pair.first << " : " << Pair.second.PID << " [0x"
<< Twine::utohexstr(Pair.second.BaseAddress) << ", "
<< Twine::utohexstr(Pair.second.MMapAddress) << ", "
<< Twine::utohexstr(Pair.second.Size) << " @ "
<< Twine::utohexstr(Pair.second.Offset) << "]\n";
});
@ -2017,29 +2017,45 @@ std::error_code DataAggregator::parseMMapEvents() {
auto Range = GlobalMMapInfo.equal_range(NameToUse);
for (auto I = Range.first; I != Range.second; ++I) {
const MMapInfo &MMapInfo = I->second;
if (BC->HasFixedLoadAddress && MMapInfo.BaseAddress) {
MMapInfo &MMapInfo = I->second;
if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
// Check that the binary mapping matches one of the segments.
bool MatchFound = false;
for (auto &KV : BC->SegmentMapInfo) {
SegmentInfo &SegInfo = KV.second;
// The mapping is page-aligned and hence the BaseAddress could be
// The mapping is page-aligned and hence the MMapAddress could be
// different from the segment start address. We cannot know the page
// size of the mapping, but we know it should not exceed the segment
// alignment value. Hence we are performing an approximate check.
if (SegInfo.Address >= MMapInfo.BaseAddress &&
SegInfo.Address - MMapInfo.BaseAddress < SegInfo.Alignment) {
if (SegInfo.Address >= MMapInfo.MMapAddress &&
SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment) {
MatchFound = true;
break;
}
}
if (!MatchFound) {
errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
<< " at 0x" << Twine::utohexstr(MMapInfo.BaseAddress) << '\n';
<< " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n';
continue;
}
}
// Set base address for shared objects.
if (!BC->HasFixedLoadAddress) {
Optional<uint64_t> BaseAddress =
BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset);
if (!BaseAddress) {
errs() << "PERF2BOLT-WARNING: unable to find base address of the "
"binary when memory mapped at 0x"
<< Twine::utohexstr(MMapInfo.MMapAddress)
<< " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
<< ". Ignoring profile data for this mapping\n";
continue;
} else {
MMapInfo.BaseAddress = *BaseAddress;
}
}
BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
}
@ -2110,7 +2126,7 @@ std::error_code DataAggregator::parseTaskEvents() {
LLVM_DEBUG({
for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
outs() << " " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "")
<< ": (0x" << Twine::utohexstr(MMI.second.BaseAddress) << ": 0x"
<< ": (0x" << Twine::utohexstr(MMI.second.MMapAddress) << ": 0x"
<< Twine::utohexstr(MMI.second.Size) << ")\n";
});

View File

@ -0,0 +1,85 @@
#include "bolt/Core/BinaryContext.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/TargetSelect.h"
#include "gtest/gtest.h"
using namespace llvm;
using namespace llvm::object;
using namespace llvm::ELF;
using namespace bolt;
namespace {
/// Value-parameterized fixture that builds a BinaryContext on top of a
/// minimal in-memory ELF64 little-endian header for the architecture given
/// by the test parameter.
struct BinaryContextTester : public testing::TestWithParam<Triple::ArchType> {
  void SetUp() override {
    initializeLLVM();
    prepareElf();
    initializeBOLT();
  }

protected:
  /// Register every LLVM target component.
  void initializeLLVM() {
    llvm::InitializeAllTargetInfos();
    llvm::InitializeAllTargetMCs();
    llvm::InitializeAllAsmParsers();
    llvm::InitializeAllDisassemblers();
    llvm::InitializeAllTargets();
    llvm::InitializeAllAsmPrinters();
  }

  /// Fill ElfBuf with a bare ELF header (magic, class, data encoding and
  /// machine) and wrap it in an ObjectFile.
  void prepareElf() {
    memcpy(ElfBuf, "\177ELF", 4);

    auto *Header = reinterpret_cast<ELF64LE::Ehdr *>(ElfBuf);
    Header->e_ident[llvm::ELF::EI_CLASS] = llvm::ELF::ELFCLASS64;
    Header->e_ident[llvm::ELF::EI_DATA] = llvm::ELF::ELFDATA2LSB;
    if (GetParam() == Triple::aarch64)
      Header->e_machine = EM_AARCH64;
    else
      Header->e_machine = EM_X86_64;

    const StringRef Contents(ElfBuf, sizeof(ElfBuf));
    MemoryBufferRef Source(Contents, "ELF");
    ObjFile = cantFail(ObjectFile::createObjectFile(Source));
  }

  /// Create the BinaryContext under test from the prepared object file.
  void initializeBOLT() {
    ObjectFile *const Obj = ObjFile.get();
    BC = cantFail(BinaryContext::createBinaryContext(
        Obj, true, DWARFContext::create(*Obj)));
    ASSERT_FALSE(!BC);
  }

  /// Backing storage for the synthetic ELF header; zero-initialized.
  char ElfBuf[sizeof(ELF64LE::Ehdr)] = {};
  std::unique_ptr<ObjectFile> ObjFile;
  std::unique_ptr<BinaryContext> BC;
};
} // namespace
// Instantiate the parameterized BinaryContextTester suite once per target.
// Each instantiation is compiled only when the corresponding *_AVAILABLE
// macro is defined.
#ifdef X86_AVAILABLE
// Runs every BinaryContextTester test with Triple::x86_64 as the parameter.
INSTANTIATE_TEST_SUITE_P(X86, BinaryContextTester,
::testing::Values(Triple::x86_64));
#endif
#ifdef AARCH64_AVAILABLE
// Runs every BinaryContextTester test with Triple::aarch64 as the parameter.
INSTANTIATE_TEST_SUITE_P(AArch64, BinaryContextTester,
::testing::Values(Triple::aarch64));
#endif
TEST_P(BinaryContextTester, BaseAddress) {
  // Verify the base address computed for a binary made of four segments
  // whose file offsets and memory addresses diverge after the first one.
  // NOTE(review): the initializer order looks like
  // {address, size, file offset, file size, alignment} -- confirm against
  // the SegmentInfo definition.
  const uint64_t SegAlign = 0x1000;
  BC->SegmentMapInfo[0] = SegmentInfo{0, 0x10e8c2b4, 0, 0x10e8c2b4, SegAlign};
  BC->SegmentMapInfo[0x10e8d2b4] =
      SegmentInfo{0x10e8d2b4, 0x3952faec, 0x10e8c2b4, 0x3952faec, SegAlign};
  BC->SegmentMapInfo[0x4a3bddc0] =
      SegmentInfo{0x4a3bddc0, 0x148e828, 0x4a3bbdc0, 0x148e828, SegAlign};
  BC->SegmentMapInfo[0x4b84d5e8] =
      SegmentInfo{0x4b84d5e8, 0x294f830, 0x4b84a5e8, 0x3d3820, SegAlign};

  const uint64_t MappedAt = 0x7f13f5556000;

  // File offset 0x10e8c000 is the aligned-down offset of the second segment.
  const Optional<uint64_t> Matched =
      BC->getBaseAddressForMapping(MappedAt, 0x10e8c000);
  ASSERT_TRUE(Matched.hasValue());
  ASSERT_EQ(*Matched, 0x7f13e46c9000ULL);

  // No segment has an aligned-down file offset of 0x137a000.
  const Optional<uint64_t> Unmatched =
      BC->getBaseAddressForMapping(MappedAt, 0x137a000);
  ASSERT_FALSE(Unmatched.hasValue());
}

View File

@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS
)
add_bolt_unittest(CoreTests
BinaryContext.cpp
MCPlusBuilder.cpp
)