From d5a963ab8b40fcf7a99acd834e5f10a1a30cc2e5 Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Mon, 17 Oct 2022 10:07:18 -0700 Subject: [PATCH] [PseudoProbe] Replace relocation with offset for entry probe. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the pseudo probe encoding for a function is as follows: - For the first probe, a relocation from it to its physical position in the code body - For subsequent probes, an incremental offset from the current probe to the previous probe The relocation could potentially cause relocation overflow during link time. I'm now replacing it with an offset from the first probe to the function start address. A source function could be lowered into multiple binary functions due to outlining (e.g., coro-split). Since those binary functions have independent link-time layouts, to really avoid relocations from .pseudo_probe sections to .text sections, the replacement offset should really be relative to the start of the probe's enclosing binary function, rather than to the entry of the source function. This requires changing the previous section-based emission scheme to be function-based. The assembly form of the pseudo probe directive is also changed correspondingly, i.e., it now reflects the binary function name. Most of the source functions end up with only one binary function. For those that don't, a sentinel probe is emitted for each of the binary functions with a different name from the source. The sentinel probe indicates the binary function name to differentiate subsequent probes from the ones from a different binary function. 
For example, given the source function ``` Foo() { … Probe 1 … Probe 2 } ``` If it is transformed into two binary functions: ``` Foo: … Foo.outlined: … ``` The encoding for the two binary functions will be separate: ``` GUID of Foo Probe 1 GUID of Foo Sentinel probe of Foo.outlined Probe 2 ``` Then Probe 1 will be decoded against binary `Foo`'s address, and Probe 2 will be decoded against `Foo.outlined`. The sentinel probe of `Foo.outlined` makes sure there's no accidental relocation from `Foo.outlined`'s probes to `Foo`'s entry address. On the BOLT side, to be minimally intrusive, the pseudo probe re-encoding sticks with the old encoding format. This is fine since, unlike the linker, BOLT processes the pseudo probe section as a whole and it is free from relocation overflow issues. The change is downwards compatible as long as there's no mixed use of the old encoding and the new encoding. Reviewed By: wenlei, maksfb Differential Revision: https://reviews.llvm.org/D135912 Differential Revision: https://reviews.llvm.org/D135914 Differential Revision: https://reviews.llvm.org/D136394 --- bolt/lib/Rewrite/RewriteInstance.cpp | 25 ++- llvm/include/llvm/IR/PseudoProbe.h | 11 + llvm/include/llvm/MC/MCObjectFileInfo.h | 2 +- llvm/include/llvm/MC/MCPseudoProbe.h | 53 +++-- llvm/include/llvm/MC/MCStreamer.h | 3 +- .../llvm/Transforms/IPO/SampleProfileProbe.h | 2 - .../CodeGen/AsmPrinter/PseudoProbePrinter.cpp | 3 +- llvm/lib/MC/MCAsmStreamer.cpp | 7 +- llvm/lib/MC/MCObjectFileInfo.cpp | 6 +- llvm/lib/MC/MCParser/AsmParser.cpp | 8 +- llvm/lib/MC/MCPseudoProbe.cpp | 201 ++++++++++-------- llvm/lib/MC/MCStreamer.cpp | 5 +- .../SampleProfile/pseudo-probe-emit-inline.ll | 14 +- .../SampleProfile/pseudo-probe-emit.ll | 21 +- .../llvm-profgen/Inputs/func-split.perfbin | Bin 19408 -> 19920 bytes .../Inputs/inline-cs-pseudoprobe.perfbin | Bin 17976 -> 18632 bytes .../llvm-profgen/inline-force-dwarf.test | 5 +- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 1 - 
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 63 ++++-- llvm/tools/llvm-profgen/ProfiledBinary.h | 23 +- 20 files changed, 291 insertions(+), 162 deletions(-) diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 56d7868be73d..6ea4ba603698 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -599,10 +599,19 @@ void RewriteInstance::parsePseudoProbe() { errs() << "BOLT-WARNING: fail in building GUID2FuncDescMap\n"; return; } + + MCPseudoProbeDecoder::Uint64Set GuidFilter; + MCPseudoProbeDecoder::Uint64Map FuncStartAddrs; + for (const BinaryFunction *F : BC->getAllBinaryFunctions()) { + for (const MCSymbol *Sym : F->getSymbols()) { + FuncStartAddrs[Function::getGUID(NameResolver::restore(Sym->getName()))] = + F->getAddress(); + } + } Contents = PseudoProbeSection->getContents(); if (!BC->ProbeDecoder.buildAddress2ProbeMap( - reinterpret_cast(Contents.data()), - Contents.size())) { + reinterpret_cast(Contents.data()), Contents.size(), + GuidFilter, FuncStartAddrs)) { BC->ProbeDecoder.getAddress2ProbesMap().clear(); errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n"; return; @@ -3426,6 +3435,8 @@ void RewriteInstance::encodePseudoProbes() { // Address of the first probe is absolute. 
// Other probes' address are represented by delta auto EmitDecodedPseudoProbe = [&](MCDecodedPseudoProbe *&CurProbe) { + assert(!isSentinelProbe(CurProbe->getAttributes()) && + "Sentinel probes should not be emitted"); EmitULEB128IntValue(CurProbe->getIndex()); uint8_t PackedType = CurProbe->getType() | (CurProbe->getAttributes() << 4); uint8_t Flag = @@ -3530,9 +3541,17 @@ void RewriteInstance::encodePseudoProbes() { reinterpret_cast(DescContents.data()), DescContents.size()); StringRef ProbeContents = PseudoProbeSection->getOutputContents(); + MCPseudoProbeDecoder::Uint64Set GuidFilter; + MCPseudoProbeDecoder::Uint64Map FuncStartAddrs; + for (const BinaryFunction *F : BC->getAllBinaryFunctions()) { + const uint64_t Addr = + F->isEmitted() ? F->getOutputAddress() : F->getAddress(); + FuncStartAddrs[Function::getGUID( + NameResolver::restore(F->getOneName()))] = Addr; + } DummyDecoder.buildAddress2ProbeMap( reinterpret_cast(ProbeContents.data()), - ProbeContents.size()); + ProbeContents.size(), GuidFilter, FuncStartAddrs); DummyDecoder.printProbesForAllAddresses(outs()); } } diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h index 7d14213143c0..1c109eafde57 100644 --- a/llvm/include/llvm/IR/PseudoProbe.h +++ b/llvm/include/llvm/IR/PseudoProbe.h @@ -24,8 +24,15 @@ class Instruction; constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc"; +enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid }; + enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall }; +enum class PseudoProbeAttributes { + Reserved = 0x1, + Sentinel = 0x2, // A place holder for split function entry address. +}; + // The saturated distrution factor representing 100% for block probes. 
constexpr static uint64_t PseudoProbeFullDistributionFactor = std::numeric_limits::max(); @@ -80,6 +87,10 @@ struct PseudoProbe { float Factor; }; +static inline bool isSentinelProbe(uint32_t Flags) { + return Flags & (uint32_t)PseudoProbeAttributes::Sentinel; +} + Optional extractProbe(const Instruction &Inst); void setProbeDistributionFactor(Instruction &Inst, float Factor); diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h index eae2fc2ffbf1..847b9ffc3cfc 100644 --- a/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -362,7 +362,7 @@ public: MCSection *getKCFITrapSection(const MCSection &TextSec) const; - MCSection *getPseudoProbeSection(const MCSection *TextSec) const; + MCSection *getPseudoProbeSection(const MCSection &TextSec) const; MCSection *getPseudoProbeDescSection(StringRef FuncName) const; diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h index d10d6015cd3c..1b01733e6227 100644 --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -15,7 +15,9 @@ // // FUNCTION BODY (one for each outlined function present in the text section) // GUID (uint64) -// GUID of the function +// GUID of the function's source name which may be different from the +// actual binary linkage name. This GUID will be used to decode and +// generate a profile against the source function name. // NPROBES (ULEB128) // Number of probes originating from this function. 
// NUM_INLINED_FUNCTIONS (ULEB128) @@ -29,7 +31,9 @@ // ATTRIBUTE (uint3) // 1 - reserved // ADDRESS_TYPE (uint1) -// 0 - code address, 1 - address delta +// 0 - code address for regular probes (for downwards compatibility) +// - GUID of linkage name for sentinel probes +// 1 - address delta // CODE_ADDRESS (uint64 or ULEB128) // code address or address delta, depending on ADDRESS_TYPE // INLINED FUNCTION RECORDS @@ -39,11 +43,15 @@ // ID of the callsite probe (ULEB128) // FUNCTION BODY // A FUNCTION BODY entry describing the inlined function. +// +// TODO: retire the ADDRESS_TYPE encoding for code addresses once compatibility +// is no longer an issue. //===----------------------------------------------------------------------===// #ifndef LLVM_MC_MCPSEUDOPROBE_H #define LLVM_MC_MCPSEUDOPROBE_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/PseudoProbe.h" @@ -276,23 +284,20 @@ public: /// Instances of this class represent the pseudo probes inserted into a compile /// unit. -class MCPseudoProbeSection { +class MCPseudoProbeSections { public: - void addPseudoProbe(MCSection *Sec, const MCPseudoProbe &Probe, + void addPseudoProbe(MCSymbol *FuncSym, const MCPseudoProbe &Probe, const MCPseudoProbeInlineStack &InlineStack) { - MCProbeDivisions[Sec].addPseudoProbe(Probe, InlineStack); + MCProbeDivisions[FuncSym].addPseudoProbe(Probe, InlineStack); } // TODO: Sort by getOrdinal to ensure a determinstic section order - using MCProbeDivisionMap = std::map; + using MCProbeDivisionMap = std::map; private: - // A collection of MCPseudoProbe for each text section. The MCPseudoProbes - // are grouped by GUID of the functions where they are from and will be - // encoded by groups. 
In the comdat scenario where a text section really only - // contains the code of a function solely, the probes associated with a comdat - // function are still grouped by GUIDs due to inlining that can bring probes - // from different functions into one function. + // A collection of MCPseudoProbe for each function. The MCPseudoProbes are + // grouped by GUIDs due to inlining that can bring probes from different + // functions into one function. MCProbeDivisionMap MCProbeDivisions; public: @@ -304,18 +309,18 @@ public: }; class MCPseudoProbeTable { - // A collection of MCPseudoProbe in the current module grouped by text - // sections. MCPseudoProbes will be encoded into a corresponding + // A collection of MCPseudoProbe in the current module grouped by + // functions. MCPseudoProbes will be encoded into a corresponding // .pseudoprobe section. With functions emitted as separate comdats, // a text section really only contains the code of a function solely, and the // probes associated with the text section will be emitted into a standalone // .pseudoprobe section that shares the same comdat group with the function. - MCPseudoProbeSection MCProbeSections; + MCPseudoProbeSections MCProbeSections; public: static void emit(MCObjectStreamer *MCOS); - MCPseudoProbeSection &getProbeSections() { return MCProbeSections; } + MCPseudoProbeSections &getProbeSections() { return MCProbeSections; } #ifndef NDEBUG static int DdgPrintIndent; @@ -341,6 +346,9 @@ class MCPseudoProbeDecoder { /// Points to the end of the buffer. const uint8_t *End = nullptr; + /// Whether encoding is based on a starting probe with absolute code address. 
+ bool EncodingIsAddrBased = false; + // Decoding helper function template ErrorOr readUnencodedNumber(); template ErrorOr readUnsignedNumber(); @@ -348,20 +356,21 @@ class MCPseudoProbeDecoder { ErrorOr readString(uint32_t Size); public: + using Uint64Set = DenseSet; + using Uint64Map = DenseMap; + // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map. bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size); - // Decode pseudo_probe section to build address to probes map. - bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size); - // Decode pseudo_probe section to build address to probes map for specifed // functions only. bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size, - std::unordered_set &GuildFilter); + const Uint64Set &GuildFilter, + const Uint64Map &FuncStartAddrs); bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, - uint64_t &LastAddr, - std::unordered_set &GuildFilter); + uint64_t &LastAddr, const Uint64Set &GuildFilter, + const Uint64Map &FuncStartAddrs); // Print pseudo_probe_desc section info void printGUID2FuncDescMap(raw_ostream &OS); diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index ef7ab094217d..2fd0f22899d0 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -1105,7 +1105,8 @@ public: /// Emit the a pseudo probe into the current section. virtual void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, - const MCPseudoProbeInlineStack &InlineStack); + const MCPseudoProbeInlineStack &InlineStack, + MCSymbol *FnSym); /// Set the bundle alignment mode from now on in the section. /// The argument is the power of 2 to which the alignment is set. 
The diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h index ed296d2dd080..ebac3d6a24ef 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -41,8 +41,6 @@ using ProbeFactorMap = std::unordered_map, float, pair_hash>; using FuncProbeFactorMap = StringMap; -enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid }; - class PseudoProbeDescriptor { uint64_t FunctionGUID; uint64_t FunctionHash; diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp index 135eabc34838..3e75b4371033 100644 --- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp @@ -48,5 +48,6 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index, } SmallVector InlineStack(llvm::reverse(ReversedInlineStack)); - Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack); + Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack, + Asm->CurrentFnSym); } diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index c1fcc815d2e3..eaf22eaa73a1 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -378,7 +378,7 @@ public: void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, - const MCPseudoProbeInlineStack &InlineStack) override; + const MCPseudoProbeInlineStack &InlineStack, MCSymbol *FnSym) override; void emitBundleAlignMode(unsigned AlignPow2) override; void emitBundleLock(bool AlignToEnd) override; @@ -2338,13 +2338,16 @@ void MCAsmStreamer::emitInstruction(const MCInst &Inst, void MCAsmStreamer::emitPseudoProbe( uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, - const MCPseudoProbeInlineStack &InlineStack) { + const MCPseudoProbeInlineStack &InlineStack, MCSymbol *FnSym) { OS << "\t.pseudoprobe\t" << 
Guid << " " << Index << " " << Type << " " << Attr; // Emit inline stack like // @ GUIDmain:3 @ GUIDCaller:1 @ GUIDDirectCaller:11 for (const auto &Site : InlineStack) OS << " @ " << std::get<0>(Site) << ":" << std::get<1>(Site); + + OS << " " << FnSym->getName(); + EmitEOL(); } diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index 1c032369a4e6..b5460ced752a 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -1160,11 +1160,11 @@ MCObjectFileInfo::getKCFITrapSection(const MCSection &TextSec) const { } MCSection * -MCObjectFileInfo::getPseudoProbeSection(const MCSection *TextSec) const { +MCObjectFileInfo::getPseudoProbeSection(const MCSection &TextSec) const { if (Ctx->getObjectFileType() == MCContext::IsELF) { - const auto *ElfSec = static_cast(TextSec); + const auto &ElfSec = static_cast(TextSec); // Create a separate section for probes that comes with a comdat function. - if (const MCSymbol *Group = ElfSec->getGroup()) { + if (const MCSymbol *Group = ElfSec.getGroup()) { auto *S = static_cast(PseudoProbeSection); auto Flags = S->getFlags() | ELF::SHF_GROUP; return Ctx->getELFSection(S->getName(), S->getType(), Flags, diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 9a6b53cf5a9f..240828f2a03c 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -5904,10 +5904,16 @@ bool AsmParser::parseDirectivePseudoProbe() { InlineStack.push_back(Site); } + // Parse function entry name + StringRef FnName; + if (parseIdentifier(FnName)) + return Error(getLexer().getLoc(), "unexpected token in '.pseudoprobe' directive"); + MCSymbol *FnSym = getContext().lookupSymbol(FnName); + if (parseEOL()) return true; - getStreamer().emitPseudoProbe(Guid, Index, Type, Attr, InlineStack); + getStreamer().emitPseudoProbe(Guid, Index, Type, Attr, InlineStack, FnSym); return false; } diff --git a/llvm/lib/MC/MCPseudoProbe.cpp 
b/llvm/lib/MC/MCPseudoProbe.cpp index fdf8bbbe0a4d..a9460b86d22a 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -14,12 +14,17 @@ #include "llvm/MC/MCFragment.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Endian.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" +#include +#include #include #include #include +#include #define DEBUG_TYPE "mcpseudoprobe" @@ -43,6 +48,10 @@ static const MCExpr *buildSymbolDiff(MCObjectStreamer *MCOS, const MCSymbol *A, void MCPseudoProbe::emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const { + bool IsSentinel = isSentinelProbe(getAttributes()); + assert((LastProbe || IsSentinel) && + "Last probe should not be null for non-sentinel probes"); + // Emit Index MCOS->emitULEB128IntValue(Index); // Emit Type and the flag: @@ -53,10 +62,11 @@ void MCPseudoProbe::emit(MCObjectStreamer *MCOS, assert(Attributes <= 0x7 && "Probe attributes too big to encode, exceeding 7"); uint8_t PackedType = Type | (Attributes << 4); - uint8_t Flag = LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0; + uint8_t Flag = + !IsSentinel ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0; MCOS->emitInt8(Flag | PackedType); - if (LastProbe) { + if (!IsSentinel) { // Emit the delta between the address label and LastProbe. const MCExpr *AddrDelta = buildSymbolDiff(MCOS, Label, LastProbe->getLabel()); @@ -67,9 +77,8 @@ void MCPseudoProbe::emit(MCObjectStreamer *MCOS, MCOS->insert(new MCPseudoProbeAddrFragment(AddrDelta)); } } else { - // Emit label as a symbolic code address. - MCOS->emitSymbolValue( - Label, MCOS->getContext().getAsmInfo()->getCodePointerSize()); + // Emit the GUID of the split function that the sentinel probe represents. 
+ MCOS->emitInt64(Guid); } LLVM_DEBUG({ @@ -81,7 +90,7 @@ void MCPseudoProbe::emit(MCObjectStreamer *MCOS, void MCPseudoProbeInlineTree::addPseudoProbe( const MCPseudoProbe &Probe, const MCPseudoProbeInlineStack &InlineStack) { // The function should not be called on the root. - assert(isRoot() && "Should not be called on root"); + assert(isRoot() && "Should only be called on root"); // When it comes here, the input look like: // Probe: GUID of C, ... @@ -128,43 +137,57 @@ void MCPseudoProbeInlineTree::emit(MCObjectStreamer *MCOS, dbgs() << "Group [\n"; MCPseudoProbeTable::DdgPrintIndent += 2; }); + assert(!isRoot() && "Root should be handled seperately"); + // Emit probes grouped by GUID. - if (Guid != 0) { - LLVM_DEBUG({ - dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); - dbgs() << "GUID: " << Guid << "\n"; - }); - // Emit Guid - MCOS->emitInt64(Guid); - // Emit number of probes in this node - MCOS->emitULEB128IntValue(Probes.size()); - // Emit number of direct inlinees - MCOS->emitULEB128IntValue(Children.size()); - // Emit probes in this group - for (const auto &Probe : Probes) { - Probe.emit(MCOS, LastProbe); - LastProbe = &Probe; - } - } else { - assert(Probes.empty() && "Root should not have probes"); + LLVM_DEBUG({ + dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); + dbgs() << "GUID: " << Guid << "\n"; + }); + // Emit Guid + MCOS->emitInt64(Guid); + // Emit number of probes in this node, including a sentinel probe for + // top-level functions if needed. + bool NeedSentinel = false; + if (Parent->isRoot()) { + assert(isSentinelProbe(LastProbe->getAttributes()) && + "Starting probe of a top-level function should be a sentinel probe"); + // The main body of a split function doesn't need a sentinel probe. 
+ if (LastProbe->getGuid() != Guid) + NeedSentinel = true; } - // Emit sorted descendant - // InlineSite is unique for each pair, - // so there will be no ordering of Inlinee based on MCPseudoProbeInlineTree* - std::map Inlinees; - for (auto &Child : Children) - Inlinees[Child.first] = Child.second.get(); + MCOS->emitULEB128IntValue(Probes.size() + NeedSentinel); + // Emit number of direct inlinees + MCOS->emitULEB128IntValue(Children.size()); + // Emit sentinel probe for top-level functions + if (NeedSentinel) + LastProbe->emit(MCOS, nullptr); + + // Emit probes in this group + for (const auto &Probe : Probes) { + Probe.emit(MCOS, LastProbe); + LastProbe = &Probe; + } + + // Emit sorted descendant. InlineSite is unique for each pair, so there will + // be no ordering of Inlinee based on MCPseudoProbeInlineTree* + using InlineeType = std::pair; + auto Comparer = [](const InlineeType &A, const InlineeType &B) { + return A.first < B.first; + }; + std::vector Inlinees; + for (const auto &Child : Children) + Inlinees.emplace_back(Child.first, Child.second.get()); + std::sort(Inlinees.begin(), Inlinees.end(), Comparer); for (const auto &Inlinee : Inlinees) { - if (Guid) { - // Emit probe index - MCOS->emitULEB128IntValue(std::get<1>(Inlinee.first)); - LLVM_DEBUG({ - dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); - dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n"; - }); - } + // Emit probe index + MCOS->emitULEB128IntValue(std::get<1>(Inlinee.first)); + LLVM_DEBUG({ + dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); + dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n"; + }); // Emit the group Inlinee.second->emit(MCOS, LastProbe); } @@ -176,17 +199,37 @@ void MCPseudoProbeInlineTree::emit(MCObjectStreamer *MCOS, }); } -void MCPseudoProbeSection::emit(MCObjectStreamer *MCOS) { +void MCPseudoProbeSections::emit(MCObjectStreamer *MCOS) { MCContext &Ctx = MCOS->getContext(); - for (auto &ProbeSec : MCProbeDivisions) { - const MCPseudoProbe 
*LastProbe = nullptr; - if (auto *S = - Ctx.getObjectFileInfo()->getPseudoProbeSection(ProbeSec.first)) { + const auto *FuncSym = ProbeSec.first; + const auto &Root = ProbeSec.second; + if (auto *S = Ctx.getObjectFileInfo()->getPseudoProbeSection( + FuncSym->getSection())) { // Switch to the .pseudoprobe section or a comdat group. MCOS->switchSection(S); // Emit probes grouped by GUID. - ProbeSec.second.emit(MCOS, LastProbe); + // Emit sorted descendant. InlineSite is unique for each pair, so there + // will be no ordering of Inlinee based on MCPseudoProbeInlineTree* + using InlineeType = std::pair; + auto Comparer = [](const InlineeType &A, const InlineeType &B) { + return A.first < B.first; + }; + std::vector Inlinees; + for (const auto &Child : Root.getChildren()) + Inlinees.emplace_back(Child.first, Child.second.get()); + std::sort(Inlinees.begin(), Inlinees.end(), Comparer); + + for (const auto &Inlinee : Inlinees) { + // Emit the group guarded by a sentinel probe. + MCPseudoProbe SentinelProbe(const_cast(FuncSym), + MD5Hash(FuncSym->getName()), + (uint32_t)PseudoProbeReservedId::Invalid, + (uint32_t)PseudoProbeType::Block, + (uint32_t)PseudoProbeAttributes::Sentinel); + const MCPseudoProbe *Probe = &SentinelProbe; + Inlinee.second->emit(MCOS, Probe); + } } } } @@ -360,39 +403,13 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start, bool MCPseudoProbeDecoder::buildAddress2ProbeMap( MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr, - std::unordered_set &GuildFilter) { + const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs) { // The pseudo_probe section encodes an inline forest and each tree has a - // format like: - // FUNCTION BODY (one for each uninlined function present in the text - // section) - // GUID (uint64) - // GUID of the function - // NPROBES (ULEB128) - // Number of probes originating from this function. 
- // NUM_INLINED_FUNCTIONS (ULEB128) - // Number of callees inlined into this function, aka number of - // first-level inlinees - // PROBE RECORDS - // A list of NPROBES entries. Each entry contains: - // INDEX (ULEB128) - // TYPE (uint4) - // 0 - block probe, 1 - indirect call, 2 - direct call - // ATTRIBUTE (uint3) - // 1 - tail call, 2 - dangling - // ADDRESS_TYPE (uint1) - // 0 - code address, 1 - address delta - // CODE_ADDRESS (uint64 or ULEB128) - // code address or address delta, depending on Flag - // INLINED FUNCTION RECORDS - // A list of NUM_INLINED_FUNCTIONS entries describing each of the - // inlined callees. Each record contains: - // INLINE SITE - // Index of the callsite probe (ULEB128) - // FUNCTION BODY - // A FUNCTION BODY entry describing the inlined function. + // format defined in MCPseudoProbe.h uint32_t Index = 0; - if (Cur == &DummyInlineRoot) { + bool IsTopLevelFunc = Cur == &DummyInlineRoot; + if (IsTopLevelFunc) { // Use a sequential id for top level inliner. Index = Cur->getChildren().size(); } else { @@ -410,8 +427,7 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( uint64_t Guid = std::move(*ErrorOrCurGuid); // Decide if top-level node should be disgarded. - if (Cur == &DummyInlineRoot && !GuildFilter.empty() && - !GuildFilter.count(Guid)) + if (IsTopLevelFunc && !GuidFilter.empty() && !GuidFilter.count(Guid)) Cur = nullptr; // If the incoming node is null, all its children nodes should be disgarded. @@ -419,6 +435,10 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( // Switch/add to a new tree node(inlinee) Cur = Cur->getOrAddNode(std::make_tuple(Guid, Index)); Cur->Guid = Guid; + if (IsTopLevelFunc && !EncodingIsAddrBased) { + if (auto V = FuncStartAddrs.lookup(Guid)) + LastAddr = V; + } } // Read number of probes in the current node. 
@@ -457,9 +477,21 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( if (!ErrorOrAddr) return false; Addr = std::move(*ErrorOrAddr); + if (isSentinelProbe(Attr)) { + // For sentinel probe, the addr field actually stores the GUID of the + // split function. Convert it to the real address. + if (auto V = FuncStartAddrs.lookup(Addr)) + Addr = V; + } else { + // For now we assume all probe encoding should be either based on + // leading probe address or function start address. + // The scheme is for downwards compatibility. + // TODO: retire this scheme once compatibility is no longer an issue. + EncodingIsAddrBased = true; + } } - if (Cur) { + if (Cur && !isSentinelProbe(Attr)) { // Populate Address2ProbesMap auto &Probes = Address2ProbesMap[Addr]; Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr, @@ -471,30 +503,25 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( uint32_t ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess); for (uint32_t I = 0; I < ChildrenToProcess; I++) { - buildAddress2ProbeMap(Cur, LastAddr, GuildFilter); + buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs); } return true; } bool MCPseudoProbeDecoder::buildAddress2ProbeMap( - const uint8_t *Start, std::size_t Size, - std::unordered_set &GuildFilter) { + const uint8_t *Start, std::size_t Size, const Uint64Set &GuidFilter, + const Uint64Map &FuncStartAddrs) { Data = Start; End = Data + Size; uint64_t LastAddr = 0; while (Data < End) - buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuildFilter); + buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuidFilter, + FuncStartAddrs); assert(Data == End && "Have unprocessed data in pseudo_probe section"); return true; } -bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start, - std::size_t Size) { - std::unordered_set GuildFilter; - return buildAddress2ProbeMap(Start, Size, GuildFilter); -} - void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) { OS << "Pseudo Probe Desc:\n"; 
// Make the output deterministic diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index de303f5fd514..fac9f8937d98 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -1102,7 +1102,8 @@ void MCStreamer::emitInstruction(const MCInst &Inst, const MCSubtargetInfo &) { void MCStreamer::emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, - const MCPseudoProbeInlineStack &InlineStack) { + const MCPseudoProbeInlineStack &InlineStack, + MCSymbol *FnSym) { auto &Context = getContext(); // Create a symbol at in the current section for use in the probe. @@ -1116,7 +1117,7 @@ void MCStreamer::emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, // Add the probe entry to this section's entries. Context.getMCPseudoProbeTable().getProbeSections().addPseudoProbe( - getCurrentSectionOnly(), Probe, InlineStack); + FnSym, Probe, InlineStack); } void MCStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo, diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll index 53a122653e71..3d278a9ba27f 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll @@ -10,17 +10,18 @@ ; RUN: llvm-mc -filetype=obj <%t1 -o %t4 ; RUN: llvm-objdump --section-headers %t4 | FileCheck %s --check-prefix=CHECK-OBJ + define dso_local void @foo2() !dbg !7 { ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1), !dbg ![[#]] -; CHECK-ASM: .pseudoprobe [[#GUID1:]] 1 0 0 +; CHECK-ASM: .pseudoprobe [[#GUID1:]] 1 0 0 foo2 ret void, !dbg !10 } define dso_local void @foo() #0 !dbg !11 { ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1), !dbg ![[#]] ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL1:]] -; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0 -; CHECK-ASM: 
.pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID2]]:2 +; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0 foo +; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID2]]:2 foo call void @foo2(), !dbg !12 ret void, !dbg !13 } @@ -29,9 +30,9 @@ define dso_local i32 @entry() !dbg !14 { ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0, i64 -1), !dbg ![[#]] ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1), !dbg ![[#DL2:]] ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL3:]] -; CHECK-ASM: .pseudoprobe [[#GUID3:]] 1 0 0 -; CHECK-ASM: .pseudoprobe [[#GUID2]] 1 0 0 @ [[#GUID3]]:2 -; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID3]]:2 @ [[#GUID2]]:2 +; CHECK-ASM: .pseudoprobe [[#GUID3:]] 1 0 0 entry +; CHECK-ASM: .pseudoprobe [[#GUID2]] 1 0 0 @ [[#GUID3]]:2 entry +; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID3]]:2 @ [[#GUID2]]:2 entry call void @foo(), !dbg !18 ret i32 0, !dbg !19 } @@ -71,6 +72,7 @@ define dso_local i32 @entry() !dbg !14 { ; CHECK-OBJ: .pseudo_probe_desc ; CHECK-OBJ: .pseudo_probe +; CHECK-OBJ-NOT: .rela.pseudo_probe !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4} diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll index c8b9baf44a20..f39b03c01dc6 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll @@ -18,15 +18,15 @@ bb0: %cmp = icmp eq i32 %x, 0 ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1), !dbg ![[#FAKELINE:]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID:]], 1, 0, 0 -; CHECK-ASM: .pseudoprobe [[#GUID:]] 1 0 0 +; CHECK-ASM: .pseudoprobe [[#GUID:]] 1 0 0 foo br i1 %cmp, label %bb1, label %bb2 bb1: ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1), !dbg ![[#FAKELINE]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 3, 0, 0 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0 -; CHECK-ASM: 
.pseudoprobe [[#GUID]] 3 0 0 -; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 +; CHECK-ASM: .pseudoprobe [[#GUID]] 3 0 0 foo +; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 foo store i32 6, ptr @a, align 4 br label %bb3 @@ -34,8 +34,8 @@ bb2: ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1), !dbg ![[#FAKELINE]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 2, 0, 0 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0 -; CHECK-ASM: .pseudoprobe [[#GUID]] 2 0 0 -; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 +; CHECK-ASM: .pseudoprobe [[#GUID]] 2 0 0 foo +; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 foo store i32 8, ptr @a, align 4 br label %bb3 @@ -44,22 +44,22 @@ bb3: ret void, !dbg !12 } -declare void @bar(i32 %x) +declare void @bar(i32 %x) define internal void @foo2(ptr %f) !dbg !4 { entry: ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1) ; CHECK-MIR: PSEUDO_PROBE [[#GUID2:]], 1, 0, 0 -; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0 +; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0 foo2 ; Check pseudo_probe metadata attached to the indirect call instruction. ; CHECK-IL: call void %f(i32 1), !dbg ![[#PROBE0:]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID2]], 2, 1, 0 -; CHECK-ASM: .pseudoprobe [[#GUID2]] 2 1 0 +; CHECK-ASM: .pseudoprobe [[#GUID2]] 2 1 0 foo2 call void %f(i32 1), !dbg !13 ; Check pseudo_probe metadata attached to the direct call instruction. 
; CHECK-IL: call void @bar(i32 1), !dbg ![[#PROBE1:]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID2]], 3, 2, 0 -; CHECK-ASM: .pseudoprobe [[#GUID2]] 3 2 0 +; CHECK-ASM: .pseudoprobe [[#GUID2]] 3 2 0 foo2 call void @bar(i32 1) ret void } @@ -92,7 +92,8 @@ entry: ; CHECK-ASM-NEXT: .ascii "foo2" ; CHECK-OBJ-COUNT-2: .pseudo_probe_desc -; CHECK-OBJ-COUNT-2: .pseudo_probe +; CHECK-OBJ: .pseudo_probe +; CHECK-OBJ-NOT: .rela.pseudo_probe !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!9, !10} diff --git a/llvm/test/tools/llvm-profgen/Inputs/func-split.perfbin b/llvm/test/tools/llvm-profgen/Inputs/func-split.perfbin index cc01a7c422124dde7d58ffa20f22585fba97a2d3..287340076c288933955aa280197eda4c7cb26ab8 100755 GIT binary patch delta 2575 zcmds2O-vI(6rS03s}+$g)vl{iD?*7V?QTn3sIh{YC=h?65sg2zr7A`WC?1S4#u$$p z^~HGbpk9nY58!G7{sb_=c+u#=1oa}ugW+I;;p9Pmvz=9}ZDJY@#%cP^%=_kj^Jd=L z-IrJB%d7OlQo6Rc;-y>+TT91q8|KrEoO#FEuj zAvGMY(Sm9)7)&JMRaKfl6p95~LQ4rLAcRQxM=7$du9OBkG&K~4&(dyqT0G06Fp9M_ zEp{9}cAz!oPQ}`i5qB(=YLB<32!U_KE-LD64kvZx5IiqWv0!xe6bnvygn0`p+3%2$ zW*C=t=9d?HAtsR}nnI`3sWgkVI3^^?dl!z=>^bLrPbQl^9?1KExr*dJ&MAy69?5m= zrzLrm^7Aq$os`rYOPtDPTP@2exzGFGlKKbxt00)K=`)K z>}LLbK^R%qz5kq%uQaIG34_*JV>Ta=?zqL!I*e$c+xsU?wmtkgAyypX+8%}lBAYyB z!u!V$6DGuj{>Tgw#mG0AN zP^LFQo#dbH^5py(2%SQGgc@hW{DdV!W)PoJI+}Tf18=C9WI7sS_VUis|`#T zc*?+Q2KF&jUR$#3Pr#P2tT@;m5TVCt)I8rcoVTFLl#(`xoj(2MLYWCc9e^^Dls zU=-Oi$Wk(wluFHp8-8Yub_pESveM6eC0i}Gr@LXOLpImGAEC3_N58-gTw}q4pky{7 zvzmWXm1l~NX+0b`dJ$U1}~@;)QNcd$4j&l}^rm{pPFL3|ZqYs9O4 zV>G7C{G8#`XnovG&qc09iM@qE0%YJ5~$|>|xR0 zuKYkP0oAV$tlDJJr8RCF)j_LYCG;(TBlRKSf)~2$qrx5^jMYc&0(;W24T9DXwm;#s pA?R!f3uiRwZiou2{V;~zGe1-`M(yg#w8IAIYzzx0*Fblp=LcUu7a;UwGV|ASui5vs^BUM08_}~pWXD;74 z-#PQ$pSgb)?w^7aD`4%Fm6t3cV*>qX=zmzk07vv{TVgI~xY^mcHkMI$X_-uSDx`%~ zHIix!t39#CNV>5*6bovpU^Enus3|qLg3{;E-)ZmwW9yS%XzYte!VLd*{yHxd2rkuB zO;zJs3=g|%pjAKV@<2_6lo9tAryQUE#VKD=3N*@{QL6)8)dSee-RGLVz2Wb9tuY z@-=%47jUfPKgd~%uO#OqU&R(GrBv7&M%UjUrQsF;;^n}S^`nL!)-kdc_~pSzlWh;D 
zCd^8Y47T4j>|8G4Bja(Nk!H)1d5liQ55HqVSqNjeUAgQtAS?44VF3SDISfY!-8k&P zdiL!}`v>}RS*%kNqSOAvIhz=;HDHfg2aE8Sdb0cTFOn&)LLkkQ-u&*2vOkx}D!F_* zl~W4&!9r%BV0wQ#pX=_;7mV5kJsTVZ{gvjmLPi&(62Ns_7L(-Lbh}yE#DCQ7v~dr& z5c=G>`veCwH;EbY4-OGpZJ9)Ur4wnQKjM{`1RJ=`a(k59i`~SfRzYyiV#bmQ2FgxyS4#-x!LGNJZJr)V|6Y3Hq zR$tb59AUKt?{#_g9YLJh(fYGBFTtY4L;7fPvqi6Jt+T^b47RNiPSoPwwusQ%fWvJG zA?(NTwuJb^KiSA#47Nwb3xUbm6YOt~3WpRNZchl!Dvp!8qvFbrgjnA+8L}1oJEFqD JBo24@{s%Tle1HG| diff --git a/llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin b/llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin index ba02c2e59e6943418933d145389dc392a18b6deb..07bfbdf68d41921f3234869f0b0f7c0dbaa3ab97 100755 GIT binary patch delta 2894 zcmeHJPi)&%82_FVCvj@huA$8ef_kCTk~X&EB(}4vOIg#Z)l^N>pi{LAa}v8vm)KU4 zRviM;141A`6rO|_muUz72_%>T2#~gl6FVRQ6I{4}5El+0jtFt!*^Z(e7J)>!1KN|` z=il#p@AL2TXD9FbFHu#ILy<9cV z>y2q{d3m0bGD2FAxJ0wLJ}ruC)t0bTwFN^f>m|!NFO;piSkYRVv(nIQt0`K|x=y&h z-O?MTRugNrtvYWvtPQ=~;wqV3SyogzTat59MUoXmlk?eT@5!%YJgi^XX!v3PPN$>jxENOPHVMoP<4j!T@@E8LRS^8CCc zCt-k;amcA90$4mgfwH!gR#aSI)9lTm_;HrY-RuAubE2H#MbdU{bFFG}rdHRdIn6Y! 
zQq=^&CnKYX?p$S~XzbQn0K51&%kR5fXL~T;U(?11V*9K4Vl0X}tHY<%1HX6UpK^R> z-goSflut6q@`hrfNo5BwS^ir7Z?&^~Q^cn~J>twKf=erlTYY^A1x=u{) zg4oiVEzxf3n-z<6K_wjw%Yq9~g(g(Z7SuIzu5W|0ODIE0Yrr@7`Kb&V!PllThZ+LS z1VV@DV1R%VbZ~}-N9cio?;e4U1ZHTTfk#t^kN)GX!1^BL2ZYp*9vC~Z8U2|8wBz^h z0RAjBgWpTB2=J$=WB4a3jwkpWPVhtMC_co;@D3kx*@nwL=(0;L`?1TucG-}?p_e+6 zuoCuV)0xh5`L7r%kMP+!g*nHP_eaJb_~M)zUcJ|2HjTfZQ^MD~jSr#s6_s#)ytjIm z(6=Xg^!1{`9Gvv%F?@g@!M_#b)XF3to-Z&@PI(J&x(nufoOvkad5ZYryvi87$3FK0 v$@&q&^L*!egoQZsSJ(5ZcaMcQ^_qxZStwAe3cj&W2ruQmh(AfDGY|d^f&E+} delta 2293 zcmcIlUuaWT82`?_xi`5Ds1i z(rty&=3x&aK12=`*~7L9f{KVzl)gA&Ln!!ApTkkdFvtsI@>-Q|?FKu+TvwQ_WhcbAjh0y&Ly)XLF08j-{K z|7O@bpYZsAD#3ayj|nhaIk;Z=)>d#P z(E1)vrvE%6ei0skuFTaw;<<>6Io(r<;+ZBd{p+(_b6bzPf*U5%8~||^fWxe zJ}HE@u4IYNc`l-;h4;-&`r5zsFbr zq3d@&k5a#`opK-q>nDsoavu|9uv6}9iQevLAD(Ibivd(wn6p%HKl35JDaYCu+#=cN z*F)7|6D(k)wAEzkNdeGouuk?TGn=lHeQ0LSD>D8=X<2%xB@y-i8XQS4 aV+&yKP@Gu_ ProfiledFunctions; for (auto *Func : Binary->getProfiledFunctions()) Binary->computeInlinedContextSizeForFunc(Func); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 2c9873cf18aa..f639fcc9f64b 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -204,11 +204,6 @@ void ProfiledBinary::load() { // Find the preferred load address for text sections. setPreferredTextSegmentAddresses(Obj); - checkPseudoProbe(Obj); - - if (ShowDisassemblyOnly) - decodePseudoProbe(Obj); - // Load debug info of subprograms from DWARF section. // If path of debug info binary is specified, use the debug info from it, // otherwise use the debug info from the executable binary. 
@@ -220,6 +215,17 @@ void ProfiledBinary::load() { loadSymbolsFromDWARF(*cast(&ExeBinary)); } + DisassembleFunctionSet.insert(DisassembleFunctions.begin(), + DisassembleFunctions.end()); + + checkPseudoProbe(Obj); + + if (UsePseudoProbes) + populateElfSymbolAddressList(Obj); + + if (ShowDisassemblyOnly) + decodePseudoProbe(Obj); + // Disassemble the text sections. disassemble(Obj); @@ -352,10 +358,31 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) { if (!UsePseudoProbes) return; - std::unordered_set ProfiledGuids; - if (!ShowDisassemblyOnly) - for (auto *F : ProfiledFunctions) - ProfiledGuids.insert(Function::getGUID(F->FuncName)); + MCPseudoProbeDecoder::Uint64Set GuidFilter; + MCPseudoProbeDecoder::Uint64Map FuncStartAddresses; + if (ShowDisassemblyOnly) { + if (DisassembleFunctionSet.empty()) { + FuncStartAddresses = SymbolStartAddrs; + } else { + for (auto &F : DisassembleFunctionSet) { + auto GUID = Function::getGUID(F.first()); + if (auto StartAddr = SymbolStartAddrs.lookup(GUID)) { + FuncStartAddresses[GUID] = StartAddr; + FuncRange &Range = StartAddrToFuncRangeMap[StartAddr]; + GuidFilter.insert(Function::getGUID(Range.getFuncName())); + } + } + } + } else { + for (auto *F : ProfiledFunctions) { + GuidFilter.insert(Function::getGUID(F->FuncName)); + for (auto &Range : F->Ranges) { + auto GUIDs = StartAddrToSymMap.equal_range(Range.first); + for (auto I = GUIDs.first; I != GUIDs.second; ++I) + FuncStartAddresses[I->second] = I->first; + } + } + } StringRef FileName = Obj->getFileName(); for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); @@ -374,7 +401,7 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) { StringRef Contents = unwrapOrError(Section.getContents(), FileName); if (!ProbeDecoder.buildAddress2ProbeMap( reinterpret_cast(Contents.data()), - Contents.size(), ProfiledGuids)) + Contents.size(), GuidFilter, FuncStartAddresses)) exitWithError("Pseudo Probe decoder fail in .pseudo_probe 
section"); } } @@ -578,8 +605,6 @@ void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) { for (std::pair &SecSyms : AllSymbols) stable_sort(SecSyms.second); - DisassembleFunctionSet.insert(DisassembleFunctions.begin(), - DisassembleFunctions.end()); assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) && "Functions to disassemble should be only specified together with " "--show-disassembly-only"); @@ -653,6 +678,20 @@ void ProfiledBinary::checkUseFSDiscriminator( } } +void ProfiledBinary::populateElfSymbolAddressList( + const ELFObjectFileBase *Obj) { + // Create a mapping from virtual address to symbol GUID and the other way + // around. + StringRef FileName = Obj->getFileName(); + for (const SymbolRef &Symbol : Obj->symbols()) { + const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); + const StringRef Name = unwrapOrError(Symbol.getName(), FileName); + uint64_t GUID = Function::getGUID(Name); + SymbolStartAddrs[GUID] = Addr; + StartAddrToSymMap.emplace(Addr, GUID); + } +} + void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) { for (const auto &DieInfo : CompilationUnit.dies()) { llvm::DWARFDie Die(&CompilationUnit, &DieInfo); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index 67b7e40b2a51..29206b1c2a93 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -11,6 +11,7 @@ #include "CallContext.h" #include "ErrorHandling.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -166,8 +167,8 @@ public: using ProbeFrameStack = SmallVector>; void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder, - MCDecodedPseudoProbeInlineTree &ProbeNode, - ProbeFrameStack &Context); + MCDecodedPseudoProbeInlineTree &ProbeNode, + ProbeFrameStack &Context); void dump() { RootContext.dumpTree(); } @@ -218,8 +219,14 @@ class ProfiledBinary { // 
A list of binary functions that have samples. std::unordered_set ProfiledFunctions; + // GUID to Elf symbol start address map + DenseMap SymbolStartAddrs; + + // Start address to Elf symbol GUID map + std::unordered_multimap StartAddrToSymMap; + // An ordered map of mapping function's start address to function range - // relevant info. Currently to determine if the address of ELF is the start of + // relevant info. Currently to determine if the offset of ELF is the start of // a real function, we leverage the function range info from DWARF. std::map StartAddrToFuncRangeMap; @@ -278,7 +285,8 @@ class ProfiledBinary { void setPreferredTextSegmentAddresses(const ELFObjectFileBase *O); template - void setPreferredTextSegmentAddresses(const ELFFile &Obj, StringRef FileName); + void setPreferredTextSegmentAddresses(const ELFFile &Obj, + StringRef FileName); void checkPseudoProbe(const ELFObjectFileBase *Obj); @@ -298,6 +306,9 @@ class ProfiledBinary { // Load debug info from DWARF unit. void loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit); + // Create elf symbol to its start address mapping. + void populateElfSymbolAddressList(const ELFObjectFileBase *O); + // A function may be spilt into multiple non-continuous address ranges. We use // this to set whether start address of a function is the real entry of the // function and also set false to the non-function label. @@ -348,7 +359,9 @@ public: return Address - BaseAddress + getPreferredBaseAddress(); } // Return the preferred load address for the first executable segment. - uint64_t getPreferredBaseAddress() const { return PreferredTextSegmentAddresses[0]; } + uint64_t getPreferredBaseAddress() const { + return PreferredTextSegmentAddresses[0]; + } // Return the preferred load address for the first loadable segment. uint64_t getFirstLoadableAddress() const { return FirstLoadableAddress; } // Return the file offset for the first executable segment.