[CSSPGO] Load context profile for external functions in PreLink and populate ThinLTO import list

For ThinLTO's prelink compilation, we need to put external inline candidates into an import list attached to function's entry count metadata. This enables ThinLink to treat such cross module callee as hot in summary index, and later helps postlink to import them for profile guided cross module inlining.

For AutoFDO, the import list is retrieved by traversing the nested inlinee functions. For CSSPGO, since the profile is flattened, a few things need to happen for it to work:

 - When loading input profile in extended binary format, we need to load all child context profile whose parent is in current module, so context trie for current module includes potential cross module inlinee.
 - In order to make the above happen, we need to know whether the input profile is a CSSPGO profile before we start reading function profiles, hence a flag for the profile summary section is added.
 - When searching for cross module inline candidate, we need to walk through the context trie instead of nested inlinee profile (callsite sample of AutoFDO profile).
 - Now that we have more accurate counts with CSSPGO, we switched to using entry count instead of total count to decide if an external callee is potentially beneficial to inline. This makes it consistent with how we determine whether a call target is a potential inline candidate.

Differential Revision: https://reviews.llvm.org/D98590
This commit is contained in:
Wenlei He 2021-03-13 13:55:28 -08:00
parent 9cf5220c5c
commit a5d30421a6
10 changed files with 258 additions and 41 deletions

View File

@ -187,7 +187,10 @@ enum class SecProfSummaryFlags : uint32_t {
/// SecFlagPartial means the profile is for common/shared code. /// SecFlagPartial means the profile is for common/shared code.
/// The common profile is usually merged from profiles collected /// The common profile is usually merged from profiles collected
/// from running other targets. /// from running other targets.
SecFlagPartial = (1 << 0) SecFlagPartial = (1 << 0),
/// SecFlagFullContext means this is a context-sensitive profile for
/// CSSPGO.
SecFlagFullContext = (1 << 1)
}; };
enum class SecFuncMetadataFlags : uint32_t { enum class SecFuncMetadataFlags : uint32_t {
@ -730,7 +733,7 @@ public:
/// corresponding function is no less than \p Threshold, add its corresponding /// corresponding function is no less than \p Threshold, add its corresponding
/// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
/// to \p S. /// to \p S.
void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M, void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S,
const StringMap<Function *> &SymbolMap, const StringMap<Function *> &SymbolMap,
uint64_t Threshold) const { uint64_t Threshold) const {
if (TotalSamples <= Threshold) if (TotalSamples <= Threshold)
@ -753,7 +756,7 @@ public:
} }
for (const auto &CS : CallsiteSamples) for (const auto &CS : CallsiteSamples)
for (const auto &NameFS : CS.second) for (const auto &NameFS : CS.second)
NameFS.second.findInlinedFunctions(S, M, SymbolMap, Threshold); NameFS.second.findInlinedFunctions(S, SymbolMap, Threshold);
} }
/// Set the name of the function. /// Set the name of the function.

View File

@ -115,6 +115,8 @@ public:
bool MergeContext = true); bool MergeContext = true);
// Query base profile for a given function by name. // Query base profile for a given function by name.
FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext); FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext);
// Retrieve the context trie node for given profile context
ContextTrieNode *getContextFor(const SampleContext &Context);
// Mark a context profile as inlined when function is inlined. // Mark a context profile as inlined when function is inlined.
// This makes sure that inlined context profile will be excluded in // This makes sure that inlined context profile will be excluded in
// function's base profile. // function's base profile.
@ -127,7 +129,6 @@ public:
private: private:
ContextTrieNode *getContextFor(const DILocation *DIL); ContextTrieNode *getContextFor(const DILocation *DIL);
ContextTrieNode *getContextFor(const SampleContext &Context);
ContextTrieNode *getCalleeContextFor(const DILocation *DIL, ContextTrieNode *getCalleeContextFor(const DILocation *DIL,
StringRef CalleeName); StringRef CalleeName);
ContextTrieNode *getOrCreateContextPath(const SampleContext &Context, ContextTrieNode *getOrCreateContextPath(const SampleContext &Context,

View File

@ -38,6 +38,7 @@
#include <cstdint> #include <cstdint>
#include <limits> #include <limits>
#include <memory> #include <memory>
#include <set>
#include <system_error> #include <system_error>
#include <vector> #include <vector>
@ -577,6 +578,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
return EC; return EC;
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
Summary->setPartialProfile(true); Summary->setPartialProfile(true);
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
FunctionSamples::ProfileIsCS = ProfileIsCS = true;
break; break;
case SecNameTable: { case SecNameTable: {
FixedLengthMD5 = FixedLengthMD5 =
@ -687,6 +690,46 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
if (std::error_code EC = readFuncProfile(FuncProfileAddr)) if (std::error_code EC = readFuncProfile(FuncProfileAddr))
return EC; return EC;
} }
} else if (FunctionSamples::ProfileIsCS) {
// Compute the ordered set of names, so we can
// get all context profiles under a subtree by
// iterating through the ordered names.
struct Comparer {
// Ignore the closing ']' when ordering context
bool operator()(const StringRef &L, const StringRef &R) const {
return L.substr(0, L.size() - 1) < R.substr(0, R.size() - 1);
}
};
std::set<StringRef, Comparer> OrderedNames;
for (auto Name : FuncOffsetTable) {
OrderedNames.insert(Name.first);
}
// For each function in current module, load all
// context profiles for the function.
for (auto NameOffset : FuncOffsetTable) {
StringRef ContextName = NameOffset.first;
SampleContext FContext(ContextName);
auto FuncName = FContext.getNameWithoutContext();
if (!FuncsToUse.count(FuncName) &&
(!Remapper || !Remapper->exist(FuncName)))
continue;
// For each context profile we need, try to load
// all context profile in the subtree. This can
// help profile guided importing for ThinLTO.
auto It = OrderedNames.find(ContextName);
while (It != OrderedNames.end() &&
It->startswith(ContextName.substr(0, ContextName.size() - 1))) {
const uint8_t *FuncProfileAddr = Start + FuncOffsetTable[*It];
assert(FuncProfileAddr < End && "out of LBRProfile section");
if (std::error_code EC = readFuncProfile(FuncProfileAddr))
return EC;
// Remove loaded context profile so we won't
// load it repeatedly.
It = OrderedNames.erase(It);
}
}
} else { } else {
for (auto NameOffset : FuncOffsetTable) { for (auto NameOffset : FuncOffsetTable) {
SampleContext FContext(NameOffset.first); SampleContext FContext(NameOffset.first);
@ -704,8 +747,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
} }
assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
"Cannot have both context-sensitive and regular profile"); "Cannot have both context-sensitive and regular profile");
ProfileIsCS = (CSProfileCount > 0); assert(ProfileIsCS == (CSProfileCount > 0) &&
FunctionSamples::ProfileIsCS = ProfileIsCS; "Section flag should be consistent with actual profile");
return sampleprof_error::success; return sampleprof_error::success;
} }
@ -1034,6 +1077,8 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
case SecProfSummary: case SecProfSummary:
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
Flags.append("partial,"); Flags.append("partial,");
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
Flags.append("context,");
break; break;
default: default:
break; break;

View File

@ -237,6 +237,8 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
setToCompressSection(SecProfileSymbolList); setToCompressSection(SecProfileSymbolList);
if (Type == SecFuncMetadata && FunctionSamples::ProfileIsProbeBased) if (Type == SecFuncMetadata && FunctionSamples::ProfileIsProbeBased)
addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagIsProbeBased); addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagIsProbeBased);
if (Type == SecProfSummary && FunctionSamples::ProfileIsCS)
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFullContext);
uint64_t SectionStart = markSectionStart(Type, LayoutIdx); uint64_t SectionStart = markSectionStart(Type, LayoutIdx);
switch (Type) { switch (Type) {

View File

@ -365,6 +365,10 @@ protected:
findFunctionSamples(const Instruction &I) const override; findFunctionSamples(const Instruction &I) const override;
std::vector<const FunctionSamples *> std::vector<const FunctionSamples *>
findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const; findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
void findExternalInlineCandidate(const FunctionSamples *Samples,
DenseSet<GlobalValue::GUID> &InlinedGUIDs,
const StringMap<Function *> &SymbolMap,
uint64_t Threshold);
// Attempt to promote indirect call and also inline the promoted call // Attempt to promote indirect call and also inline the promoted call
bool tryPromoteAndInlineCandidate( bool tryPromoteAndInlineCandidate(
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
@ -922,6 +926,60 @@ void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
} }
} }
void SampleProfileLoader::findExternalInlineCandidate(
const FunctionSamples *Samples, DenseSet<GlobalValue::GUID> &InlinedGUIDs,
const StringMap<Function *> &SymbolMap, uint64_t Threshold) {
assert(Samples && "expect non-null caller profile");
// For AutoFDO profile, retrieve candidate profiles by walking over
// the nested inlinee profiles.
if (!ProfileIsCS) {
Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
return;
}
ContextTrieNode *Caller =
ContextTracker->getContextFor(Samples->getContext());
std::queue<ContextTrieNode *> CalleeList;
CalleeList.push(Caller);
while (!CalleeList.empty()) {
ContextTrieNode *Node = CalleeList.front();
CalleeList.pop();
FunctionSamples *CalleeSample = Node->getFunctionSamples();
// For CSSPGO profile, retrieve candidate profile by walking over the
// trie built for context profile. Note that also take call targets
// even if callee doesn't have a corresponding context profile.
if (!CalleeSample || CalleeSample->getEntrySamples() < Threshold)
continue;
StringRef Name = CalleeSample->getFuncName();
Function *Func = SymbolMap.lookup(Name);
// Add to the import list only when it's defined out of module.
if (!Func || Func->isDeclaration())
InlinedGUIDs.insert(FunctionSamples::getGUID(Name));
// Import hot CallTargets, which may not be available in IR because full
// profile annotation cannot be done until backend compilation in ThinLTO.
for (const auto &BS : CalleeSample->getBodySamples())
for (const auto &TS : BS.second.getCallTargets())
if (TS.getValue() > Threshold) {
StringRef CalleeName = CalleeSample->getFuncName(TS.getKey());
const Function *Callee = SymbolMap.lookup(CalleeName);
if (!Callee || Callee->isDeclaration())
InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeName));
}
// Import hot child context profile associted with callees. Note that this
// may have some overlap with the call target loop above, but doing this
// based child context profile again effectively allow us to use the max of
// entry count and call target count to determine importing.
for (auto &Child : Node->getAllChildContext()) {
ContextTrieNode *CalleeNode = &Child.second;
CalleeList.push(CalleeNode);
}
}
}
/// Iteratively inline hot callsites of a function. /// Iteratively inline hot callsites of a function.
/// ///
/// Iteratively traverse all callsites of the function \p F, and find if /// Iteratively traverse all callsites of the function \p F, and find if
@ -994,8 +1052,8 @@ bool SampleProfileLoader::inlineHotFunctions(
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) { for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
uint64_t SumOrigin = Sum; uint64_t SumOrigin = Sum;
if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
FS->findInlinedFunctions(InlinedGUIDs, F.getParent(), SymbolMap, findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap,
PSI->getOrCompHotCountThreshold()); PSI->getOrCompHotCountThreshold());
continue; continue;
} }
if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList)) if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
@ -1014,9 +1072,9 @@ bool SampleProfileLoader::inlineHotFunctions(
LocalChanged = true; LocalChanged = true;
} }
} else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
findCalleeFunctionSamples(*I)->findInlinedFunctions( findExternalInlineCandidate(findCalleeFunctionSamples(*I), InlinedGUIDs,
InlinedGUIDs, F.getParent(), SymbolMap, SymbolMap,
PSI->getOrCompHotCountThreshold()); PSI->getOrCompHotCountThreshold());
} }
} }
Changed |= LocalChanged; Changed |= LocalChanged;
@ -1268,8 +1326,8 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
for (const auto *FS : CalleeSamples) { for (const auto *FS : CalleeSamples) {
// TODO: Consider disable pre-lTO ICP for MonoLTO as well // TODO: Consider disable pre-lTO ICP for MonoLTO as well
if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
FS->findInlinedFunctions(InlinedGUIDs, F.getParent(), SymbolMap, findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap,
PSI->getOrCompHotCountThreshold()); PSI->getOrCompHotCountThreshold());
continue; continue;
} }
uint64_t EntryCountDistributed = uint64_t EntryCountDistributed =
@ -1314,9 +1372,8 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
Changed = true; Changed = true;
} }
} else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
findCalleeFunctionSamples(*I)->findInlinedFunctions( findExternalInlineCandidate(Candidate.CalleeSamples, InlinedGUIDs,
InlinedGUIDs, F.getParent(), SymbolMap, SymbolMap, PSI->getOrCompHotCountThreshold());
PSI->getOrCompHotCountThreshold());
} }
} }

View File

@ -0,0 +1,27 @@
[main]:154:2
2: 12
3: 18 _Z5funcAi:11
3.1: 18 _Z5funcBi:19
[main:3.1 @ _Z5funcBi]:120:7040
0: 7001
1: 19 _Z8funcLeafi:9999
3: 12
[main:3.1 @ _Z5funcBi @ _Z5funcBiLeaf2]:1:9010
0: 7001
1: 19 _Z8funcLeafi3:9999
3: 12
[main:2 @ _Z5funcAi]:99:11
0: 10
1: 10 _Z8funcLeafi:11
2: 287864 _Z3fibi:315608
3: 24
[main:3 @ _Z5funcCi]:23254:11
0: 10
1: 23250
[main:3 @ _Z5funcDi]:23:45201
0: 10
1: 23250
[main:2 @ _Z5funcAi:2 @ _Z3fibi]:120:101
0: 99
1: 6
3: 97

View File

@ -0,0 +1,78 @@
; Make sure Import GUID list for ThinLTO properly set for CSSPGO
; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -S | FileCheck %s
; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof.extbin -S | FileCheck %s
declare i32 @_Z5funcBi(i32 %x)
declare i32 @_Z5funcAi(i32 %x)
define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 {
entry:
br label %for.body, !dbg !25
for.cond.cleanup: ; preds = %for.body
ret i32 %add3, !dbg !27
for.body: ; preds = %for.body, %entry
%x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ]
%r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
%call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32
%add = add nuw nsw i32 %x.011, 1, !dbg !31
%call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28
%add2 = add i32 %call, %r.010, !dbg !34
%add3 = add i32 %add2, %call1, !dbg !35
%dec = add nsw i32 %x.011, -1, !dbg !36
%cmp = icmp eq i32 %x.011, 0, !dbg !38
br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25
}
; Make sure the ImportGUID stays with entry count metadata for ThinLTO-PreLink
; CHECK: distinct !DISubprogram(name: "main"
; CHECK: !{!"function_entry_count", i64 3, i64 446061515086924981, i64 3815895320998406042, i64 7102633082150537521, i64 -2862076748587597320}
attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!14, !15, !16}
!llvm.ident = !{!17}
!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo")
!4 = !{}
!5 = !{!6, !10, !11}
!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !9}
!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
!12 = !{!0}
!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9)
!14 = !{i32 7, !"Dwarf Version", i32 4}
!15 = !{i32 2, !"Debug Info Version", i32 3}
!16 = !{i32 1, !"wchar_size", i32 4}
!17 = !{!"clang version 11.0.0"}
!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
!19 = !DISubroutineType(types: !20)
!20 = !{!9}
!21 = !{!22, !23}
!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9)
!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9)
!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3)
!25 = !DILocation(line: 13, column: 3, scope: !26)
!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2)
!27 = !DILocation(line: 17, column: 3, scope: !18)
!28 = !DILocation(line: 13, column: 10, scope: !29)
!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37)
!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3)
!31 = !DILocation(line: 14, column: 29, scope: !29)
!32 = !DILocation(line: 14, column: 21, scope: !33)
!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2)
!34 = !DILocation(line: 14, column: 19, scope: !29)
!35 = !DILocation(line: 14, column: 7, scope: !29)
!36 = !DILocation(line: 13, column: 33, scope: !37)
!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6)
!38 = !DILocation(line: 13, column: 26, scope: !39)
!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2)

View File

@ -198,6 +198,33 @@ CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr) {
return Ret.first->second; return Ret.first->second;
} }
/// Build the context-sensitive function profiles from the per-binary sample
/// counters held in BinarySampleCounters.
void CSProfileGenerator::generateProfile() {
  // Flag the profile being generated as context-sensitive.
  FunctionSamples::ProfileIsCS = true;
  for (const auto &BI : BinarySampleCounters) {
    ProfiledBinary *Binary = BI.first;
    for (const auto &CI : BI.second) {
      // The key is dereferenced unconditionally on the next line, so use
      // cast<> (which asserts on a type mismatch) rather than an unchecked
      // dyn_cast<> that could silently yield a null pointer.
      const StringBasedCtxKey *CtxKey =
          cast<StringBasedCtxKey>(CI.first.getPtr());
      StringRef ContextId(CtxKey->Context);
      // Get or create function profile for the range
      FunctionSamples &FunctionProfile =
          getFunctionProfileForContext(ContextId);

      // Fill in function body samples
      populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter,
                                  Binary);
      // Fill in boundary sample counts as well as call site samples for calls
      populateFunctionBoundarySamples(ContextId, FunctionProfile,
                                      CI.second.BranchCounter, Binary);
    }
  }
  // Fill in call site value sample for inlined calls and also use context to
  // infer missing samples. Since we don't have call count for inlined
  // functions, we estimate it from inlinee's profile using the entry of the
  // body sample.
  populateInferredFunctionSamples();
}
void CSProfileGenerator::updateBodySamplesforFunctionProfile( void CSProfileGenerator::updateBodySamplesforFunctionProfile(
FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc, FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc,
uint64_t Count) { uint64_t Count) {
@ -422,6 +449,7 @@ extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack,
void PseudoProbeCSProfileGenerator::generateProfile() { void PseudoProbeCSProfileGenerator::generateProfile() {
// Enable pseudo probe functionalities in SampleProf // Enable pseudo probe functionalities in SampleProf
FunctionSamples::ProfileIsProbeBased = true; FunctionSamples::ProfileIsProbeBased = true;
FunctionSamples::ProfileIsCS = true;
for (const auto &BI : BinarySampleCounters) { for (const auto &BI : BinarySampleCounters) {
ProfiledBinary *Binary = BI.first; ProfiledBinary *Binary = BI.first;
for (const auto &CI : BI.second) { for (const auto &CI : BI.second) {

View File

@ -65,31 +65,7 @@ public:
: BinarySampleCounters(Counters){}; : BinarySampleCounters(Counters){};
public: public:
void generateProfile() override { void generateProfile() override;
for (const auto &BI : BinarySampleCounters) {
ProfiledBinary *Binary = BI.first;
for (const auto &CI : BI.second) {
const StringBasedCtxKey *CtxKey =
dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
StringRef ContextId(CtxKey->Context);
// Get or create function profile for the range
FunctionSamples &FunctionProfile =
getFunctionProfileForContext(ContextId);
// Fill in function body samples
populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter,
Binary);
// Fill in boundary sample counts as well as call site samples for calls
populateFunctionBoundarySamples(ContextId, FunctionProfile,
CI.second.BranchCounter, Binary);
}
}
// Fill in call site value sample for inlined calls and also use context to
// infer missing samples. Since we don't have call count for inlined
// functions, we estimate it from inlinee's profile using the entry of the
// body sample.
populateInferredFunctionSamples();
}
// Remove adjacent repeated context sequences up to a given sequence length, // Remove adjacent repeated context sequences up to a given sequence length,
// -1 means no size limit. Note that repeated sequences are identified based // -1 means no size limit. Note that repeated sequences are identified based