[llvm-profgen] Strip context to support non-CS profile generation for hybrid sample

Differential Revision: https://reviews.llvm.org/D109769
This commit is contained in:
wlei 2021-09-24 11:32:32 -07:00
parent c3717b6858
commit a03cf331e1
7 changed files with 83 additions and 47 deletions

View File

@ -2,6 +2,8 @@
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0
; RUN: FileCheck %s --input-file %t
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0 --ignore-stack-samples
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-STRIP-CTX
; CHECK:[main:1 @ foo]:309:0
; CHECK: 2.1: 14
@ -11,6 +13,18 @@
; CHECK:[main:1 @ foo:3.1 @ bar]:84:0
; CHECK: 1: 14
; CHECK-STRIP-CTX: main:379:0
; CHECK-STRIP-CTX: 0: 0
; CHECK-STRIP-CTX: 2: 0
; CHECK-STRIP-CTX: 1: foo:379
; CHECK-STRIP-CTX: 2.1: 14
; CHECK-STRIP-CTX: 3: 15
; CHECK-STRIP-CTX: 3.2: 1
; CHECK-STRIP-CTX: 4: 0
; CHECK-STRIP-CTX: 7: 0
; CHECK-STRIP-CTX: 3.1: bar:84
; CHECK-STRIP-CTX: 1: 14
; CHECK-UNWINDER: [main:1 @ foo]
; CHECK-UNWINDER: 4
; CHECK-UNWINDER: 670-6ad:1

View File

@ -4,6 +4,8 @@
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0
; RUN: FileCheck %s --input-file %t
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0 --ignore-stack-samples
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-STRIP-CTX
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-AGG-UNWINDER
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0
@ -58,6 +60,19 @@
; CHECK: 4: 1
; CHECK: 5: 3
; CHECK-STRIP-CTX: foo:57:0
; CHECK-STRIP-CTX: 0: 0
; CHECK-STRIP-CTX: 1: 0
; CHECK-STRIP-CTX: 2: 3
; CHECK-STRIP-CTX: 3: 3 bar:3
; CHECK-STRIP-CTX: 4: 0
; CHECK-STRIP-CTX: 5: 0
; CHECK-STRIP-CTX: bar:50:3
; CHECK-STRIP-CTX: 0: 3
; CHECK-STRIP-CTX: 1: 3
; CHECK-STRIP-CTX: 2: 2
; CHECK-STRIP-CTX: 4: 1
; CHECK-STRIP-CTX: 5: 3
; CHECK-UNWINDER: [main:1 @ foo]
; CHECK-UNWINDER-NEXT: 3

View File

@ -24,6 +24,11 @@ cl::opt<bool> UseOffset("use-offset", cl::ReallyHidden, cl::init(true),
cl::ZeroOrMore,
cl::desc("Work with `--skip-symbolization` to dump the "
"offset instead of virtual address."));
// Command-line switch "ignore-stack-samples" (registered as cl::ReallyHidden,
// initialised to false). When it is set, HybridPerfReader::generateRawProfile()
// clears ProfileIsCS and runs the LBR-only path instead of unwinding call
// stacks.
cl::opt<bool>
IgnoreStackSamples("ignore-stack-samples", cl::ReallyHidden,
cl::init(false), cl::ZeroOrMore,
cl::desc("Ignore call stack samples for hybrid samples "
"and produce context-insensitive profile."));
extern cl::opt<bool> ShowDisassemblyOnly;
extern cl::opt<bool> ShowSourceLocations;
@ -365,9 +370,6 @@ void HybridPerfReader::unwindSamples() {
WithColor::warning() << "Profile context truncated due to missing probe "
<< "for call instruction at "
<< format("%" PRIx64, Address) << "\n";
if (SkipSymbolization)
writeRawProfile(OutputFilename);
}
bool PerfReaderBase::extractLBRStack(TraceStream &TraceIt,
@ -675,14 +677,18 @@ void LBRPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
}
// Builds the sample counters for LBR-only input: seeds SampleCounters with a
// single empty-context key, then feeds every entry of AggregatedSamples to
// computeCounterFromLBR together with its aggregated count.
// NOTE(review): the two assertions below cannot both hold
// (SampleCounters.size() == 1 vs. SampleCounters.empty()). This listing looks
// like it interleaves the pre- and post-change lines of the commit -- confirm
// which variant is live. The trailing writeRawProfile call likewise repeats
// what parsePerfTraces does right after generateRawProfile().
void LBRPerfReader::generateRawProfile() {
assert(SampleCounters.size() == 1 && "Must have one entry of sample counter");
// There is no context for LBR only sample, so initialize one entry with
// fake "empty" context key.
assert(SampleCounters.empty() &&
"Sample counter map should be empty before raw profile generation");
std::shared_ptr<StringBasedCtxKey> Key =
std::make_shared<StringBasedCtxKey>();
Key->genHashCode();
SampleCounters.emplace(Hashable<ContextKey>(Key), SampleCounter());
for (const auto &Item : AggregatedSamples) {
const PerfSample *Sample = Item.first.getPtr();
computeCounterFromLBR(Sample, Item.second);
}
if (SkipSymbolization)
PerfReaderBase::writeRawProfile(OutputFilename);
}
uint64_t PerfReaderBase::parseAggregatedCount(TraceStream &TraceIt) {
@ -774,7 +780,13 @@ PerfReaderBase::extractPerfType(cl::list<std::string> &PerfTraceFilenames) {
return PerfType;
}
// Unwinds the hybrid samples unconditionally.
// NOTE(review): a second, longer definition of this same function follows
// immediately; this one-liner looks like the pre-change line of the commit --
// confirm that only one of the two is live.
void HybridPerfReader::generateRawProfile() { unwindSamples(); }
// Produces the raw profile for hybrid (LBR + call stack) samples.
// The profile is context-sensitive unless IgnoreStackSamples is set; in that
// case the call stacks are not unwound and the LBR-only generation of the
// base class is used instead.
void HybridPerfReader::generateRawProfile() {
  ProfileIsCS = !IgnoreStackSamples;

  // Context-insensitive request: reuse the plain LBR path and stop here.
  if (!ProfileIsCS) {
    LBRPerfReader::generateRawProfile();
    return;
  }

  unwindSamples();
}
void PerfReaderBase::warnTruncatedStack() {
for (auto Address : InvalidReturnAddresses) {
@ -793,6 +805,9 @@ void PerfReaderBase::parsePerfTraces(
warnTruncatedStack();
generateRawProfile();
if (SkipSymbolization)
writeRawProfile(OutputFilename);
}
} // end namespace sampleprof

View File

@ -574,12 +574,12 @@ public:
};
void updateBinaryAddress(const MMapEvent &Event);
// Returns the perf script type stored in PerfType.
PerfScriptType getPerfScriptType() const { return PerfType; }
// Entry of the reader to parse multiple perf traces
void parsePerfTraces(cl::list<std::string> &PerfTraceFilenames);
// Read-only access to the SampleCounters map held by this reader.
const ContextSampleCounterMap &getSampleCounters() const {
return SampleCounters;
}
bool profileIsCS() { return ProfileIsCS; }
protected:
static PerfScriptType
@ -623,29 +623,8 @@ protected:
PerfScriptType PerfType = PERF_UNKNOWN;
// Keep track of all invalid return addresses
std::set<uint64_t> InvalidReturnAddresses;
};
/*
Hybrid perf script includes a group of hybrid samples (LBRs + call stack),
which is used to generate CS profile. An example of hybrid sample:
4005dc # call stack leaf
400634
400684 # call stack root
0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
*/
class HybridPerfReader : public PerfReaderBase {
public:
// Tags the reader with the PERF_LBR_STACK script type.
HybridPerfReader(ProfiledBinary *Binary) : PerfReaderBase(Binary) {
PerfType = PERF_LBR_STACK;
};
// Parse the hybrid sample including the call and LBR line
void parseSample(TraceStream &TraceIt, uint64_t Count) override;
void generateRawProfile() override;
private:
// Unwind the hybrid samples after aggregation
void unwindSamples();
bool ProfileIsCS = false;
};
/*
@ -657,21 +636,37 @@ private:
// Reader for LBR-only perf scripts; the constructor tags the reader with the
// PERF_LBR script type.
// NOTE(review): in this listing both the constructor and
// LBRPerfReader::generateRawProfile() emplace the empty-context key into
// SampleCounters -- confirm that only one of the two sites is live.
class LBRPerfReader : public PerfReaderBase {
public:
LBRPerfReader(ProfiledBinary *Binary) : PerfReaderBase(Binary) {
// There is no context for LBR only sample, so initialize one entry with
// fake "empty" context key.
std::shared_ptr<StringBasedCtxKey> Key =
std::make_shared<StringBasedCtxKey>();
Key->genHashCode();
SampleCounters.emplace(Hashable<ContextKey>(Key), SampleCounter());
PerfType = PERF_LBR;
};
// Parse the LBR only sample.
virtual void parseSample(TraceStream &TraceIt, uint64_t Count) override;
// Compute the sample counters from the aggregated LBR samples.
virtual void generateRawProfile() override;
private:
// Called once per aggregated sample by generateRawProfile(); Repeat is
// presumably that sample's aggregated occurrence count -- TODO confirm
// against the definition.
void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
};
/*
Hybrid perf script includes a group of hybrid samples (LBRs + call stack),
which is used to generate CS profile. An example of hybrid sample:
4005dc # call stack leaf
400634
400684 # call stack root
0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
*/
class HybridPerfReader : public LBRPerfReader {
public:
// Builds on LBRPerfReader and re-tags the reader as PERF_LBR_STACK.
HybridPerfReader(ProfiledBinary *Binary) : LBRPerfReader(Binary) {
PerfType = PERF_LBR_STACK;
};
// Parse the hybrid sample including the call and LBR line
void parseSample(TraceStream &TraceIt, uint64_t Count) override;
// Unwinds the samples for a context-sensitive profile, or falls back to
// LBRPerfReader::generateRawProfile() when IgnoreStackSamples is set.
void generateRawProfile() override;
private:
void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
// Unwind the hybrid samples after aggregation
void unwindSamples();
};
} // end namespace sampleprof

View File

@ -77,15 +77,12 @@ int CSProfileGenerator::MaxContextDepth = -1;
std::unique_ptr<ProfileGeneratorBase>
ProfileGeneratorBase::create(ProfiledBinary *Binary,
const ContextSampleCounterMap &SampleCounters,
enum PerfScriptType SampleType) {
bool ProfileIsCS) {
std::unique_ptr<ProfileGeneratorBase> Generator;
if (SampleType == PERF_LBR) {
// TODO: Support probe based profile generation
Generator.reset(new ProfileGenerator(Binary, SampleCounters));
} else if (SampleType == PERF_LBR_STACK) {
if (ProfileIsCS) {
Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
} else {
llvm_unreachable("Unsupported perfscript!");
Generator.reset(new ProfileGenerator(Binary, SampleCounters));
}
return Generator;

View File

@ -34,7 +34,7 @@ public:
virtual ~ProfileGeneratorBase() = default;
static std::unique_ptr<ProfileGeneratorBase>
create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters,
enum PerfScriptType SampleType);
bool ProfileIsCS);
virtual void generateProfile() = 0;
void write();

View File

@ -95,7 +95,7 @@ int main(int argc, const char *argv[]) {
std::unique_ptr<ProfileGeneratorBase> Generator =
ProfileGeneratorBase::create(Binary.get(), Reader->getSampleCounters(),
Reader->getPerfScriptType());
Reader->profileIsCS());
Generator->generateProfile();
Generator->write();