[llvm-profgen] Strip context to support non-CS profile generation for hybrid sample
Differential Revision: https://reviews.llvm.org/D109769
This commit is contained in:
parent
c3717b6858
commit
a03cf331e1
|
@ -2,6 +2,8 @@
|
|||
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0
|
||||
; RUN: FileCheck %s --input-file %t
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0 --ignore-stack-samples
|
||||
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-STRIP-CTX
|
||||
|
||||
; CHECK:[main:1 @ foo]:309:0
|
||||
; CHECK: 2.1: 14
|
||||
|
@ -11,6 +13,18 @@
|
|||
; CHECK:[main:1 @ foo:3.1 @ bar]:84:0
|
||||
; CHECK: 1: 14
|
||||
|
||||
; CHECK-STRIP-CTX: main:379:0
|
||||
; CHECK-STRIP-CTX: 0: 0
|
||||
; CHECK-STRIP-CTX: 2: 0
|
||||
; CHECK-STRIP-CTX: 1: foo:379
|
||||
; CHECK-STRIP-CTX: 2.1: 14
|
||||
; CHECK-STRIP-CTX: 3: 15
|
||||
; CHECK-STRIP-CTX: 3.2: 1
|
||||
; CHECK-STRIP-CTX: 4: 0
|
||||
; CHECK-STRIP-CTX: 7: 0
|
||||
; CHECK-STRIP-CTX: 3.1: bar:84
|
||||
; CHECK-STRIP-CTX: 1: 14
|
||||
|
||||
; CHECK-UNWINDER: [main:1 @ foo]
|
||||
; CHECK-UNWINDER: 4
|
||||
; CHECK-UNWINDER: 670-6ad:1
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0
|
||||
; RUN: FileCheck %s --input-file %t
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0 --ignore-stack-samples
|
||||
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-STRIP-CTX
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0
|
||||
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-AGG-UNWINDER
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0
|
||||
|
@ -58,6 +60,19 @@
|
|||
; CHECK: 4: 1
|
||||
; CHECK: 5: 3
|
||||
|
||||
; CHECK-STRIP-CTX: foo:57:0
|
||||
; CHECK-STRIP-CTX: 0: 0
|
||||
; CHECK-STRIP-CTX: 1: 0
|
||||
; CHECK-STRIP-CTX: 2: 3
|
||||
; CHECK-STRIP-CTX: 3: 3 bar:3
|
||||
; CHECK-STRIP-CTX: 4: 0
|
||||
; CHECK-STRIP-CTX: 5: 0
|
||||
; CHECK-STRIP-CTX: bar:50:3
|
||||
; CHECK-STRIP-CTX: 0: 3
|
||||
; CHECK-STRIP-CTX: 1: 3
|
||||
; CHECK-STRIP-CTX: 2: 2
|
||||
; CHECK-STRIP-CTX: 4: 1
|
||||
; CHECK-STRIP-CTX: 5: 3
|
||||
|
||||
; CHECK-UNWINDER: [main:1 @ foo]
|
||||
; CHECK-UNWINDER-NEXT: 3
|
||||
|
|
|
@ -24,6 +24,11 @@ cl::opt<bool> UseOffset("use-offset", cl::ReallyHidden, cl::init(true),
|
|||
cl::ZeroOrMore,
|
||||
cl::desc("Work with `--skip-symbolization` to dump the "
|
||||
"offset instead of virtual address."));
|
||||
// Hidden boolean command-line option `--ignore-stack-samples`, off by default.
// HybridPerfReader::generateRawProfile() reads it: when set, the call stack
// part of hybrid samples is not unwound and a context-insensitive profile is
// produced instead.
cl::opt<bool>
|
||||
IgnoreStackSamples("ignore-stack-samples", cl::ReallyHidden,
|
||||
cl::init(false), cl::ZeroOrMore,
|
||||
cl::desc("Ignore call stack samples for hybrid samples "
|
||||
"and produce context-insensitive profile."));
|
||||
|
||||
extern cl::opt<bool> ShowDisassemblyOnly;
|
||||
extern cl::opt<bool> ShowSourceLocations;
|
||||
|
@ -365,9 +370,6 @@ void HybridPerfReader::unwindSamples() {
|
|||
WithColor::warning() << "Profile context truncated due to missing probe "
|
||||
<< "for call instruction at "
|
||||
<< format("%" PRIx64, Address) << "\n";
|
||||
|
||||
if (SkipSymbolization)
|
||||
writeRawProfile(OutputFilename);
|
||||
}
|
||||
|
||||
bool PerfReaderBase::extractLBRStack(TraceStream &TraceIt,
|
||||
|
@ -675,14 +677,18 @@ void LBRPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
|
|||
}
|
||||
|
||||
void LBRPerfReader::generateRawProfile() {
|
||||
assert(SampleCounters.size() == 1 && "Must have one entry of sample counter");
|
||||
// There is no context for LBR only sample, so initialize one entry with
|
||||
// fake "empty" context key.
|
||||
assert(SampleCounters.empty() &&
|
||||
"Sample counter map should be empty before raw profile generation");
|
||||
std::shared_ptr<StringBasedCtxKey> Key =
|
||||
std::make_shared<StringBasedCtxKey>();
|
||||
Key->genHashCode();
|
||||
SampleCounters.emplace(Hashable<ContextKey>(Key), SampleCounter());
|
||||
for (const auto &Item : AggregatedSamples) {
|
||||
const PerfSample *Sample = Item.first.getPtr();
|
||||
computeCounterFromLBR(Sample, Item.second);
|
||||
}
|
||||
|
||||
if (SkipSymbolization)
|
||||
PerfReaderBase::writeRawProfile(OutputFilename);
|
||||
}
|
||||
|
||||
uint64_t PerfReaderBase::parseAggregatedCount(TraceStream &TraceIt) {
|
||||
|
@ -774,7 +780,13 @@ PerfReaderBase::extractPerfType(cl::list<std::string> &PerfTraceFilenames) {
|
|||
return PerfType;
|
||||
}
|
||||
|
||||
void HybridPerfReader::generateRawProfile() { unwindSamples(); }
|
||||
// Generate the raw profile for hybrid (LBR + call stack) samples.
// ProfileIsCS is set to the negation of the IgnoreStackSamples option. When it
// is true the samples are unwound via unwindSamples(); otherwise the call is
// forwarded to LBRPerfReader::generateRawProfile().
void HybridPerfReader::generateRawProfile() {
|
||||
ProfileIsCS = !IgnoreStackSamples;
|
||||
if (ProfileIsCS)
|
||||
unwindSamples();
|
||||
else
|
||||
LBRPerfReader::generateRawProfile();
|
||||
}
|
||||
|
||||
void PerfReaderBase::warnTruncatedStack() {
|
||||
for (auto Address : InvalidReturnAddresses) {
|
||||
|
@ -793,6 +805,9 @@ void PerfReaderBase::parsePerfTraces(
|
|||
|
||||
warnTruncatedStack();
|
||||
generateRawProfile();
|
||||
|
||||
if (SkipSymbolization)
|
||||
writeRawProfile(OutputFilename);
|
||||
}
|
||||
|
||||
} // end namespace sampleprof
|
||||
|
|
|
@ -574,12 +574,12 @@ public:
|
|||
};
|
||||
|
||||
void updateBinaryAddress(const MMapEvent &Event);
|
||||
PerfScriptType getPerfScriptType() const { return PerfType; }
|
||||
// Entry of the reader to parse multiple perf traces
|
||||
void parsePerfTraces(cl::list<std::string> &PerfTraceFilenames);
|
||||
const ContextSampleCounterMap &getSampleCounters() const {
|
||||
return SampleCounters;
|
||||
}
|
||||
bool profileIsCS() { return ProfileIsCS; }
|
||||
|
||||
protected:
|
||||
static PerfScriptType
|
||||
|
@ -623,29 +623,8 @@ protected:
|
|||
PerfScriptType PerfType = PERF_UNKNOWN;
|
||||
// Keep track of all invalid return addresses
|
||||
std::set<uint64_t> InvalidReturnAddresses;
|
||||
};
|
||||
|
||||
/*
|
||||
Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
|
||||
which is used to generate CS profile. An example of hybrid sample:
|
||||
4005dc # call stack leaf
|
||||
400634
|
||||
400684 # call stack root
|
||||
0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
|
||||
... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
|
||||
*/
|
||||
class HybridPerfReader : public PerfReaderBase {
|
||||
public:
|
||||
HybridPerfReader(ProfiledBinary *Binary) : PerfReaderBase(Binary) {
|
||||
PerfType = PERF_LBR_STACK;
|
||||
};
|
||||
// Parse the hybrid sample including the call stack and LBR line
|
||||
void parseSample(TraceStream &TraceIt, uint64_t Count) override;
|
||||
void generateRawProfile() override;
|
||||
|
||||
private:
|
||||
// Unwind the hybrid samples after aggregation
|
||||
void unwindSamples();
|
||||
bool ProfileIsCS = false;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -657,21 +636,37 @@ private:
|
|||
class LBRPerfReader : public PerfReaderBase {
|
||||
public:
|
||||
LBRPerfReader(ProfiledBinary *Binary) : PerfReaderBase(Binary) {
|
||||
// There is no context for LBR only sample, so initialize one entry with
|
||||
// fake "empty" context key.
|
||||
std::shared_ptr<StringBasedCtxKey> Key =
|
||||
std::make_shared<StringBasedCtxKey>();
|
||||
Key->genHashCode();
|
||||
SampleCounters.emplace(Hashable<ContextKey>(Key), SampleCounter());
|
||||
PerfType = PERF_LBR;
|
||||
};
|
||||
|
||||
// Parse the LBR only sample.
|
||||
virtual void parseSample(TraceStream &TraceIt, uint64_t Count) override;
|
||||
virtual void generateRawProfile() override;
|
||||
|
||||
private:
|
||||
void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
|
||||
};
|
||||
|
||||
/*
|
||||
Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
|
||||
which is used to generate CS profile. An example of hybrid sample:
|
||||
4005dc # call stack leaf
|
||||
400634
|
||||
400684 # call stack root
|
||||
0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
|
||||
... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
|
||||
*/
|
||||
class HybridPerfReader : public LBRPerfReader {
|
||||
public:
|
||||
HybridPerfReader(ProfiledBinary *Binary) : LBRPerfReader(Binary) {
|
||||
PerfType = PERF_LBR_STACK;
|
||||
};
|
||||
// Parse the hybrid sample including the call stack and LBR line
|
||||
void parseSample(TraceStream &TraceIt, uint64_t Count) override;
|
||||
void generateRawProfile() override;
|
||||
|
||||
private:
|
||||
void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
|
||||
// Unwind the hybrid samples after aggregation
|
||||
void unwindSamples();
|
||||
};
|
||||
|
||||
} // end namespace sampleprof
|
||||
|
|
|
@ -77,15 +77,12 @@ int CSProfileGenerator::MaxContextDepth = -1;
|
|||
std::unique_ptr<ProfileGeneratorBase>
|
||||
ProfileGeneratorBase::create(ProfiledBinary *Binary,
|
||||
const ContextSampleCounterMap &SampleCounters,
|
||||
enum PerfScriptType SampleType) {
|
||||
bool ProfileIsCS) {
|
||||
std::unique_ptr<ProfileGeneratorBase> Generator;
|
||||
if (SampleType == PERF_LBR) {
|
||||
// TODO: Support probe based profile generation
|
||||
Generator.reset(new ProfileGenerator(Binary, SampleCounters));
|
||||
} else if (SampleType == PERF_LBR_STACK) {
|
||||
if (ProfileIsCS) {
|
||||
Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
|
||||
} else {
|
||||
llvm_unreachable("Unsupported perfscript!");
|
||||
Generator.reset(new ProfileGenerator(Binary, SampleCounters));
|
||||
}
|
||||
|
||||
return Generator;
|
||||
|
|
|
@ -34,7 +34,7 @@ public:
|
|||
virtual ~ProfileGeneratorBase() = default;
|
||||
static std::unique_ptr<ProfileGeneratorBase>
|
||||
create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters,
|
||||
enum PerfScriptType SampleType);
|
||||
bool ProfileIsCS);
|
||||
virtual void generateProfile() = 0;
|
||||
void write();
|
||||
|
||||
|
|
|
@ -95,7 +95,7 @@ int main(int argc, const char *argv[]) {
|
|||
|
||||
std::unique_ptr<ProfileGeneratorBase> Generator =
|
||||
ProfileGeneratorBase::create(Binary.get(), Reader->getSampleCounters(),
|
||||
Reader->getPerfScriptType());
|
||||
Reader->profileIsCS());
|
||||
Generator->generateProfile();
|
||||
Generator->write();
|
||||
|
||||
|
|
Loading…
Reference in New Issue