[lld/mac] Tweak estimateStubsInRangeVA a bit
- Move a few variables closer to their uses, remove some completely (no behavior change) - Add some comments - Make maxPotentialThunks include calls to stubs. It's possible that an earlier call to a stub late in the stub table will need a thunk, and that inserted thunk could push a stub earlier in the stub table out of range. This is unlikely to happen, but usually there are way fewer stub calls than non-stub calls, so if we're doing a conservative approximation here we might as well do it correctly. (For chromium's unit_tests target, 134421/242639 stub calls are direct calls without this change, compared to 134408/242639 with this change) No real, meaningful behavior difference. Differential Revision: https://reviews.llvm.org/D108924
This commit is contained in:
parent
0529e2e018
commit
83df94067d
|
@ -53,7 +53,7 @@ void ConcatOutputSection::addInput(ConcatInputSection *input) {
|
|||
// multiple thunks to the same destination distributed throughout a large
|
||||
// program so that all call sites can have one within range.
|
||||
//
|
||||
// The optimal approach is to mix islands for distinations within two hops,
|
||||
// The optimal approach is to mix islands for destinations within two hops,
|
||||
// and use thunks for destinations at greater distance. For now, we only
|
||||
// implement thunks. TODO: Adding support for branch islands!
|
||||
//
|
||||
|
@ -141,7 +141,7 @@ bool ConcatOutputSection::needsThunks() const {
|
|||
ThunkInfo &thunkInfo = thunkMap[sym];
|
||||
// Knowing ThunkInfo call site count will help us know whether or not we
|
||||
// might need to create more for this referent at the time we are
|
||||
// estimating distance to __stubs in .
|
||||
// estimating distance to __stubs in estimateStubsInRangeVA().
|
||||
++thunkInfo.callSiteCount;
|
||||
// Knowing InputSection call site count will help us avoid work on those
|
||||
// that have no BRANCH relocs.
|
||||
|
@ -153,28 +153,34 @@ bool ConcatOutputSection::needsThunks() const {
|
|||
|
||||
// Since __stubs is placed after __text, we must estimate the address
|
||||
// beyond which stubs are within range of a simple forward branch.
|
||||
// This is called exactly once, when the last input section has been finalized.
|
||||
uint64_t ConcatOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
|
||||
size_t endIdx = inputs.size();
|
||||
ConcatInputSection *isec = inputs[callIdx];
|
||||
uint64_t isecVA = isec->getVA();
|
||||
// Tally the non-stub functions which still have call sites
|
||||
// remaining to process, which yields the maximum number
|
||||
// of thunks we might yet place.
|
||||
// Tally the functions which still have call sites remaining to process,
|
||||
// which yields the maximum number of thunks we might yet place.
|
||||
size_t maxPotentialThunks = 0;
|
||||
for (auto &tp : thunkMap) {
|
||||
ThunkInfo &ti = tp.second;
|
||||
maxPotentialThunks +=
|
||||
!tp.first->isInStubs() && ti.callSitesUsed < ti.callSiteCount;
|
||||
// This overcounts: Only sections that are in forward jump range from the
|
||||
// currently-active section get finalized, and all input sections are
|
||||
// finalized when estimateStubsInRangeVA() is called. So only backward
|
||||
// jumps will need thunks, but we count all jumps.
|
||||
if (ti.callSitesUsed < ti.callSiteCount)
|
||||
maxPotentialThunks += 1;
|
||||
}
|
||||
// Tally the total size of input sections remaining to process.
|
||||
uint64_t isecEnd = isec->getVA();
|
||||
for (size_t i = callIdx; i < endIdx; i++) {
|
||||
uint64_t isecVA = inputs[callIdx]->getVA();
|
||||
uint64_t isecEnd = isecVA;
|
||||
for (size_t i = callIdx; i < inputs.size(); i++) {
|
||||
InputSection *isec = inputs[i];
|
||||
isecEnd = alignTo(isecEnd, isec->align) + isec->getSize();
|
||||
}
|
||||
// Estimate the address after which call sites can safely call stubs
|
||||
// directly rather than through intermediary thunks.
|
||||
uint64_t forwardBranchRange = target->forwardBranchRange;
|
||||
assert(isecEnd > forwardBranchRange &&
|
||||
"should not run thunk insertion if all code fits in jump range");
|
||||
assert(isecEnd - isecVA <= forwardBranchRange &&
|
||||
"should only finalize sections in jump range");
|
||||
uint64_t stubsInRangeVA = isecEnd + maxPotentialThunks * target->thunkSize +
|
||||
in.stubs->getSize() - forwardBranchRange;
|
||||
log("thunks = " + std::to_string(thunkMap.size()) +
|
||||
|
@ -264,6 +270,7 @@ void ConcatOutputSection::finalize() {
|
|||
ThunkInfo &thunkInfo = thunkMap[funcSym];
|
||||
// The referent is not reachable, so we need to use a thunk ...
|
||||
if (funcSym->isInStubs() && callVA >= stubsInRangeVA) {
|
||||
assert(callVA != TargetInfo::outOfRangeVA);
|
||||
// ... Oh, wait! We are close enough to the end that __stubs
|
||||
// are now within range of a simple forward branch.
|
||||
continue;
|
||||
|
@ -284,7 +291,7 @@ void ConcatOutputSection::finalize() {
|
|||
continue;
|
||||
}
|
||||
}
|
||||
// ... otherwise, create a new thunk
|
||||
// ... otherwise, create a new thunk.
|
||||
if (isecAddr > highVA) {
|
||||
// When there is small-to-no margin between highVA and
|
||||
// isecAddr and the distance between subsequent call sites is
|
||||
|
|
Loading…
Reference in New Issue