[lld-macho] Use fewer indirections in UnwindInfo implementation

The previous implementation of UnwindInfoSection materialized
all the compact unwind entries & applied their relocations, then parsed
the resulting data to generate the final unwind info. This design had
some unfortunate conseqeuences: since relocations can only be applied
after their referents have had addresses assigned, operations that need
to happen before address assignment must contort themselves. (See
{D113582} and observe how this diff greatly simplifies it.)

Moreover, it made synthesizing new compact unwind entries awkward.
Handling PR50956 will require us to do this synthesis, and is the main
motivation behind this diff.

Previously, instead of generating a new CompactUnwindEntry directly, we
would have had to generate a ConcatInputSection with a number of
`Reloc`s that would then get "flattened" into a CompactUnwindEntry.

This diff introduces an internal representation of `CompactUnwindEntry`
(the former `CompactUnwindEntry` has been renamed to
`CompactUnwindLayout`). The new CompactUnwindEntry stores references to
its personality symbol and LSDA section directly, without the use of
`Reloc` structs.

In addition to being easier to work with, this diff also allows us to
handle unwind info whose personality symbols are located in sections
placed after the `__unwind_info`.

Reviewed By: #lld-macho, oontvoo

Differential Revision: https://reviews.llvm.org/D123276
This commit is contained in:
Jez Ng 2022-04-08 22:33:00 -04:00
parent a58d0af058
commit 82dcf30636
4 changed files with 63 additions and 113 deletions

View File

@ -1013,6 +1013,11 @@ void ObjFile::registerCompactUnwind() {
referentIsec = referentIsec =
cast<ConcatInputSection>(r.referent.dyn_cast<InputSection *>()); cast<ConcatInputSection>(r.referent.dyn_cast<InputSection *>());
} }
// Unwind info lives in __DATA, and finalization of __TEXT will occur
// before finalization of __DATA. Moreover, the finalization of unwind
// info depends on the exact addresses that it references. So it is safe
// for compact unwind to reference addresses in __TEXT, but not addresses
// in any other segment.
if (referentIsec->getSegName() != segment_names::text) if (referentIsec->getSegName() != segment_names::text)
error(isec->getLocation(r.offset) + " references section " + error(isec->getLocation(r.offset) + " references section " +
referentIsec->getName() + " which is not in segment __TEXT"); referentIsec->getName() + " which is not in segment __TEXT");

View File

@ -95,7 +95,7 @@ using namespace lld::macho;
// TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410 // TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410
// TODO(gkm): how do we align the 2nd-level pages? // TODO(gkm): how do we align the 2nd-level pages?
template <class Ptr> struct CompactUnwindEntry { template <class Ptr> struct CompactUnwindLayout {
Ptr functionAddress; Ptr functionAddress;
uint32_t functionLength; uint32_t functionLength;
compact_unwind_encoding_t encoding; compact_unwind_encoding_t encoding;
@ -103,6 +103,14 @@ template <class Ptr> struct CompactUnwindEntry {
Ptr lsda; Ptr lsda;
}; };
struct CompactUnwindEntry {
uint64_t functionAddress;
uint32_t functionLength;
compact_unwind_encoding_t encoding;
Symbol *personality;
InputSection *lsda;
};
using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>; using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;
struct SecondLevelPage { struct SecondLevelPage {
@ -118,8 +126,7 @@ template <class Ptr>
class UnwindInfoSectionImpl final : public UnwindInfoSection { class UnwindInfoSectionImpl final : public UnwindInfoSection {
public: public:
void prepareRelocations(ConcatInputSection *) override; void prepareRelocations(ConcatInputSection *) override;
void relocateCompactUnwind(std::vector<CompactUnwindEntry<Ptr>> &); void relocateCompactUnwind(std::vector<CompactUnwindEntry> &);
Reloc *findLsdaReloc(ConcatInputSection *) const;
void encodePersonalities(); void encodePersonalities();
void finalize() override; void finalize() override;
void writeTo(uint8_t *buf) const override; void writeTo(uint8_t *buf) const override;
@ -129,11 +136,10 @@ private:
EncodingMap commonEncodingIndexes; EncodingMap commonEncodingIndexes;
// The entries here will be in the same order as their originating symbols // The entries here will be in the same order as their originating symbols
// in symbolsVec. // in symbolsVec.
std::vector<CompactUnwindEntry<Ptr>> cuEntries; std::vector<CompactUnwindEntry> cuEntries;
// Indices into the cuEntries vector. // Indices into the cuEntries vector.
std::vector<size_t> cuIndices; std::vector<size_t> cuIndices;
// Indices of personality functions within the GOT. std::vector<Symbol *> personalities;
std::vector<Ptr> personalities;
SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *> SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *>
personalityTable; personalityTable;
// Indices into cuEntries for CUEs with a non-null LSDA. // Indices into cuEntries for CUEs with a non-null LSDA.
@ -206,8 +212,7 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
// compact unwind entries that references them, and thus appear as section // compact unwind entries that references them, and thus appear as section
// relocs. There is no need to prepare them. We only prepare relocs for // relocs. There is no need to prepare them. We only prepare relocs for
// personality functions. // personality functions.
if (r.offset % sizeof(CompactUnwindEntry<Ptr>) != if (r.offset != offsetof(CompactUnwindLayout<Ptr>, personality))
offsetof(CompactUnwindEntry<Ptr>, personality))
continue; continue;
if (auto *s = r.referent.dyn_cast<Symbol *>()) { if (auto *s = r.referent.dyn_cast<Symbol *>()) {
@ -275,55 +280,35 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
} }
} }
// Unwind info lives in __DATA, and finalization of __TEXT will occur before
// finalization of __DATA. Moreover, the finalization of unwind info depends on
// the exact addresses that it references. So it is safe for compact unwind to
// reference addresses in __TEXT, but not addresses in any other segment.
static ConcatInputSection *
checkTextSegment(InputSection *isec, InputSection *cuIsec, uint64_t off) {
if (isec->getSegName() != segment_names::text)
error(cuIsec->getLocation(off) + " references section " + isec->getName() +
" which is not in segment __TEXT");
// __TEXT should always contain ConcatInputSections.
return cast<ConcatInputSection>(isec);
}
// We need to apply the relocations to the pre-link compact unwind section // We need to apply the relocations to the pre-link compact unwind section
// before converting it to post-link form. There should only be absolute // before converting it to post-link form. There should only be absolute
// relocations here: since we are not emitting the pre-link CU section, there // relocations here: since we are not emitting the pre-link CU section, there
// is no source address to make a relative location meaningful. // is no source address to make a relative location meaningful.
template <class Ptr> template <class Ptr>
void UnwindInfoSectionImpl<Ptr>::relocateCompactUnwind( void UnwindInfoSectionImpl<Ptr>::relocateCompactUnwind(
std::vector<CompactUnwindEntry<Ptr>> &cuEntries) { std::vector<CompactUnwindEntry> &cuEntries) {
parallelForEachN(0, symbolsVec.size(), [&](size_t i) { parallelForEachN(0, symbolsVec.size(), [&](size_t i) {
uint8_t *buf = reinterpret_cast<uint8_t *>(cuEntries.data()) + CompactUnwindEntry &cu = cuEntries[i];
i * sizeof(CompactUnwindEntry<Ptr>);
const Defined *d = symbolsVec[i].second; const Defined *d = symbolsVec[i].second;
// Write the functionAddress. cu.functionAddress = d->getVA();
writeAddress(buf, d->getVA(), sizeof(Ptr) == 8 ? 3 : 2);
if (!d->unwindEntry) if (!d->unwindEntry)
return; return;
// Write the rest of the CUE. auto buf = reinterpret_cast<const uint8_t *>(d->unwindEntry->data.data()) -
memcpy(buf + sizeof(Ptr), d->unwindEntry->data.data(), sizeof(Ptr);
d->unwindEntry->data.size()); cu.functionLength = support::endian::read32le(
buf + offsetof(CompactUnwindLayout<Ptr>, functionLength));
cu.encoding = support::endian::read32le(
buf + offsetof(CompactUnwindLayout<Ptr>, encoding));
for (const Reloc &r : d->unwindEntry->relocs) { for (const Reloc &r : d->unwindEntry->relocs) {
uint64_t referentVA = 0; if (r.offset == offsetof(CompactUnwindLayout<Ptr>, personality)) {
if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) { cu.personality = r.referent.get<Symbol *>();
if (!isa<Undefined>(referentSym)) { } else if (r.offset == offsetof(CompactUnwindLayout<Ptr>, lsda)) {
if (auto *defined = dyn_cast<Defined>(referentSym)) if (auto *referentSym = r.referent.dyn_cast<Symbol *>())
checkTextSegment(defined->isec, d->unwindEntry, r.offset); cu.lsda = cast<Defined>(referentSym)->isec;
// At this point in the link, we may not yet know the final address of else
// the GOT, so we just encode the index. We make it a 1-based index so cu.lsda = r.referent.get<InputSection *>();
// that we can distinguish the null pointer case.
referentVA = referentSym->gotIndex + 1;
}
} else {
auto *referentIsec = r.referent.get<InputSection *>();
checkTextSegment(referentIsec, d->unwindEntry, r.offset);
referentVA = referentIsec->getVA(r.addend);
} }
writeAddress(buf + r.offset, referentVA, r.length);
} }
}); });
} }
@ -332,8 +317,8 @@ void UnwindInfoSectionImpl<Ptr>::relocateCompactUnwind(
// encode them as 2-bit indices into a small array. // encode them as 2-bit indices into a small array.
template <class Ptr> void UnwindInfoSectionImpl<Ptr>::encodePersonalities() { template <class Ptr> void UnwindInfoSectionImpl<Ptr>::encodePersonalities() {
for (size_t idx : cuIndices) { for (size_t idx : cuIndices) {
CompactUnwindEntry<Ptr> &cu = cuEntries[idx]; CompactUnwindEntry &cu = cuEntries[idx];
if (cu.personality == 0) if (cu.personality == nullptr)
continue; continue;
// Linear search is fast enough for a small array. // Linear search is fast enough for a small array.
auto it = find(personalities, cu.personality); auto it = find(personalities, cu.personality);
@ -349,7 +334,7 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::encodePersonalities() {
static_cast<compact_unwind_encoding_t>(UNWIND_PERSONALITY_MASK)); static_cast<compact_unwind_encoding_t>(UNWIND_PERSONALITY_MASK));
} }
if (personalities.size() > 3) if (personalities.size() > 3)
error("too many personalities (" + std::to_string(personalities.size()) + error("too many personalities (" + Twine(personalities.size()) +
") for compact unwind to encode"); ") for compact unwind to encode");
} }
@ -378,20 +363,6 @@ static bool canFoldEncoding(compact_unwind_encoding_t encoding) {
return true; return true;
} }
template <class Ptr>
Reloc *
UnwindInfoSectionImpl<Ptr>::findLsdaReloc(ConcatInputSection *isec) const {
if (isec == nullptr)
return nullptr;
auto it = llvm::find_if(isec->relocs, [](const Reloc &r) {
return r.offset % sizeof(CompactUnwindEntry<Ptr>) ==
offsetof(CompactUnwindEntry<Ptr>, lsda);
});
if (it == isec->relocs.end())
return nullptr;
return &*it;
}
// Scan the __LD,__compact_unwind entries and compute the space needs of // Scan the __LD,__compact_unwind entries and compute the space needs of
// __TEXT,__unwind_info and __TEXT,__eh_frame. // __TEXT,__unwind_info and __TEXT,__eh_frame.
template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() { template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
@ -434,25 +405,9 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
cuEntries[*foldBegin].encoding == cuEntries[*foldEnd].encoding && cuEntries[*foldBegin].encoding == cuEntries[*foldEnd].encoding &&
cuEntries[*foldBegin].personality == cuEntries[*foldBegin].personality ==
cuEntries[*foldEnd].personality && cuEntries[*foldEnd].personality &&
canFoldEncoding(cuEntries[*foldEnd].encoding)) { cuEntries[*foldBegin].lsda == cuEntries[*foldEnd].lsda &&
// In most cases, we can just compare the values of cuEntries[*].lsda. canFoldEncoding(cuEntries[*foldEnd].encoding))
// However, it is possible for -rename_section to cause the LSDA section ;
// (__gcc_except_tab) to be finalized after the unwind info section. In
// that case, we don't yet have unique addresses for the LSDA entries.
// So we check their relocations instead.
// FIXME: should we account for an LSDA at an absolute address? ld64 seems
// to support it, but it seems unlikely to be used in practice.
Reloc *lsda1 = findLsdaReloc(symbolsVec[*foldBegin].second->unwindEntry);
Reloc *lsda2 = findLsdaReloc(symbolsVec[*foldEnd].second->unwindEntry);
if (lsda1 == nullptr && lsda2 == nullptr)
continue;
if (lsda1 == nullptr || lsda2 == nullptr)
break;
if (lsda1->referent != lsda2->referent)
break;
if (lsda1->addend != lsda2->addend)
break;
}
*foldWrite++ = *foldBegin; *foldWrite++ = *foldBegin;
foldBegin = foldEnd; foldBegin = foldEnd;
} }
@ -510,7 +465,7 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
sizeof(uint32_t); sizeof(uint32_t);
while (wordsRemaining >= 1 && i < cuIndices.size()) { while (wordsRemaining >= 1 && i < cuIndices.size()) {
idx = cuIndices[i]; idx = cuIndices[i];
const CompactUnwindEntry<Ptr> *cuPtr = &cuEntries[idx]; const CompactUnwindEntry *cuPtr = &cuEntries[idx];
if (cuPtr->functionAddress >= functionAddressMax) { if (cuPtr->functionAddress >= functionAddressMax) {
break; break;
} else if (commonEncodingIndexes.count(cuPtr->encoding) || } else if (commonEncodingIndexes.count(cuPtr->encoding) ||
@ -545,8 +500,7 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
for (size_t idx : cuIndices) { for (size_t idx : cuIndices) {
lsdaIndex[idx] = entriesWithLsda.size(); lsdaIndex[idx] = entriesWithLsda.size();
const Defined *d = symbolsVec[idx].second; if (cuEntries[idx].lsda)
if (findLsdaReloc(d->unwindEntry))
entriesWithLsda.push_back(idx); entriesWithLsda.push_back(idx);
} }
@ -588,9 +542,8 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
*i32p++ = encoding.first; *i32p++ = encoding.first;
// Personalities // Personalities
for (Ptr personality : personalities) for (const Symbol *personality : personalities)
*i32p++ = *i32p++ = personality->getGotVA() - in.header->addr;
in.got->addr + (personality - 1) * target->wordSize - in.header->addr;
// Level-1 index // Level-1 index
uint32_t lsdaOffset = uint32_t lsdaOffset =
@ -609,7 +562,7 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
l2PagesOffset += SECOND_LEVEL_PAGE_BYTES; l2PagesOffset += SECOND_LEVEL_PAGE_BYTES;
} }
// Level-1 sentinel // Level-1 sentinel
const CompactUnwindEntry<Ptr> &cuEnd = cuEntries[cuIndices.back()]; const CompactUnwindEntry &cuEnd = cuEntries[cuIndices.back()];
iep->functionOffset = iep->functionOffset =
cuEnd.functionAddress - in.header->addr + cuEnd.functionLength; cuEnd.functionAddress - in.header->addr + cuEnd.functionLength;
iep->secondLevelPagesSectionOffset = 0; iep->secondLevelPagesSectionOffset = 0;
@ -622,18 +575,8 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
auto *lep = auto *lep =
reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep); reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep);
for (size_t idx : entriesWithLsda) { for (size_t idx : entriesWithLsda) {
const CompactUnwindEntry<Ptr> &cu = cuEntries[idx]; const CompactUnwindEntry &cu = cuEntries[idx];
const Defined *d = symbolsVec[idx].second; lep->lsdaOffset = cu.lsda->getVA(/*off=*/0) - in.header->addr;
if (Reloc *r = findLsdaReloc(d->unwindEntry)) {
uint64_t va;
if (auto *isec = r->referent.dyn_cast<InputSection *>()) {
va = isec->getVA(r->addend);
} else {
auto *sym = r->referent.get<Symbol *>();
va = sym->getVA() + r->addend;
}
lep->lsdaOffset = va - in.header->addr;
}
lep->functionOffset = cu.functionAddress - in.header->addr; lep->functionOffset = cu.functionAddress - in.header->addr;
lep++; lep++;
} }
@ -656,7 +599,7 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
p2p->encodingsCount = page.localEncodings.size(); p2p->encodingsCount = page.localEncodings.size();
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]); auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
for (size_t i = 0; i < page.entryCount; i++) { for (size_t i = 0; i < page.entryCount; i++) {
const CompactUnwindEntry<Ptr> &cue = const CompactUnwindEntry &cue =
cuEntries[cuIndices[page.entryIndex + i]]; cuEntries[cuIndices[page.entryIndex + i]];
auto it = commonEncodingIndexes.find(cue.encoding); auto it = commonEncodingIndexes.find(cue.encoding);
if (it == commonEncodingIndexes.end()) if (it == commonEncodingIndexes.end())
@ -676,7 +619,7 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
p2p->entryCount = page.entryCount; p2p->entryCount = page.entryCount;
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]); auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
for (size_t i = 0; i < page.entryCount; i++) { for (size_t i = 0; i < page.entryCount; i++) {
const CompactUnwindEntry<Ptr> &cue = const CompactUnwindEntry &cue =
cuEntries[cuIndices[page.entryIndex + i]]; cuEntries[cuIndices[page.entryIndex + i]];
*ep++ = cue.functionAddress; *ep++ = cue.functionAddress;
*ep++ = cue.encoding; *ep++ = cue.encoding;

View File

@ -81,6 +81,7 @@
.globl _my_personality, _exception0 .globl _my_personality, _exception0
.text .text
.p2align 2 .p2align 2
.no_dead_strip _foo
_foo: _foo:
.cfi_startproc .cfi_startproc
## This will generate a section relocation. ## This will generate a section relocation.
@ -91,6 +92,7 @@ _foo:
.cfi_endproc .cfi_endproc
.p2align 2 .p2align 2
.no_dead_strip _bar
_bar: _bar:
.cfi_startproc .cfi_startproc
## Check that we dedup references to the same statically-linked personality. ## Check that we dedup references to the same statically-linked personality.
@ -100,7 +102,11 @@ _bar:
ret ret
.cfi_endproc .cfi_endproc
.data
.p2align 2 .p2align 2
## We put this personality in `__data` to test if we correctly handle
## personality symbols whose output addresses occur after that of the
## `__unwind_info` section.
_my_personality: _my_personality:
ret ret
@ -108,6 +114,8 @@ _my_personality:
_exception0: _exception0:
.space 1 .space 1
.subsections_via_symbols
#--- main.s #--- main.s
.globl _main, _quux, _my_personality, _exception1 .globl _main, _quux, _my_personality, _exception1

View File

@ -1,10 +1,9 @@
# REQUIRES: x86 # REQUIRES: x86
# RUN: rm -rf %t; split-file %s %t # RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/bad-function.s -o %t/bad-function.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/bad-function.s -o %t/bad-function.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/bad-personality.s -o %t/bad-personality.o # RUN: not %lld -lSystem -dylib -lc++ %t/bad-function.o -o /dev/null 2>&1 | FileCheck %s
# RUN: not %lld -lSystem -lc++ %t/bad-function.o -o %t 2>&1 | FileCheck %s -DFILE=%t/bad-function.o -D#OFF=0x0 # CHECK: error: {{.*}}bad-function.o:(__compact_unwind+0x0) references section __data which is not in segment __TEXT
# RUN: not %lld -lSystem -lc++ %t/bad-personality.o -o %t 2>&1 | FileCheck %s -DFILE=%t/bad-personality.o -D#OFF=0x10 # CHECK: error: {{.*}}bad-function.o:(__compact_unwind+0x20) references section __data which is not in segment __TEXT
# CHECK: error: [[FILE]]:(__compact_unwind+0x[[#%x,OFF]]) references section __data which is not in segment __TEXT
#--- bad-function.s #--- bad-function.s
.data .data
@ -15,16 +14,11 @@ _not_a_function:
retq retq
.cfi_endproc .cfi_endproc
#--- bad-personality.s _not_a_function_2:
.globl _main, _not_a_function
.text
_main:
.cfi_startproc .cfi_startproc
.cfi_personality 155, _my_personality .cfi_personality 155, ___gxx_personality_v0
.cfi_def_cfa_offset 16 .cfi_def_cfa_offset 16
retq retq
.cfi_endproc .cfi_endproc
.data .subsections_via_symbols
.globl _my_personality
_my_personality: