[lld-macho][reland] Initial support for EH Frames

This reverts commit 942f4e3a7c.

The additional change required to avoid the assertion errors seen
previously is:

  --- a/lld/MachO/ICF.cpp
  +++ b/lld/MachO/ICF.cpp
  @@ -443,7 +443,9 @@ void macho::foldIdenticalSections() {
                                 /*relocVA=*/0);
           isec->data = copy;
         }
  -    } else {
  +    } else if (!isEhFrameSection(isec)) {
  +      // EH frames are gathered as hashables from unwindEntry above; give a
  +      // unique ID to everything else.
         isec->icfEqClass[0] = ++icfUniqueID;
       }
     }

Differential Revision: https://reviews.llvm.org/D123435
This commit is contained in:
Jez Ng 2022-06-12 21:56:45 -04:00
parent e4a21e1644
commit e183bf8e15
23 changed files with 991 additions and 110 deletions

View File

@ -13,6 +13,7 @@
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "mach-o/compact_unwind_encoding.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
@ -141,6 +142,10 @@ ARM64::ARM64() : ARM64Common(LP64()) {
backwardBranchRange = 128 * 1024 * 1024;
forwardBranchRange = backwardBranchRange - 4;
modeDwarfEncoding = UNWIND_ARM64_MODE_DWARF;
subtractorRelocType = ARM64_RELOC_SUBTRACTOR;
unsignedRelocType = ARM64_RELOC_UNSIGNED;
stubHelperHeaderSize = sizeof(stubHelperHeaderCode);
stubHelperEntrySize = sizeof(stubHelperEntryCode);
}

View File

@ -105,6 +105,10 @@ ARM64_32::ARM64_32() : ARM64Common(ILP32()) {
cpuType = CPU_TYPE_ARM64_32;
cpuSubtype = CPU_SUBTYPE_ARM64_V8;
modeDwarfEncoding = 0x04000000; // UNWIND_ARM_MODE_DWARF
subtractorRelocType = GENERIC_RELOC_INVALID; // FIXME
unsignedRelocType = GENERIC_RELOC_INVALID; // FIXME
stubSize = sizeof(stubCode);
stubHelperHeaderSize = sizeof(stubHelperHeaderCode);
stubHelperEntrySize = sizeof(stubHelperEntryCode);

View File

@ -12,6 +12,7 @@
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "mach-o/compact_unwind_encoding.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Endian.h"
@ -185,6 +186,10 @@ X86_64::X86_64() : TargetInfo(LP64()) {
cpuType = CPU_TYPE_X86_64;
cpuSubtype = CPU_SUBTYPE_X86_64_ALL;
modeDwarfEncoding = UNWIND_X86_MODE_DWARF;
subtractorRelocType = X86_64_RELOC_SUBTRACTOR;
unsignedRelocType = X86_64_RELOC_UNSIGNED;
stubSize = sizeof(stub);
stubHelperHeaderSize = sizeof(stubHelperHeader);
stubHelperEntrySize = sizeof(stubHelperEntry);

View File

@ -14,6 +14,7 @@ add_lld_library(lldMachO
Driver.cpp
DriverUtils.cpp
Dwarf.cpp
EhFrame.cpp
ExportTrie.cpp
ICF.cpp
InputFiles.cpp

View File

@ -130,6 +130,9 @@ struct Configuration {
bool dedupLiterals = true;
bool omitDebugInfo = false;
bool warnDylibInstallName = false;
// Temporary config flag that will be removed once we have fully implemented
// support for __eh_frame.
bool parseEhFrames = false;
uint32_t headerPad;
uint32_t dylibCompatibilityVersion = 0;
uint32_t dylibCurrentVersion = 0;

View File

@ -1039,8 +1039,9 @@ static void gatherInputSections() {
int inputOrder = 0;
for (const InputFile *file : inputFiles) {
for (const Section *section : file->sections) {
// Compact unwind entries require special handling elsewhere. (In
// contrast, EH frames are handled like regular ConcatInputSections.)
if (section->name == section_names::compactUnwind)
// Compact unwind entries require special handling elsewhere.
continue;
ConcatOutputSection *osec = nullptr;
for (const Subsection &subsection : section->subsections) {
@ -1302,6 +1303,7 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
config->callGraphProfileSort = args.hasFlag(
OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true);
config->printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order);
config->parseEhFrames = static_cast<bool>(getenv("LLD_IN_TEST"));
// FIXME: Add a commandline flag for this too.
config->zeroModTime = getenv("ZERO_AR_DATE");

140
lld/MachO/EhFrame.cpp Normal file
View File

@ -0,0 +1,140 @@
//===- EhFrame.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "EhFrame.h"
#include "InputFiles.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
using namespace lld;
using namespace lld::macho;
using namespace llvm::support::endian;
// Read and validate the length field of the CIE/FDE that starts at `*off`,
// advancing `*off` past the field (4 bytes, or 12 bytes when the DWARF64
// escape value is present). Returns the length of the rest of the entry.
// Reports an error via failOn() if the length field itself, or the entry it
// describes, does not fit within `data`.
uint64_t EhReader::readLength(size_t *off) const {
  const size_t errOff = *off;
  if (*off + 4 > data.size())
    failOn(errOff, "CIE/FDE too small");
  uint64_t len = read32le(data.data() + *off);
  *off += 4;
  if (len == dwarf::DW_LENGTH_DWARF64) {
    // FIXME: test this DWARF64 code path
    if (*off + 8 > data.size())
      failOn(errOff, "CIE/FDE too small");
    len = read64le(data.data() + *off);
    *off += 8;
  }
  // Compare against the number of remaining bytes instead of computing
  // `*off + len`: a corrupt 64-bit length can make that sum wrap around and
  // let an out-of-bounds entry pass validation. The checks above leave
  // `*off <= data.size()` (assuming failOn() does not return), so the
  // subtraction cannot underflow.
  if (len > data.size() - *off)
    failOn(errOff, "CIE/FDE extends past the end of the section");
  return len;
}
// Advance `*off` past a length field without validating it: 4 bytes normally,
// or 12 bytes when the 32-bit value is the DWARF64 escape marker. No bounds
// checking is done here.
void EhReader::skipValidLength(size_t *off) const {
  const uint32_t initialLength = read32le(data.data() + *off);
  const bool isDwarf64 = initialLength == dwarf::DW_LENGTH_DWARF64;
  *off += isDwarf64 ? 12 : 4;
}
// Return the byte located at `*off` and move `*off` forward by one. Reports an
// error via failOn() when `*off` lies beyond the end of the data.
uint8_t EhReader::readByte(size_t *off) const {
  const size_t pos = *off;
  if (pos + 1 > data.size())
    failOn(pos, "unexpected end of CIE/FDE");
  *off = pos + 1;
  return data[pos];
}
// Return the little-endian 32-bit value located at `*off` and move `*off`
// forward by four bytes. Reports an error via failOn() when fewer than four
// bytes remain.
uint32_t EhReader::readU32(size_t *off) const {
  const size_t pos = *off;
  if (pos + 4 > data.size())
    failOn(pos, "unexpected end of CIE/FDE");
  *off = pos + 4;
  return read32le(data.data() + pos);
}
// Return the little-endian, `wordSize`-byte value located at `*off` (widened
// to 64 bits) and move `*off` forward by `wordSize`. Only word sizes of 8 and
// 4 are supported. Reports an error via failOn() when fewer than `wordSize`
// bytes remain.
uint64_t EhReader::readPointer(size_t *off) const {
  const size_t pos = *off;
  if (pos + wordSize > data.size())
    failOn(pos, "unexpected end of CIE/FDE");
  const uint8_t *src = data.data() + pos;
  uint64_t value;
  if (wordSize != 8) {
    assert(wordSize == 4);
    value = read32le(src);
  } else {
    value = read64le(src);
  }
  *off = pos + wordSize;
  return value;
}
// Return the null-terminated string that begins at `*off` (terminator not
// included) and move `*off` just past the terminator. Reports an error via
// failOn() when `*off` is out of range or no terminator is found before the
// end of the data.
StringRef EhReader::readString(size_t *off) const {
  const size_t start = *off;
  if (start > data.size())
    failOn(start, "corrupted CIE (failed to read string)");
  const size_t remaining = data.size() - start;
  const char *str = reinterpret_cast<const char *>(data.data() + start);
  const size_t len = strnlen(str, remaining);
  if (len == remaining) // ran out of data before hitting a null byte
    failOn(start, "corrupted CIE (failed to read string)");
  *off = start + len + 1; // step over the null byte as well
  return StringRef(str, len);
}
// Move `*off` past one LEB128-encoded value, i.e. past every byte up to and
// including the first one whose high bit is clear. Reports an error via
// failOn() (at the starting offset) if the data ends first.
void EhReader::skipLeb128(size_t *off) const {
  const size_t start = *off;
  for (; *off < data.size(); ++*off) {
    const bool isFinalByte = (data[*off] & 0x80) == 0;
    if (isFinalByte) {
      ++*off; // consume the final byte too
      return;
    }
  }
  failOn(start, "corrupted CIE (failed to read LEB128)");
}
// Emit a fatal error that names the input file and the offset within
// __eh_frame at which parsing failed. `errOff` is relative to the start of
// this reader's data; `dataOff` rebases it to a section offset.
void EhReader::failOn(size_t errOff, const Twine &msg) const {
  const uint64_t sectionOff = dataOff + errOff;
  fatal(toString(file) + ":(__eh_frame+0x" + Twine::utohexstr(sectionOff) +
        "): " + msg);
}
/*
 * Append a subtractor/unsigned relocation pair located at `off` to
 * `newRelocs`. Together the pair encodes the value of:
 *   `b - (off + a)` if Invert == false
 *   `(a + off) - b` if Invert == true
 * Whichever operand ends up as the subtrahend must be a Symbol.
 */
template <bool Invert = false>
static void createSubtraction(PointerUnion<Symbol *, InputSection *> a,
                              PointerUnion<Symbol *, InputSection *> b,
                              uint64_t off, uint8_t length,
                              SmallVectorImpl<Reloc> *newRelocs) {
  // By default `a` is subtracted from `b`; Invert exchanges the two roles.
  const auto subtrahend = Invert ? b : a;
  const auto minuend = Invert ? a : b;
  assert(subtrahend.is<Symbol *>());
  // The addend is carried by the minuend reloc: -off when `b` is the minuend,
  // +off when `a` is.
  const int64_t minuendAddend = (Invert ? 1 : -1) * off;
  newRelocs->push_back(Reloc(target->subtractorRelocType, /*pcrel=*/false,
                             length, off, /*addend=*/0, subtrahend));
  newRelocs->push_back(Reloc(target->unsignedRelocType, /*pcrel=*/false,
                             length, off, minuendAddend, minuend));
}
void EhRelocator::makePcRel(uint64_t off,
PointerUnion<Symbol *, InputSection *> target,
uint8_t length) {
createSubtraction(isec->symbols[0], target, off, length, &newRelocs);
}
// Queue a reloc pair that writes `PC - target` at offset `off` of this EH
// frame. Here the frame's section is the minuend and `target` is the
// subtrahend.
void EhRelocator::makeNegativePcRel(
    uint64_t off, PointerUnion<Symbol *, InputSection *> target,
    uint8_t length) {
  constexpr bool invert = true;
  createSubtraction<invert>(isec, target, off, length, &newRelocs);
}
// Append every queued reloc to the end of the EH frame's own reloc list, in
// the order in which they were created.
void EhRelocator::commit() {
  auto &frameRelocs = isec->relocs;
  frameRelocs.insert(frameRelocs.end(), newRelocs.begin(), newRelocs.end());
}

120
lld/MachO/EhFrame.h Normal file
View File

@ -0,0 +1,120 @@
//===- EhFrame.h ------------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLD_MACHO_EH_FRAME_H
#define LLD_MACHO_EH_FRAME_H
#include "InputSection.h"
#include "Relocations.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallVector.h"
/*
* NOTE: The main bulk of the EH frame parsing logic is in InputFiles.cpp as it
* is closely coupled with other file parsing logic; EhFrame.h just contains a
* few helpers.
*/
/*
* === The EH frame format ===
*
* EH frames can either be Common Information Entries (CIEs) or Frame
* Description Entries (FDEs). CIEs contain information that is common amongst
* several FDEs. Each FDE contains a pointer to its CIE. Thus all the EH frame
* entries together form a forest of two-level trees, with CIEs as the roots
* and FDEs as the leaves. Note that a CIE must precede the FDEs which point
* to it.
*
* A CIE comprises the following fields in order:
* 1. Length of the entry (4 or 12 bytes)
* 2. CIE offset (4 bytes; always 0 for CIEs)
* 3. CIE version (byte)
* 4. Null-terminated augmentation string
* 5-8. LEB128 values that we don't care about
* 9. Augmentation data, to be interpreted using the aug string
* 10. DWARF instructions (ignored by LLD)
*
* An FDE comprises the following:
* 1. Length of the entry (4 or 12 bytes)
* 2. CIE offset (4 bytes pcrel offset that points backwards to this FDE's CIE)
* 3. Function address (pointer-sized pcrel offset)
* 4. (Optional) Augmentation data length
* 5. (Optional) LSDA address (pointer-sized pcrel offset)
* 6. DWARF instructions (ignored by LLD)
*/
namespace lld {
namespace macho {
// Bounds-checked, little-endian cursor over the bytes of an EH frame (or of a
// whole __eh_frame section). Every read/skip method takes the current offset
// by pointer and advances it past what was consumed.
class EhReader {
public:
// `file` and `dataOff` are used only to build error messages; `wordSize` is
// the number of bytes consumed by readPointer().
EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff,
size_t wordSize)
: file(file), data(data), dataOff(dataOff), wordSize(wordSize) {}
// Number of bytes available to this reader.
size_t size() const { return data.size(); }
// Read and validate the length field.
uint64_t readLength(size_t *off) const;
// Skip the length field without doing validation.
void skipValidLength(size_t *off) const;
// Read a single byte.
uint8_t readByte(size_t *off) const;
// Read a 4-byte little-endian value.
uint32_t readU32(size_t *off) const;
// Read a `wordSize`-byte little-endian value.
uint64_t readPointer(size_t *off) const;
// Read a null-terminated string; the terminator is consumed but not returned.
StringRef readString(size_t *off) const;
// Skip one LEB128-encoded value.
void skipLeb128(size_t *off) const;
// Report a fatal error located at `errOff` (relative to the start of `data`).
void failOn(size_t errOff, const Twine &msg) const;
private:
// The object file the data came from.
const ObjFile *file;
// The bytes being parsed.
ArrayRef<uint8_t> data;
// The offset of the data array within its section. Used only for error
// reporting.
const size_t dataOff;
// Pointer width in bytes; readPointer() supports only 4 and 8.
size_t wordSize;
};
// The EH frame format, when emitted by llvm-mc, consists of a number of
// "abs-ified" relocations, i.e. relocations that are implicitly encoded as
// pcrel offsets in the section data. The offsets refer to the locations of
// symbols in the input object file. When we ingest these EH frames, we convert
// these implicit relocations into explicit Relocs.
//
// These pcrel relocations are semantically similar to X86_64_RELOC_SIGNED_4.
// However, we need this operation to be cross-platform, and ARM does not have a
// similar relocation that is applicable. We therefore use the more verbose (but
// more generic) subtractor relocation to encode these pcrel values. ld64
// appears to do something similar -- its `-r` output contains these explicit
// subtractor relocations.
// Accumulates the explicit subtractor/unsigned Reloc pairs synthesized for a
// single EH frame, and appends them to that frame's reloc list on commit().
class EhRelocator {
public:
// `isec` is the EH frame that the generated relocs belong to.
EhRelocator(InputSection *isec) : isec(isec) {}
// For the next two methods, let `PC` denote `isec address + off`.
// `length` is stored as the `length` field of both generated Relocs; the
// callers visible in InputFiles.cpp pass log2 of the field width in bytes.
// Create relocs writing the value of target - PC to PC.
void makePcRel(uint64_t off,
llvm::PointerUnion<Symbol *, InputSection *> target,
uint8_t length);
// Create relocs writing the value of PC - target to PC.
void makeNegativePcRel(uint64_t off,
llvm::PointerUnion<Symbol *, InputSection *> target,
uint8_t length);
// Insert the new relocations into isec->relocs.
void commit();
private:
// The EH frame being relocated.
InputSection *isec;
// Insert new relocs here so that we don't invalidate iterators into the
// existing relocs vector.
SmallVector<Reloc, 6> newRelocs;
};
} // namespace macho
} // namespace lld
#endif

View File

@ -212,9 +212,9 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
// info matches. For simplicity, we only handle the case where there are only
// symbols at offset zero within the section (which is typically the case with
// .subsections_via_symbols.)
auto hasCU = [](Defined *d) { return d->unwindEntry != nullptr; };
auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU);
auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU);
auto hasUnwind = [](Defined *d) { return d->unwindEntry != nullptr; };
auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasUnwind);
auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasUnwind);
if (itA == ia->symbols.end())
return itB == ib->symbols.end();
if (itB == ib->symbols.end())
@ -443,7 +443,9 @@ void macho::foldIdenticalSections() {
/*relocVA=*/0);
isec->data = copy;
}
} else {
} else if (!isEhFrameSection(isec)) {
// EH frames are gathered as hashables from unwindEntry above; give a
// unique ID to everything else.
isec->icfEqClass[0] = ++icfUniqueID;
}
}

View File

@ -45,6 +45,7 @@
#include "Config.h"
#include "Driver.h"
#include "Dwarf.h"
#include "EhFrame.h"
#include "ExportTrie.h"
#include "InputSection.h"
#include "MachOStructs.h"
@ -323,6 +324,7 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
section, data.slice(off, recordSize), align);
subsections.push_back({off, isec});
}
section.doneSplitting = true;
};
if (sectionType(sec.flags) == S_CSTRING_LITERALS ||
@ -344,6 +346,9 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
section.subsections.push_back({0, isec});
} else if (auto recordSize = getRecordSize(segname, name)) {
splitRecords(*recordSize);
} else if (config->parseEhFrames && name == section_names::ehFrame &&
segname == segment_names::text) {
splitEhFrames(data, *sections.back());
} else if (segname == segment_names::llvm) {
if (config->callGraphProfileSort && name == section_names::cgProfile)
checkError(parseCallGraph(data, callGraph));
@ -371,6 +376,45 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
}
}
// Carve the raw contents of an __eh_frame section into one ConcatInputSection
// per CIE/FDE, using each entry's length field to find its end. Scanning
// stops at the end of the data or at the first entry whose length is zero.
void ObjFile::splitEhFrames(ArrayRef<uint8_t> data, Section &ehFrameSection) {
  EhReader reader(this, data, /*dataOff=*/0, target->wordSize);
  for (size_t off = 0; off < reader.size();) {
    const uint64_t frameStart = off;
    const uint64_t bodyLength = reader.readLength(&off);
    if (bodyLength == 0)
      break;
    // `off` now sits just past the length field, so the whole frame spans the
    // length field plus the body that it describes.
    const uint64_t frameSize = bodyLength + (off - frameStart);
    off += bodyLength;
    // The alignment is hard-coded to 1 because individual EH frames must not
    // be aligned to the section alignment: EH frame decoders don't expect it,
    // and every frame has to begin exactly where the previous one ends (as
    // given by its length field). Changing that would mean rewriting the
    // length fields, which is troublesome. The alignment of the section as a
    // whole is still preserved; only the per-frame alignment is dropped.
    auto *frameIsec = make<ConcatInputSection>(
        ehFrameSection, data.slice(frameStart, frameSize), /*align=*/1);
    ehFrameSection.subsections.push_back({frameStart, frameIsec});
  }
  ehFrameSection.doneSplitting = true;
}
// Find the section with the greatest start address that is <= `*offset`, and
// rebase `*offset` so that it becomes relative to the start of that section.
// The lookup is a binary search on `addr`, so `sections` is expected to be
// ordered by address. `*offset` must not precede the first section.
template <class T>
static Section *findContainingSection(const std::vector<Section *> &sections,
                                      T *offset) {
  static_assert(std::is_same<uint64_t, T>::value ||
                    std::is_same<uint32_t, T>::value,
                "unexpected type for offset");
  auto it = llvm::upper_bound(
      sections, *offset,
      [](uint64_t value, const Section *sec) { return value < sec->addr; });
  // upper_bound() yields the first section that starts after `*offset`, so the
  // one we want is its predecessor. Stepping back from begin() is undefined
  // behavior; catch that case (empty list, or an offset below every section)
  // in assert-enabled builds.
  assert(it != sections.begin() && "offset precedes the first section");
  --it;
  *offset -= (*it)->addr;
  return *it;
}
// Find the subsection corresponding to the greatest section offset that is <=
// that of the given offset.
//
@ -475,13 +519,6 @@ void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
relocation_info relInfo = relInfos[i];
bool isSubtrahend =
target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND);
if (isSubtrahend && StringRef(sec.sectname) == section_names::ehFrame) {
// __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
// adds local "EH_Frame1" and "func.eh". Ignore them because they have
// gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
++i;
continue;
}
int64_t pairedAddend = 0;
if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
@ -637,7 +674,8 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
}
assert(!isWeakDefCanBeHidden &&
"weak_def_can_be_hidden on already-hidden symbol?");
bool includeInSymtab = !name.startswith("l") && !name.startswith("L");
bool includeInSymtab =
!name.startswith("l") && !name.startswith("L") && !isEhFrameSection(isec);
return make<Defined>(
name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
/*isExternal=*/false, /*isPrivateExtern=*/false, includeInSymtab,
@ -730,20 +768,14 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
Subsections &subsections = sections[i]->subsections;
if (subsections.empty())
continue;
if (sections[i]->name == section_names::ehFrame) {
// __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
// adds local "EH_Frame1" and "func.eh". Ignore them because they have
// gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
continue;
}
std::vector<uint32_t> &symbolIndices = symbolsBySection[i];
uint64_t sectionAddr = sectionHeaders[i].addr;
uint32_t sectionAlign = 1u << sectionHeaders[i].align;
// Record-based sections have already been split into subsections during
// Some sections have already been split into subsections during
// parseSections(), so we simply need to match Symbols to the corresponding
// subsection here.
if (getRecordSize(sections[i]->segname, sections[i]->name)) {
if (sections[i]->doneSplitting) {
for (size_t j = 0; j < symbolIndices.size(); ++j) {
uint32_t symIndex = symbolIndices[j];
const NList &sym = nList[symIndex];
@ -760,6 +792,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
}
continue;
}
sections[i]->doneSplitting = true;
// Calculate symbol sizes and create subsections by splitting the sections
// along symbol boundaries.
@ -930,6 +963,8 @@ template <class LP> void ObjFile::parse() {
}
if (compactUnwindSection)
registerCompactUnwind(*compactUnwindSection);
if (config->parseEhFrames && ehFrameSection)
registerEhFrames(*ehFrameSection);
}
template <class LP> void ObjFile::parseLazy() {
@ -1003,6 +1038,12 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
// of the corresponding relocations.) We rely on `relocateCompactUnwind()`
// to correctly handle these truncated input sections.
isec->data = isec->data.slice(target->wordSize);
uint32_t encoding = read32le(isec->data.data() + sizeof(uint32_t));
// llvm-mc omits CU entries for functions that need DWARF encoding, but
// `ld -r` doesn't. We can ignore them because we will re-synthesize these
// CU entries from the DWARF info during the output phase.
if ((encoding & target->modeDwarfEncoding) == target->modeDwarfEncoding)
continue;
ConcatInputSection *referentIsec;
for (auto it = isec->relocs.begin(); it != isec->relocs.end();) {
@ -1053,6 +1094,252 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
}
}
// The subset of a Common Information Entry that is needed in order to parse
// the FDEs which refer to it.
struct CIE {
// Referent of the reloc found at the CIE's personality pointer; null if the
// augmentation string has no 'P'.
macho::Symbol *personalitySymbol = nullptr;
// Set when the augmentation string contains 'L'.
bool fdesHaveLsda = false;
// Set when the augmentation string contains 'z'.
bool fdesHaveAug = false;
};
// Parse the CIE stored in `isec`, starting at `off`, which must be the offset
// of the CIE's version byte within the data covered by `reader`. Returns the
// facts needed to parse dependent FDEs: whether they carry augmentation data,
// whether they carry an LSDA pointer, and the personality symbol (if any).
// Pointer encodings other than the expected ones are reported as errors.
static CIE parseCIE(const InputSection *isec, const EhReader &reader,
size_t off) {
// Handling the full generality of possible DWARF encodings would be a major
// pain. We instead take advantage of our knowledge of how llvm-mc encodes
// DWARF and handle just that.
constexpr uint8_t expectedPersonalityEnc =
dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_sdata4;
constexpr uint8_t expectedPointerEnc =
dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_absptr;
CIE cie;
uint8_t version = reader.readByte(&off);
if (version != 1 && version != 3)
fatal("Expected CIE version of 1 or 3, got " + Twine(version));
StringRef aug = reader.readString(&off);
reader.skipLeb128(&off); // skip code alignment
reader.skipLeb128(&off); // skip data alignment
reader.skipLeb128(&off); // skip return address register
reader.skipLeb128(&off); // skip aug data length
// Offset of the personality pointer within the CIE; stays 0 if there is none.
uint64_t personalityAddrOff = 0;
// Each augmentation character consumes its own piece of the augmentation
// data, in the order in which the characters appear.
for (char c : aug) {
switch (c) {
case 'z':
cie.fdesHaveAug = true;
break;
case 'P': {
uint8_t personalityEnc = reader.readByte(&off);
if (personalityEnc != expectedPersonalityEnc)
reader.failOn(off, "unexpected personality encoding 0x" +
Twine::utohexstr(personalityEnc));
personalityAddrOff = off;
// Step over the 4-byte personality pointer; its value is not read here.
off += 4;
break;
}
case 'L': {
cie.fdesHaveLsda = true;
uint8_t lsdaEnc = reader.readByte(&off);
if (lsdaEnc != expectedPointerEnc)
reader.failOn(off, "unexpected LSDA encoding 0x" +
Twine::utohexstr(lsdaEnc));
break;
}
case 'R': {
uint8_t pointerEnc = reader.readByte(&off);
if (pointerEnc != expectedPointerEnc)
reader.failOn(off, "unexpected pointer encoding 0x" +
Twine::utohexstr(pointerEnc));
break;
}
default:
break;
}
}
// The personality symbol is taken from the reloc located at the personality
// pointer; it is an error for that reloc to be missing.
if (personalityAddrOff != 0) {
auto personalityRelocIt =
llvm::find_if(isec->relocs, [=](const macho::Reloc &r) {
return r.offset == personalityAddrOff;
});
if (personalityRelocIt == isec->relocs.end())
reader.failOn(off, "Failed to locate relocation for personality symbol");
cie.personalitySymbol = personalityRelocIt->referent.get<macho::Symbol *>();
}
return cie;
}
// EH frame target addresses may be encoded as pcrel offsets. However, rather
// than using an actual pcrel reloc, ld64 emits a pair of subtractor
// relocations. This function recovers the target symbol from such a pair,
// essentially performing the inverse operation of EhRelocator.
//
// `relocIt` must point at the subtrahend reloc, with its paired minuend reloc
// immediately after it. By default, the subtrahend's symbol is taken to
// denote the PC (adjusted by the minuend's addend) and the minuend's referent
// is taken to be the target. If `Invert` is set, the roles are swapped: the
// minuend's referent denotes the PC and the subtrahend's symbol is the target.
//
// The PC symbol must belong to `isec` and, combined with the minuend's
// addend, must correspond to the offset being relocated; anything else is a
// fatal error.
//
// NOTE(review): in the non-inverted case the result may presumably be null if
// the minuend refers to a section in which no symbol is found at the given
// offset -- confirm against findSymbolAtOffset().
template <bool Invert = false>
Defined *
getTargetSymbolFromSubtraction(const InputSection *isec,
                               std::vector<macho::Reloc>::iterator relocIt) {
  const macho::Reloc &subtrahend = *relocIt;
  const macho::Reloc &minuend = *std::next(relocIt);
  assert(target->hasAttr(subtrahend.type, RelocAttrBits::SUBTRAHEND));
  assert(target->hasAttr(minuend.type, RelocAttrBits::UNSIGNED));
  // Note: pcSym may *not* be exactly at the PC; there's usually a non-zero
  // addend.
  auto *pcSym = cast<Defined>(subtrahend.referent.get<macho::Symbol *>());
  Defined *target =
      cast_or_null<Defined>(minuend.referent.dyn_cast<macho::Symbol *>());
  // The minuend may refer to a section rather than a symbol, in which case
  // `target` is still null and has to be looked up by offset. (This must test
  // `target`, not `pcSym`: the latter comes from cast<>, which never yields
  // null, so testing it would make this fallback unreachable.)
  if (!target) {
    auto *targetIsec =
        cast<ConcatInputSection>(minuend.referent.get<InputSection *>());
    target = findSymbolAtOffset(targetIsec, minuend.addend);
  }
  if (Invert)
    std::swap(pcSym, target);
  // After an inverted swap, pcSym holds the looked-up symbol, which may not
  // exist; report that as an invalid relocation rather than dereferencing it.
  if (!pcSym || pcSym->isec != isec ||
      pcSym->value - (Invert ? -1 : 1) * minuend.addend != subtrahend.offset)
    fatal("invalid FDE relocation in __eh_frame");
  return target;
}
// Resolve an address to a symbol: locate the section containing `addr`, then
// the subsection within it, and finally look up the symbol at the remaining
// offset inside that subsection.
Defined *findSymbolAtAddress(const std::vector<Section *> &sections,
                             uint64_t addr) {
  // Each lookup below rebases `addr` so that it is relative to what was found.
  Section *containingSec = findContainingSection(sections, &addr);
  auto *subsec = findContainingSubsection(*containingSec, &addr);
  return findSymbolAtOffset(cast<ConcatInputSection>(subsec), addr);
}
// For symbols that don't have compact unwind info, associate them with the more
// general-purpose (and verbose) DWARF unwind info found in __eh_frame.
//
// This requires us to parse the contents of __eh_frame. See EhFrame.h for a
// description of its format.
//
// While parsing, we also look for what MC calls "abs-ified" relocations -- they
// are relocations which are implicitly encoded as offsets in the section data.
// We convert them into explicit Reloc structs so that the EH frames can be
// handled just like a regular ConcatInputSection later in our output phase.
//
// We also need to handle the case where our input object file has explicit
// relocations. This is the case when e.g. it's the output of `ld -r`. We only
// look for the "abs-ified" relocation if an explicit relocation is absent.
void ObjFile::registerEhFrames(Section &ehFrameSection) {
// Parsed CIEs, keyed by the subsection that holds them. An FDE looks up its
// CIE here, so a CIE has to be visited before any FDE that refers to it.
DenseMap<const InputSection *, CIE> cieMap;
for (const Subsection &subsec : ehFrameSection.subsections) {
auto *isec = cast<ConcatInputSection>(subsec.isec);
uint64_t isecOff = subsec.offset;
// Subtractor relocs require the subtrahend to be a symbol reloc. Ensure
// that all EH frames have an associated symbol so that we can generate
// subtractor relocs that reference them.
if (isec->symbols.size() == 0)
isec->symbols.push_back(make<Defined>(
"EH_Frame", isec->getFile(), isec, /*value=*/0, /*size=*/0,
/*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
/*includeInSymtab=*/false, /*isThumb=*/false,
/*isReferencedDynamically=*/false, /*noDeadStrip=*/false));
else if (isec->symbols[0]->value != 0)
fatal("found symbol at unexpected offset in __eh_frame");
EhReader reader(this, isec->data, subsec.offset, target->wordSize);
size_t dataOff = 0; // Offset from the start of the EH frame.
reader.skipValidLength(&dataOff); // readLength() already validated this.
// cieOffOff is the offset from the start of the EH frame to the cieOff
// value, which is itself an offset from the current PC to a CIE.
const size_t cieOffOff = dataOff;
EhRelocator ehRelocator(isec);
auto cieOffRelocIt = llvm::find_if(
isec->relocs, [=](const Reloc &r) { return r.offset == cieOffOff; });
InputSection *cieIsec = nullptr;
if (cieOffRelocIt != isec->relocs.end()) {
// We already have an explicit relocation for the CIE offset.
cieIsec =
getTargetSymbolFromSubtraction</*Invert=*/true>(isec, cieOffRelocIt)
->isec;
dataOff += sizeof(uint32_t);
} else {
// If we haven't found a relocation, then the CIE offset is most likely
// embedded in the section data (AKA an "abs-ified" reloc.). Parse that
// and generate a Reloc struct.
uint32_t cieMinuend = reader.readU32(&dataOff);
// A zero CIE offset marks this frame as a CIE itself.
if (cieMinuend == 0)
cieIsec = isec;
else {
uint32_t cieOff = isecOff + dataOff - cieMinuend;
cieIsec = findContainingSubsection(ehFrameSection, &cieOff);
if (cieIsec == nullptr)
fatal("failed to find CIE");
}
if (cieIsec != isec)
ehRelocator.makeNegativePcRel(cieOffOff, cieIsec->symbols[0],
/*length=*/2);
}
// This frame is a CIE: record what its FDEs will need and move on.
if (cieIsec == isec) {
cieMap[cieIsec] = parseCIE(isec, reader, dataOff);
continue;
}
// Everything from here on deals with an FDE.
// Offset of the function address within the EH frame.
const size_t funcAddrOff = dataOff;
// The stored value is relative to its own location, so add that location
// back to obtain the function's address within the input file.
uint64_t funcAddr = reader.readPointer(&dataOff) + ehFrameSection.addr +
isecOff + funcAddrOff;
uint32_t funcLength = reader.readPointer(&dataOff);
size_t lsdaAddrOff = 0; // Offset of the LSDA address within the EH frame.
assert(cieMap.count(cieIsec));
const CIE &cie = cieMap[cieIsec];
Optional<uint64_t> lsdaAddrOpt;
if (cie.fdesHaveAug) {
// Skip the augmentation data length.
reader.skipLeb128(&dataOff);
lsdaAddrOff = dataOff;
if (cie.fdesHaveLsda) {
uint64_t lsdaOff = reader.readPointer(&dataOff);
if (lsdaOff != 0) // FIXME possible to test this?
lsdaAddrOpt = ehFrameSection.addr + isecOff + lsdaAddrOff + lsdaOff;
}
}
// Look for explicit relocs covering the function address and the LSDA
// address. Each match is assumed to be a subtrahend reloc immediately
// followed by its minuend reloc.
auto funcAddrRelocIt = isec->relocs.end();
auto lsdaAddrRelocIt = isec->relocs.end();
for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
if (it->offset == funcAddrOff)
funcAddrRelocIt = it++; // Found subtrahend; skip over minuend reloc
else if (lsdaAddrOpt && it->offset == lsdaAddrOff)
lsdaAddrRelocIt = it++; // Found subtrahend; skip over minuend reloc
}
Defined *funcSym;
if (funcAddrRelocIt != isec->relocs.end()) {
funcSym = getTargetSymbolFromSubtraction(isec, funcAddrRelocIt);
} else {
// No explicit reloc: resolve the embedded address and synthesize one.
funcSym = findSymbolAtAddress(sections, funcAddr);
ehRelocator.makePcRel(funcAddrOff, funcSym, target->p2WordSize);
}
// The symbol has been coalesced, or already has a compact unwind entry.
if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry) {
// We must prune unused FDEs for correctness, so we cannot rely on
// -dead_strip being enabled.
isec->live = false;
continue;
}
InputSection *lsdaIsec = nullptr;
if (lsdaAddrRelocIt != isec->relocs.end()) {
lsdaIsec = getTargetSymbolFromSubtraction(isec, lsdaAddrRelocIt)->isec;
} else if (lsdaAddrOpt) {
// No explicit reloc: resolve the embedded LSDA address to its subsection
// and synthesize one.
uint64_t lsdaAddr = *lsdaAddrOpt;
Section *sec = findContainingSection(sections, &lsdaAddr);
lsdaIsec =
cast<ConcatInputSection>(findContainingSubsection(*sec, &lsdaAddr));
ehRelocator.makePcRel(lsdaAddrOff, lsdaIsec, target->p2WordSize);
}
// Record the FDE, attach it to its function as the function's unwind entry,
// and only now add the synthesized relocs to the frame.
fdes[isec] = {funcLength, cie.personalitySymbol, lsdaIsec};
funcSym->unwindEntry = isec;
ehRelocator.commit();
}
}
// The path can point to either a dylib or a .tbd file.
static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
Optional<MemoryBufferRef> mbref = readFile(path);

View File

@ -60,7 +60,8 @@ struct Subsection {
using Subsections = std::vector<Subsection>;
class InputFile;
struct Section {
class Section {
public:
InputFile *file;
StringRef segname;
StringRef name;
@ -76,6 +77,13 @@ struct Section {
Section &operator=(const Section &) = delete;
Section(Section &&) = delete;
Section &operator=(Section &&) = delete;
private:
// Whether we have already split this section into individual subsections.
// For sections that cannot be split (e.g. literal sections), this is always
// false.
bool doneSplitting = false;
friend class ObjFile;
};
// Represents a call graph profile edge.
@ -135,6 +143,12 @@ private:
static int idCount;
};
// Facts extracted from a Frame Description Entry in __eh_frame.
struct FDE {
// Value of the function length field that follows the function address.
uint32_t funcLength;
// Personality symbol of the CIE this FDE refers to; may be null.
Symbol *personality;
// Input section that the FDE's LSDA pointer refers to; may be null.
InputSection *lsda;
};
// .o file
class ObjFile final : public InputFile {
public:
@ -146,10 +160,11 @@ public:
static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
llvm::DWARFUnit *compileUnit = nullptr;
Section *addrSigSection = nullptr;
const uint32_t modTime;
std::vector<ConcatInputSection *> debugSections;
std::vector<CallGraphEntry> callGraph;
Section *addrSigSection = nullptr;
llvm::DenseMap<ConcatInputSection *, FDE> fdes;
private:
template <class LP> void parseLazy();
@ -164,7 +179,9 @@ private:
void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
const SectionHeader &, Section &);
void parseDebugInfo();
void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
void registerCompactUnwind(Section &compactUnwindSection);
void registerEhFrames(Section &ehFrameSection);
};
// command-line -sectcreate file

View File

@ -268,6 +268,11 @@ bool macho::isClassRefsSection(const InputSection *isec) {
isec->getSegName() == segment_names::data;
}
// Whether `isec` belongs to the __eh_frame section of the text segment, as
// identified by its section and segment names.
bool macho::isEhFrameSection(const InputSection *isec) {
  if (isec->getSegName() != segment_names::text)
    return false;
  return isec->getName() == section_names::ehFrame;
}
std::string lld::toString(const InputSection *isec) {
return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str();
}

View File

@ -273,6 +273,7 @@ inline bool isWordLiteralSection(uint32_t flags) {
bool isCodeSection(const InputSection *);
bool isCfStringSection(const InputSection *);
bool isClassRefsSection(const InputSection *);
bool isEhFrameSection(const InputSection *);
extern std::vector<ConcatInputSection *> inputSections;

View File

@ -61,6 +61,13 @@ struct Reloc {
// gives the destination that this relocation refers to.
int64_t addend = 0;
llvm::PointerUnion<Symbol *, InputSection *> referent = nullptr;
Reloc() = default;
Reloc(uint8_t type, bool pcrel, uint8_t length, uint32_t offset,
int64_t addend, llvm::PointerUnion<Symbol *, InputSection *> referent)
: type(type), pcrel(pcrel), length(length), offset(offset),
addend(addend), referent(referent) {}
};
bool validateSymbolRelocation(const Symbol *, const InputSection *,

View File

@ -183,6 +183,7 @@ public:
uint64_t value;
// size is only calculated for regular (non-bitcode) symbols.
uint64_t size;
// This can be a subsection of either __compact_unwind or __eh_frame.
ConcatInputSection *unwindEntry = nullptr;
};

View File

@ -14,6 +14,7 @@
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
@ -37,6 +38,7 @@ public:
pageZeroSize = LP::pageZeroSize;
headerSize = sizeof(typename LP::mach_header);
wordSize = LP::wordSize;
p2WordSize = llvm::CTLog2<LP::wordSize>();
}
virtual ~TargetInfo() = default;
@ -85,12 +87,17 @@ public:
size_t stubSize;
size_t stubHelperHeaderSize;
size_t stubHelperEntrySize;
uint8_t p2WordSize;
size_t wordSize;
size_t thunkSize = 0;
uint64_t forwardBranchRange = 0;
uint64_t backwardBranchRange = 0;
uint32_t modeDwarfEncoding;
uint8_t subtractorRelocType;
uint8_t unsignedRelocType;
// We contrive this value as sufficiently far from any valid address that it
// will always be out-of-range for any architecture. UINT64_MAX is not a
// good choice because it is (a) only 1 away from wrapping to 0, and (b) the

View File

@ -28,6 +28,7 @@
using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::macho;
@ -222,7 +223,8 @@ void UnwindInfoSectionImpl::prepareRelocations() {
// entries to the GOT. Hence the use of a MapVector for
// UnwindInfoSection::symbols.
for (const Defined *d : make_second_range(symbols))
if (d->unwindEntry)
if (d->unwindEntry &&
d->unwindEntry->getName() == section_names::compactUnwind)
prepareRelocations(d->unwindEntry);
}
@ -331,6 +333,18 @@ void UnwindInfoSectionImpl::relocateCompactUnwind(
if (!d->unwindEntry)
return;
// If we have DWARF unwind info, create a CU entry that points to it.
if (d->unwindEntry->getName() == section_names::ehFrame) {
cu.encoding = target->modeDwarfEncoding | d->unwindEntry->outSecOff;
const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry];
cu.functionLength = fde.funcLength;
cu.personality = fde.personality;
cu.lsda = fde.lsda;
return;
}
assert(d->unwindEntry->getName() == section_names::compactUnwind);
auto buf = reinterpret_cast<const uint8_t *>(d->unwindEntry->data.data()) -
target->wordSize;
cu.functionLength =

View File

@ -950,8 +950,14 @@ template <class LP> void Writer::createOutputSections() {
StringRef segname = it.first.first;
ConcatOutputSection *osec = it.second;
assert(segname != segment_names::ld);
if (osec->isNeeded())
if (osec->isNeeded()) {
// See comment in ObjFile::splitEhFrames()
if (osec->name == section_names::ehFrame &&
segname == segment_names::text)
osec->align = target->wordSize;
getOrCreateOutputSegment(segname)->addOutputSection(osec);
}
}
for (SyntheticSection *ssec : syntheticSections) {

Binary file not shown.

161
lld/test/MachO/eh-frame.s Normal file
View File

@ -0,0 +1,161 @@
# REQUIRES: x86
# RUN: rm -rf %t; mkdir %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %s -o %t/eh-frame-x86_64.o
# RUN: %lld -lSystem -lc++ %t/eh-frame-x86_64.o -o %t/eh-frame-x86_64
# RUN: llvm-objdump --macho --syms --indirect-symbols --unwind-info \
# RUN: --dwarf=frames %t/eh-frame-x86_64 | FileCheck %s -D#BASE=0x100000000 -D#DWARF_ENC=4
# RUN: llvm-nm -m %t/eh-frame-x86_64 | FileCheck %s --check-prefix NO-EH-SYMS
# RUN: llvm-readobj --section-headers %t/eh-frame-x86_64 | FileCheck %s --check-prefix=ALIGN -D#ALIGN=3
## Test that we correctly handle the output of `ld -r`, which emits EH frames
## using subtractor relocations instead of implicitly encoding the offsets.
## In order to keep this test cross-platform, we check in ld64's output rather
## than invoking ld64 directly. NOTE: whenever this test is updated, the
## checked-in copy of `ld -r`'s output should be updated too!
# COM: ld -r %t/eh-frame-x86_64.o -o %S/Inputs/eh-frame-x86_64-r.o
# RUN: %lld -lSystem -lc++ %S/Inputs/eh-frame-x86_64-r.o -o %t/eh-frame-x86_64-r
# RUN: llvm-objdump --macho --syms --indirect-symbols --unwind-info \
# RUN: --dwarf=frames %t/eh-frame-x86_64-r | FileCheck %s -D#BASE=0x100000000 -D#DWARF_ENC=4
# RUN: llvm-nm -m %t/eh-frame-x86_64-r | FileCheck %s --check-prefix NO-EH-SYMS
# RUN: llvm-readobj --section-headers %t/eh-frame-x86_64-r | FileCheck %s --check-prefix=ALIGN -D#ALIGN=3
# ALIGN: Name: __eh_frame
# ALIGN-NEXT: Segment: __TEXT
# ALIGN-NEXT: Address:
# ALIGN-NEXT: Size:
# ALIGN-NEXT: Offset:
# ALIGN-NEXT: Alignment: [[#ALIGN]]
# NO-EH-SYMS-NOT: __eh_frame
# CHECK: Indirect symbols for (__DATA_CONST,__got) 2 entries
# CHECK: address index name
# CHECK: 0x[[#%x,GXX_PERSONALITY_GOT:]] {{.*}} ___gxx_personality_v0
# CHECK: 0x[[#%x,MY_PERSONALITY_GOT:]]
# CHECK: SYMBOL TABLE:
# CHECK-DAG: [[#%x,F:]] l F __TEXT,__text _f
# CHECK-DAG: [[#%x,NO_UNWIND:]] l F __TEXT,__text _no_unwind
# CHECK-DAG: [[#%x,G:]] l F __TEXT,__text _g
# CHECK-DAG: [[#%x,H:]] l F __TEXT,__text _h
# CHECK-DAG: [[#%x,EXCEPT0:]] l O __TEXT,__gcc_except_tab GCC_except_table0
# CHECK-DAG: [[#%x,EXCEPT1:]] l O __TEXT,__gcc_except_tab GCC_except_table1
# CHECK-DAG: [[#%x,EXCEPT2:]] l O __TEXT,custom_except custom_except_table2
# CHECK-DAG: [[#%x,MY_PERSONALITY:]] g F __TEXT,__text _my_personality
# CHECK: Contents of __unwind_info section:
# CHECK: Version: 0x1
# CHECK: Number of personality functions in array: 0x2
# CHECK: Number of indices in array: 0x2
# CHECK: Personality functions: (count = 2)
# CHECK: personality[1]: 0x[[#%.8x,GXX_PERSONALITY_GOT - BASE]]
# CHECK: personality[2]: 0x[[#%.8x,MY_PERSONALITY_GOT - BASE]]
# CHECK: LSDA descriptors:
# CHECK: [0]: function offset=0x[[#%.8x,F - BASE]], LSDA offset=0x[[#%.8x,EXCEPT0 - BASE]]
# CHECK: [1]: function offset=0x[[#%.8x,G - BASE]], LSDA offset=0x[[#%.8x,EXCEPT1 - BASE]]
# CHECK: [2]: function offset=0x[[#%.8x,H - BASE]], LSDA offset=0x[[#%.8x,EXCEPT2 - BASE]]
# CHECK: Second level indices:
# CHECK: Second level index[0]:
# CHECK: [0]: function offset=0x[[#%.8x,F - BASE]], encoding[{{.*}}]=0x52{{.*}}
# CHECK: [1]: function offset=0x[[#%.8x,NO_UNWIND - BASE]], encoding[{{.*}}]=0x00000000
# CHECK: [2]: function offset=0x[[#%.8x,G - BASE]], encoding[{{.*}}]=0x1[[#%x,DWARF_ENC]][[#%.6x, G_DWARF_OFF:]]
# CHECK: [3]: function offset=0x[[#%.8x,H - BASE]], encoding[{{.*}}]=0x2[[#%x,DWARF_ENC]][[#%.6x, H_DWARF_OFF:]]
# CHECK: [4]: function offset=0x[[#%.8x,MY_PERSONALITY - BASE]], encoding[{{.*}}]=0x00000000
# CHECK: .debug_frame contents:
# CHECK: .eh_frame contents:
# CHECK: [[#%.8x,CIE1_OFF:]] {{.*}} CIE
# CHECK: Format: DWARF32
# CHECK: Version: 1
# CHECK: Augmentation: "zPLR"
# CHECK: Code alignment factor: 1
# CHECK: Data alignment factor: -8
# CHECK: Return address column:
# CHECK: Personality Address: [[#%.16x,GXX_PERSONALITY_GOT]]
# CHECK: Augmentation data: 9B {{(([[:xdigit:]]{2} ){4})}}10 10
# CHECK: [[#%.8x,G_DWARF_OFF]] {{.*}} [[#%.8x,G_DWARF_OFF + 4 - CIE1_OFF]] FDE cie=[[#CIE1_OFF]] pc=[[#%x,G]]
# CHECK: Format: DWARF32
# CHECK: LSDA Address: [[#%.16x,EXCEPT1]]
# CHECK: DW_CFA_def_cfa_offset: +8
# CHECK: 0x[[#%x,G]]:
# CHECK: [[#%.8x,CIE2_OFF:]] {{.*}} CIE
# CHECK: Format: DWARF32
# CHECK: Version: 1
# CHECK: Augmentation: "zPLR"
# CHECK: Code alignment factor: 1
# CHECK: Data alignment factor: -8
# CHECK: Return address column:
# CHECK: Personality Address: [[#%.16x,MY_PERSONALITY_GOT]]
# CHECK: Augmentation data: 9B {{(([[:xdigit:]]{2} ){4})}}10 10
# CHECK: [[#%.8x,H_DWARF_OFF]] {{.*}} [[#%.8x,H_DWARF_OFF + 4 - CIE2_OFF]] FDE cie=[[#CIE2_OFF]] pc=[[#%x,H]]
# CHECK: Format: DWARF32
# CHECK: LSDA Address: [[#%.16x,EXCEPT2]]
# CHECK: DW_CFA_def_cfa_offset: +8
# CHECK: 0x[[#%x,H]]:
.globl _my_personality, _main
.text
## _f's unwind info can be encoded with compact unwind, so we shouldn't see an
## FDE entry for it in the output file.
.p2align 2
_f:
.cfi_startproc
.cfi_personality 155, ___gxx_personality_v0
.cfi_lsda 16, Lexception0
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.p2align 2
_no_unwind:
ret
.p2align 2
_g:
.cfi_startproc
.cfi_personality 155, ___gxx_personality_v0
.cfi_lsda 16, Lexception1
.cfi_def_cfa_offset 8
## cfi_escape cannot be encoded in compact unwind, so we must keep _g's FDE
.cfi_escape 0x2e, 0x10
ret
.cfi_endproc
.p2align 2
_h:
.cfi_startproc
.cfi_personality 155, _my_personality
.cfi_lsda 16, Lexception2
.cfi_def_cfa_offset 8
## cfi_escape cannot be encoded in compact unwind, so we must keep _h's FDE
.cfi_escape 0x2e, 0x10
ret
.cfi_endproc
.p2align 2
_my_personality:
ret
.p2align 2
_main:
ret
.section __TEXT,__gcc_except_tab
GCC_except_table0:
Lexception0:
.byte 255
GCC_except_table1:
Lexception1:
.byte 255
.section __TEXT,custom_except
custom_except_table2:
Lexception2:
.byte 255
.subsections_via_symbols

View File

@ -0,0 +1,83 @@
# REQUIRES: x86
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/too-small-1.s -o %t/too-small-1.o
# RUN: not %lld -lSystem -dylib %t/too-small-1.o -o /dev/null 2>&1 | FileCheck %s --check-prefix TOO-SMALL-1
# TOO-SMALL-1: error: {{.*}}too-small-1.o:(__eh_frame+0x0): CIE/FDE too small
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/too-small-2.s -o %t/too-small-2.o
# RUN: not %lld -lSystem -dylib %t/too-small-2.o -o /dev/null 2>&1 | FileCheck %s --check-prefix TOO-SMALL-2
# TOO-SMALL-2: error: {{.*}}too-small-2.o:(__eh_frame+0x0): CIE/FDE extends past the end of the section
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/personality-enc.s -o %t/personality-enc.o
# RUN: not %lld -lSystem -dylib %t/personality-enc.o -o /dev/null 2>&1 | FileCheck %s --check-prefix PERS-ENC
# PERS-ENC: error: {{.*}}personality-enc.o:(__eh_frame+0x12): unexpected personality encoding 0xb
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/pointer-enc.s -o %t/pointer-enc.o
# RUN: not %lld -lSystem -dylib %t/pointer-enc.o -o /dev/null 2>&1 | FileCheck %s --check-prefix PTR-ENC
# PTR-ENC: error: {{.*}}pointer-enc.o:(__eh_frame+0x11): unexpected pointer encoding 0x12
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/string-err.s -o %t/string-err.o
# RUN: not %lld -lSystem -dylib %t/string-err.o -o /dev/null 2>&1 | FileCheck %s --check-prefix STR
# STR: error: {{.*}}string-err.o:(__eh_frame+0x9): corrupted CIE (failed to read string)
#--- too-small-1.s
.p2align 3
.section __TEXT,__eh_frame
.short 0x3
.subsections_via_symbols
#--- too-small-2.s
.p2align 3
.section __TEXT,__eh_frame
.long 0x3 # length
.subsections_via_symbols
#--- personality-enc.s
.p2align 3
.section __TEXT,__eh_frame
.long 0x14 # length
.long 0 # CIE offset
.byte 1 # version
.asciz "zPR" # aug string
.byte 0x01 # code alignment
.byte 0x78 # data alignment
.byte 0x10 # return address register
.byte 0x01 # aug length
.byte 0x0b # personality encoding
.long 0xffff # personality pointer
.byte 0x10 # pointer encoding
.space 1 # pad to alignment
.subsections_via_symbols
#--- pointer-enc.s
.p2align 3
.section __TEXT,__eh_frame
.long 0x14 # length
.long 0 # CIE offset
.byte 1 # version
.asciz "zR" # aug string
.byte 0x01 # code alignment
.byte 0x78 # data alignment
.byte 0x10 # return address register
.byte 0x01 # aug length
.byte 0x12 # pointer encoding
.space 7 # pad to alignment
.subsections_via_symbols
#--- string-err.s
.p2align 3
.section __TEXT,__eh_frame
.long 0x7 # length
.long 0 # CIE offset
.byte 1 # version
.ascii "zR" # invalid aug string
.subsections_via_symbols

View File

@ -1,27 +1,33 @@
# REQUIRES: x86
## FIXME: This yaml is from an object file produced with 'ld -r'
## Replace this with "normal" .s test format once lld supports `-r`
## FIXME: This yaml is from an object file produced with 'ld -r':
##
## echo "int main() {return 1;}" > test.c
## clang -c -g -o test.o test.c
## ld -r -o test2.o test.o -no_data_in_code_info
##
## Replace this with "normal" .s test format once lld supports `-r`
# RUN: yaml2obj %s -o %t.o
# RUN: %lld -lSystem -platform_version macos 11.3 11.0 -arch x86_64 %t.o -o %t
# RUN: %lld -lSystem -arch x86_64 %t.o -o %t
--- !mach-o
FileHeader:
magic: 0xFEEDFACF
cputype: 0x01000007
cpusubtype: 0x00000003
filetype: 0x00000001
ncmds: 2
sizeofcmds: 384
flags: 0x00002000
reserved: 0x00000000
cputype: 0x1000007
cpusubtype: 0x3
filetype: 0x1
ncmds: 3
sizeofcmds: 288
flags: 0x2000
reserved: 0x0
LoadCommands:
- cmd: LC_SEGMENT_64
cmdsize: 312
cmdsize: 232
segname: ''
vmaddr: 0
vmsize: 120
fileoff: 448
filesize: 120
vmsize: 56
fileoff: 352
filesize: 56
maxprot: 7
initprot: 7
nsects: 2
@ -29,57 +35,33 @@ LoadCommands:
Sections:
- sectname: __text
segname: __TEXT
addr: 0x0000000000000000
addr: 0x0
size: 18
offset: 0x000001C0
offset: 0x160
align: 4
reloff: 0x00000000
reloff: 0x0
nreloc: 0
flags: 0x80000400
reserved1: 0x00000000
reserved2: 0x00000000
reserved3: 0x00000000
reserved1: 0x0
reserved2: 0x0
reserved3: 0x0
content: 554889E5C745FC00000000B8010000005DC3
- sectname: __eh_frame
segname: __TEXT
addr: 0x0000000000000018
size: 64
offset: 0x000001D8
- sectname: __compact_unwind
segname: __LD
addr: 0x18
size: 32
offset: 0x178
align: 3
reloff: 0x00000238
nreloc: 4
flags: 0x00000000
reserved1: 0x00000000
reserved2: 0x00000000
reserved3: 0x00000000
content: 1400000000000000017A520001781001100C0708900100002400000004000000F8FFFFFFFFFFFFFF120000000000000000410E108602430D0600000000000000
reloff: 0x198
nreloc: 1
flags: 0x2000000
reserved1: 0x0
reserved2: 0x0
reserved3: 0x0
content: '0000000000000000120000000000000100000000000000000000000000000000'
relocations:
- address: 0x0000001C
symbolnum: 0
pcrel: false
length: 2
extern: true
type: 5
scattered: false
value: 0
- address: 0x0000001C
symbolnum: 1
pcrel: false
length: 2
extern: true
type: 0
scattered: false
value: 0
- address: 0x00000020
symbolnum: 1
pcrel: false
length: 3
extern: true
type: 5
scattered: false
value: 0
- address: 0x00000020
symbolnum: 10
- address: 0x0
symbolnum: 8
pcrel: false
length: 3
extern: true
@ -88,39 +70,72 @@ LoadCommands:
value: 0
- cmd: LC_SYMTAB
cmdsize: 24
symoff: 608
nsyms: 11
stroff: 784
strsize: 72
symoff: 416
nsyms: 9
stroff: 560
strsize: 48
- cmd: LC_BUILD_VERSION
cmdsize: 32
platform: 1
minos: 659200
sdk: 0
ntools: 1
Tools:
- tool: 3
version: 46596096
LinkEditData:
NameList:
- n_strx: 8 ## N_STAB sym (in got)
n_type: 0x0E
n_sect: 2
- n_strx: 8
n_type: 0x64 ## N_SO STAB
n_sect: 0
n_desc: 0
n_value: 24
- n_strx: 18
n_type: 0x0E
n_sect: 2
n_value: 0
- n_strx: 14
n_type: 0x64 ## N_SO STAB
n_sect: 0
n_desc: 0
n_value: 48
n_value: 0
- n_strx: 21
n_type: 0x66 ## N_OSO STAB
n_sect: 3
n_desc: 1
n_value: 1651001352
- n_strx: 1
n_type: 0x4E
n_type: 0x2E ## N_BNSYM STAB
n_sect: 1
n_desc: 0
n_value: 0
- n_strx: 41
n_type: 0x24 ## N_FUN STAB
n_sect: 1
n_desc: 0
n_value: 0
- n_strx: 1
n_type: 0x24 ## N_FUN STAB
n_sect: 0
n_desc: 0
n_value: 18
- n_strx: 1
n_type: 0x4E ## N_ENSYM STAB
n_sect: 1
n_desc: 0
n_value: 18
- n_strx: 2 ## _main
n_type: 0x0F
- n_strx: 1
n_type: 0x64 ## N_SO STAB
n_sect: 1
n_desc: 0
n_value: 0
- n_strx: 2
n_type: 0xF
n_sect: 1
n_desc: 0
n_value: 0
StringTable:
- ' '
- _main
- EH_Frame1
- func.eh
- '/Users/vyng/'
- test.cc
- '/Users/vyng/test.o'
- '/tmp/'
- test.c
- '/private/tmp/test.o'
- _main
- ''
...

View File

@ -24,9 +24,6 @@ def print_function(name):
have_lsda = (random.random() < lsda_odds)
frame_size = random.randint(4, 64) * 16
frame_offset = -random.randint(0, (frame_size/16 - 4)) * 16
reg_count = random.randint(0, 5)
reg_combo = random.randint(0, factorial(reg_count) - 1)
regs_saved = saved_regs_combined[reg_count][reg_combo]
global func_size_low, func_size_high
func_size = random.randint(func_size_low, func_size_high) * 0x10
func_size_high += 1
@ -34,13 +31,13 @@ def print_function(name):
func_size_low += 1
print("""\
### %s regs=%d frame=%d lsda=%s size=%d
### %s frame=%d lsda=%s size=%d
.section __TEXT,__text,regular,pure_instructions
.p2align 4, 0x90
.globl %s
%s:
.cfi_startproc""" % (
name, reg_count, frame_size, have_lsda, func_size, name, name))
name, frame_size, have_lsda, func_size, name, name))
if have_lsda:
global lsda_n
lsda_n += 1
@ -53,8 +50,6 @@ def print_function(name):
.cfi_offset %%rbp, %d
movq %%rsp, %%rbp
.cfi_def_cfa_register %%rbp""" % (frame_size, frame_offset + 6*8))
for i in range(reg_count):
print(".cfi_offset %s, %d" % (regs_saved[i], frame_offset+(i*8)))
print("""\
.fill %d
popq %%rbp