[lld-macho][reland] Initial support for EH Frames

This reverts commit 942f4e3a7c.

The additional change required to avoid the assertion errors seen previously is:

    --- a/lld/MachO/ICF.cpp
    +++ b/lld/MachO/ICF.cpp
    @@ -443,7 +443,9 @@ void macho::foldIdenticalSections() {
                                   /*relocVA=*/0);
             isec->data = copy;
           }
    -    } else {
    +    } else if (!isEhFrameSection(isec)) {
    +      // EH frames are gathered as hashables from unwindEntry above; give a
    +      // unique ID to everything else.
           isec->icfEqClass[0] = ++icfUniqueID;
         }
       }

Differential Revision: https://reviews.llvm.org/D123435
2022-06-12 21:56:45 -04:00 · 2022-06-12 21:56:45 -04:00 · e183bf8e15
parent e4a21e1644
commit e183bf8e15
23 changed files with 991 additions and 110 deletions
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@ -13,6 +13,7 @@
 #include "Target.h"

 #include "lld/Common/ErrorHandler.h"
+#include "mach-o/compact_unwind_encoding.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/MachO.h"
@ -141,6 +142,10 @@ ARM64::ARM64() : ARM64Common(LP64()) {
  backwardBranchRange = 128 * 1024 * 1024;
  forwardBranchRange = backwardBranchRange - 4;

+  modeDwarfEncoding = UNWIND_ARM64_MODE_DWARF;
+  subtractorRelocType = ARM64_RELOC_SUBTRACTOR;
+  unsignedRelocType = ARM64_RELOC_UNSIGNED;
+
  stubHelperHeaderSize = sizeof(stubHelperHeaderCode);
  stubHelperEntrySize = sizeof(stubHelperEntryCode);
 }
--- a/lld/MachO/Arch/ARM64_32.cpp
+++ b/lld/MachO/Arch/ARM64_32.cpp
@ -105,6 +105,10 @@ ARM64_32::ARM64_32() : ARM64Common(ILP32()) {
  cpuType = CPU_TYPE_ARM64_32;
  cpuSubtype = CPU_SUBTYPE_ARM64_V8;

+  modeDwarfEncoding = 0x04000000;              // UNWIND_ARM_MODE_DWARF
+  subtractorRelocType = GENERIC_RELOC_INVALID; // FIXME
+  unsignedRelocType = GENERIC_RELOC_INVALID;   // FIXME
+
  stubSize = sizeof(stubCode);
  stubHelperHeaderSize = sizeof(stubHelperHeaderCode);
  stubHelperEntrySize = sizeof(stubHelperEntryCode);
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@ -12,6 +12,7 @@
 #include "Target.h"

 #include "lld/Common/ErrorHandler.h"
+#include "mach-o/compact_unwind_encoding.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/Endian.h"

@ -185,6 +186,10 @@ X86_64::X86_64() : TargetInfo(LP64()) {
  cpuType = CPU_TYPE_X86_64;
  cpuSubtype = CPU_SUBTYPE_X86_64_ALL;

+  modeDwarfEncoding = UNWIND_X86_MODE_DWARF;
+  subtractorRelocType = X86_64_RELOC_SUBTRACTOR;
+  unsignedRelocType = X86_64_RELOC_UNSIGNED;
+
  stubSize = sizeof(stub);
  stubHelperHeaderSize = sizeof(stubHelperHeader);
  stubHelperEntrySize = sizeof(stubHelperEntry);
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@ -14,6 +14,7 @@ add_lld_library(lldMachO
  Driver.cpp
  DriverUtils.cpp
  Dwarf.cpp
+  EhFrame.cpp
  ExportTrie.cpp
  ICF.cpp
  InputFiles.cpp
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@ -130,6 +130,9 @@ struct Configuration {
  bool dedupLiterals = true;
  bool omitDebugInfo = false;
  bool warnDylibInstallName = false;
+  // Temporary config flag that will be removed once we have fully implemented
+  // support for __eh_frame.
+  bool parseEhFrames = false;
  uint32_t headerPad;
  uint32_t dylibCompatibilityVersion = 0;
  uint32_t dylibCurrentVersion = 0;
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@ -1039,8 +1039,9 @@ static void gatherInputSections() {
  int inputOrder = 0;
  for (const InputFile *file : inputFiles) {
    for (const Section *section : file->sections) {
+      // Compact unwind entries require special handling elsewhere. (In
+      // contrast, EH frames are handled like regular ConcatInputSections.)
      if (section->name == section_names::compactUnwind)
-        // Compact unwind entries require special handling elsewhere.
        continue;
      ConcatOutputSection *osec = nullptr;
      for (const Subsection &subsection : section->subsections) {
@ -1302,6 +1303,7 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
  config->callGraphProfileSort = args.hasFlag(
      OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true);
  config->printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order);
+  config->parseEhFrames = static_cast<bool>(getenv("LLD_IN_TEST"));

  // FIXME: Add a commandline flag for this too.
  config->zeroModTime = getenv("ZERO_AR_DATE");
--- a/lld/MachO/EhFrame.cpp
+++ b/lld/MachO/EhFrame.cpp
@ -0,0 +1,140 @@
+//===- EhFrame.cpp --------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "EhFrame.h"
+#include "InputFiles.h"
+
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace lld;
+using namespace lld::macho;
+using namespace llvm::support::endian;
+
+// Reads the length field of the CIE/FDE that starts at `*off` and advances
+// `*off` past it. The field is 4 bytes, or 12 bytes when the first word is the
+// DW_LENGTH_DWARF64 escape (the real length then follows as 8 bytes).
+// Returns the number of bytes in the entry after the length field. Calls
+// failOn() if the field itself, or the entry it describes, does not fit in
+// `data`.
+uint64_t EhReader::readLength(size_t *off) const {
+  const size_t errOff = *off;
+  if (*off + 4 > data.size())
+    failOn(errOff, "CIE/FDE too small");
+  uint64_t len = read32le(data.data() + *off);
+  *off += 4;
+  if (len == dwarf::DW_LENGTH_DWARF64) {
+    // FIXME: test this DWARF64 code path
+    if (*off + 8 > data.size())
+      failOn(errOff, "CIE/FDE too small");
+    len = read64le(data.data() + *off);
+    *off += 8;
+  }
+  // Compare against the number of bytes that remain instead of computing
+  // `*off + len`: with a 64-bit length taken from the input, that sum can wrap
+  // around and slip past the check. `*off <= data.size()` holds here because
+  // the checks above passed (failOn() reports via fatal()).
+  if (len > data.size() - *off)
+    failOn(errOff, "CIE/FDE extends past the end of the section");
+  return len;
+}
+
+// Advance `*off` past a length field without any bounds checking: 4 bytes
+// normally, 12 when the first word is the DW_LENGTH_DWARF64 escape.
+void EhReader::skipValidLength(size_t *off) const {
+  const uint32_t firstWord = read32le(data.data() + *off);
+  const bool isDwarf64 = firstWord == dwarf::DW_LENGTH_DWARF64;
+  *off += isDwarf64 ? 12 : 4;
+}
+
+// Fetch the byte at `*off` and step `*off` forward by one.
+uint8_t EhReader::readByte(size_t *off) const {
+  const size_t pos = *off;
+  if (pos + 1 > data.size())
+    failOn(pos, "unexpected end of CIE/FDE");
+  *off = pos + 1;
+  return data[pos];
+}
+
+// Decode the little-endian 32-bit value at `*off`, then advance `*off` by
+// four bytes.
+uint32_t EhReader::readU32(size_t *off) const {
+  const size_t pos = *off;
+  if (pos + 4 > data.size())
+    failOn(pos, "unexpected end of CIE/FDE");
+  *off = pos + 4;
+  return read32le(data.data() + pos);
+}
+
+// Decode a word-sized (4- or 8-byte) little-endian value at `*off` and advance
+// `*off` by the word size. A 4-byte value is widened to uint64_t unsigned.
+uint64_t EhReader::readPointer(size_t *off) const {
+  const size_t pos = *off;
+  if (pos + wordSize > data.size())
+    failOn(pos, "unexpected end of CIE/FDE");
+  *off = pos + wordSize;
+  const uint8_t *ptr = data.data() + pos;
+  if (wordSize == 8)
+    return read64le(ptr);
+  assert(wordSize == 4);
+  return read32le(ptr);
+}
+
+// Read the NUL-terminated string that begins at `*off`. On return `*off`
+// points just past the terminator; the returned StringRef excludes it.
+StringRef EhReader::readString(size_t *off) const {
+  const size_t start = *off;
+  if (start > data.size())
+    failOn(start, "corrupted CIE (failed to read string)");
+  const size_t remaining = data.size() - start;
+  const char *str = reinterpret_cast<const char *>(data.data() + start);
+  const size_t len = strnlen(str, remaining);
+  // strnlen() returns its limit when it found no terminator within it.
+  if (len == remaining)
+    failOn(start, "corrupted CIE (failed to read string)");
+  *off = start + len + 1; // the +1 steps over the terminator
+  return StringRef(str, len);
+}
+
+// Step `*off` past one LEB128 value, i.e. up to and including the first byte
+// whose high bit is clear. Fails if the data ends before such a byte.
+void EhReader::skipLeb128(size_t *off) const {
+  const size_t errOff = *off;
+  for (; *off < data.size(); ++*off) {
+    if ((data[*off] & 0x80) == 0) {
+      ++*off; // consume the final byte as well
+      return;
+    }
+  }
+  failOn(errOff, "corrupted CIE (failed to read LEB128)");
+}
+
+// Report a fatal error that names the file and the offending position.
+// `errOff` is relative to `data`; `dataOff` is added to it before printing.
+void EhReader::failOn(size_t errOff, const Twine &msg) const {
+  const size_t reportedOff = dataOff + errOff;
+  fatal(toString(file) + ":(__eh_frame+0x" + Twine::utohexstr(reportedOff) +
+        "): " + msg);
+}
+
+/*
+ * Append a subtractor/unsigned reloc pair, both at offset `off`, that together
+ * encode:
+ *   `b - (a + off)` when Invert == false
+ *   `(a + off) - b` when Invert == true
+ * Whichever operand ends up being subtracted must be a Symbol.
+ */
+template <bool Invert = false>
+static void createSubtraction(PointerUnion<Symbol *, InputSection *> a,
+                              PointerUnion<Symbol *, InputSection *> b,
+                              uint64_t off, uint8_t length,
+                              SmallVectorImpl<Reloc> *newRelocs) {
+  // By default `a` is subtracted from `b`; Invert flips the two roles.
+  auto minuend = Invert ? a : b;
+  auto subtrahend = Invert ? b : a;
+  assert(subtrahend.is<Symbol *>());
+  // The `off` term travels in the minuend's addend; its sign depends on which
+  // side of the subtraction `a + off` lands on.
+  const auto minuendAddend = (Invert ? 1 : -1) * off;
+  newRelocs->push_back(Reloc(target->subtractorRelocType, /*pcrel=*/false,
+                             length, off, /*addend=*/0, subtrahend));
+  newRelocs->push_back(Reloc(target->unsignedRelocType, /*pcrel=*/false,
+                             length, off, minuendAddend, minuend));
+}
+
+// Queue relocs that store `target - PC` at `off`, using the first symbol of
+// `isec` as the PC-side operand.
+void EhRelocator::makePcRel(uint64_t off,
+                            PointerUnion<Symbol *, InputSection *> target,
+                            uint8_t length) {
+  PointerUnion<Symbol *, InputSection *> pcSide = isec->symbols[0];
+  createSubtraction</*Invert=*/false>(pcSide, target, off, length,
+                                      &newRelocs);
+}
+
+// Queue relocs that store `PC - target` at `off`, with `isec` itself as the
+// PC-side operand. `target` becomes the subtrahend and so must be a Symbol.
+void EhRelocator::makeNegativePcRel(
+    uint64_t off, PointerUnion<Symbol *, InputSection *> target,
+    uint8_t length) {
+  PointerUnion<Symbol *, InputSection *> pcSide = isec;
+  createSubtraction</*Invert=*/true>(pcSide, target, off, length, &newRelocs);
+}
+
+// Append every queued reloc to the end of isec->relocs. The queue itself is
+// not cleared here.
+void EhRelocator::commit() {
+  isec->relocs.insert(isec->relocs.end(), newRelocs.begin(), newRelocs.end());
+}
--- a/lld/MachO/EhFrame.h
+++ b/lld/MachO/EhFrame.h
@ -0,0 +1,120 @@
+//===- EhFrame.h ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_EH_FRAME_H
+#define LLD_MACHO_EH_FRAME_H
+
+#include "InputSection.h"
+#include "Relocations.h"
+
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SmallVector.h"
+
+/*
+ * NOTE: The main bulk of the EH frame parsing logic is in InputFiles.cpp as it
+ * is closely coupled with other file parsing logic; EhFrame.h just contains a
+ * few helpers.
+ */
+
+/*
+ * === The EH frame format ===
+ *
+ * EH frames can either be Common Information Entries (CIEs) or Frame
+ * Description Entries (FDEs). CIEs contain information that is common amongst
+ * several FDEs. Each FDE contains a pointer to its CIE. Thus all the EH frame
+ * entries together form a forest of two-level trees, with CIEs as the roots
+ * and FDEs as the leaves. Note that a CIE must precede the FDEs which point
+ * to it.
+ *
+ * A CIE comprises the following fields in order:
+ * 1.   Length of the entry (4 or 12 bytes)
+ * 2.   CIE offset (4 bytes; always 0 for CIEs)
+ * 3.   CIE version (byte)
+ * 4.   Null-terminated augmentation string
+ * 5-8. LEB128 values that we don't care about
+ * 9.   Augmentation data, to be interpreted using the aug string
+ * 10.  DWARF instructions (ignored by LLD)
+ *
+ * An FDE comprises the following fields in order:
+ * 1. Length of the entry (4 or 12 bytes)
+ * 2. CIE offset (4 bytes pcrel offset that points backwards to this FDE's CIE)
+ * 3. Function address (pointer-sized pcrel offset)
+ * 4. (Optional) Augmentation data length
+ * 5. (Optional) LSDA address (pointer-sized pcrel offset)
+ * 6. DWARF instructions (ignored by LLD)
+ */
+namespace lld {
+namespace macho {
+
+// Bounds-checked cursor-style reader over the bytes of an EH frame (or of the
+// whole __eh_frame section). Every read takes the current offset by pointer
+// and advances it past what was consumed. Malformed input is reported through
+// failOn().
+class EhReader {
+public:
+  // `dataOff` is where `data` begins within its section (used for error
+  // messages only); `wordSize` is the pointer size in bytes (4 or 8).
+  EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff,
+           size_t wordSize)
+      : file(file), data(data), dataOff(dataOff), wordSize(wordSize) {}
+  // Number of bytes available to read.
+  size_t size() const { return data.size(); }
+  // Read and validate the length field.
+  uint64_t readLength(size_t *off) const;
+  // Skip the length field without doing validation.
+  void skipValidLength(size_t *off) const;
+  // Read one byte.
+  uint8_t readByte(size_t *off) const;
+  // Read a little-endian 32-bit value.
+  uint32_t readU32(size_t *off) const;
+  // Read a little-endian value of `wordSize` bytes.
+  uint64_t readPointer(size_t *off) const;
+  // Read a NUL-terminated string; the offset ends up past the terminator.
+  StringRef readString(size_t *off) const;
+  // Skip over one LEB128-encoded value without decoding it.
+  void skipLeb128(size_t *off) const;
+  // Report a fatal error located at `errOff` (an offset into `data`).
+  void failOn(size_t errOff, const Twine &msg) const;
+
+private:
+  // The object file the data came from; used in error messages.
+  const ObjFile *file;
+  // The bytes being read.
+  ArrayRef<uint8_t> data;
+  // The offset of the data array within its section. Used only for error
+  // reporting.
+  const size_t dataOff;
+  // Pointer size in bytes; readPointer() handles 8 and asserts 4 otherwise.
+  size_t wordSize;
+};
+
+// The EH frame format, when emitted by llvm-mc, consists of a number of
+// "abs-ified" relocations, i.e. relocations that are implicitly encoded as
+// pcrel offsets in the section data. The offsets refer to the locations of
+// symbols in the input object file. When we ingest these EH frames, we convert
+// these implicit relocations into explicit Relocs.
+//
+// These pcrel relocations are semantically similar to X86_64_RELOC_SIGNED_4.
+// However, we need this operation to be cross-platform, and ARM does not have a
+// similar relocation that is applicable. We therefore use the more verbose (but
+// more generic) subtractor relocation to encode these pcrel values. ld64
+// appears to do something similar -- its `-r` output contains these explicit
+// subtractor relocations.
+// Collects the explicit relocs to be added to a single EH frame `isec`. Relocs
+// are queued by the make*() methods and only become part of isec->relocs once
+// commit() is called.
+class EhRelocator {
+public:
+  EhRelocator(InputSection *isec) : isec(isec) {}
+
+  // For the next two methods, let `PC` denote `isec address + off`.
+  // `length` is forwarded to the generated Relocs unchanged.
+  // Create relocs writing the value of target - PC to PC.
+  void makePcRel(uint64_t off,
+                 llvm::PointerUnion<Symbol *, InputSection *> target,
+                 uint8_t length);
+  // Create relocs writing the value of PC - target to PC.
+  void makeNegativePcRel(uint64_t off,
+                         llvm::PointerUnion<Symbol *, InputSection *> target,
+                         uint8_t length);
+  // Insert the new relocations into isec->relocs.
+  void commit();
+
+private:
+  // The EH frame that the queued relocs belong to.
+  InputSection *isec;
+  // Insert new relocs here so that we don't invalidate iterators into the
+  // existing relocs vector.
+  SmallVector<Reloc, 6> newRelocs;
+};
+
+} // namespace macho
+} // namespace lld
+
+#endif
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@ -212,9 +212,9 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
  // info matches. For simplicity, we only handle the case where there are only
  // symbols at offset zero within the section (which is typically the case with
  // .subsections_via_symbols.)
-  auto hasCU = [](Defined *d) { return d->unwindEntry != nullptr; };
-  auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU);
-  auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU);
+  auto hasUnwind = [](Defined *d) { return d->unwindEntry != nullptr; };
+  auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasUnwind);
+  auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasUnwind);
  if (itA == ia->symbols.end())
    return itB == ib->symbols.end();
  if (itB == ib->symbols.end())
@ -443,7 +443,9 @@ void macho::foldIdenticalSections() {
                              /*relocVA=*/0);
        isec->data = copy;
      }
-    } else {
+    } else if (!isEhFrameSection(isec)) {
+      // EH frames are gathered as hashables from unwindEntry above; give a
+      // unique ID to everything else.
      isec->icfEqClass[0] = ++icfUniqueID;
    }
  }
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@ -45,6 +45,7 @@
 #include "Config.h"
 #include "Driver.h"
 #include "Dwarf.h"
+#include "EhFrame.h"
 #include "ExportTrie.h"
 #include "InputSection.h"
 #include "MachOStructs.h"
@ -323,6 +324,7 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
            section, data.slice(off, recordSize), align);
        subsections.push_back({off, isec});
      }
+      section.doneSplitting = true;
    };

    if (sectionType(sec.flags) == S_CSTRING_LITERALS ||
@ -344,6 +346,9 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
      section.subsections.push_back({0, isec});
    } else if (auto recordSize = getRecordSize(segname, name)) {
      splitRecords(*recordSize);
+    } else if (config->parseEhFrames && name == section_names::ehFrame &&
+               segname == segment_names::text) {
+      splitEhFrames(data, *sections.back());
    } else if (segname == segment_names::llvm) {
      if (config->callGraphProfileSort && name == section_names::cgProfile)
        checkError(parseCallGraph(data, callGraph));
@ -371,6 +376,45 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
  }
 }

+// Carve the raw __eh_frame contents into one ConcatInputSection per CIE/FDE
+// and record each one as a subsection of `ehFrameSection`, keyed by the
+// entry's offset within the section. A zero length field ends the scan.
+void ObjFile::splitEhFrames(ArrayRef<uint8_t> data, Section &ehFrameSection) {
+  EhReader reader(this, data, /*dataOff=*/0, target->wordSize);
+  for (size_t cursor = 0; cursor < reader.size();) {
+    const uint64_t frameStart = cursor;
+    // readLength() leaves `cursor` just past the length field.
+    const uint64_t bodyLength = reader.readLength(&cursor);
+    if (bodyLength == 0)
+      break;
+    const uint64_t frameLength = bodyLength + (cursor - frameStart);
+    cursor += bodyLength;
+    // The alignment is hard-coded to 1 because individual EH frames must not
+    // be aligned to the section alignment: EH frame decoders don't expect it,
+    // and each frame has to begin exactly where the previous one ends, which
+    // is dictated by the length field. Short of rewriting that field
+    // (troublesome), the alignment has to stay at 1.
+    // The alignment of the section as a whole is still preserved; only the
+    // per-frame alignment is dropped.
+    auto *frameIsec = make<ConcatInputSection>(
+        ehFrameSection, data.slice(frameStart, frameLength), /*align=*/1);
+    ehFrameSection.subsections.push_back({frameStart, frameIsec});
+  }
+  ehFrameSection.doneSplitting = true;
+}
+
+// Locate the Section that contains the input-file address `*offset`, and
+// rewrite `*offset` to be relative to that section's start address.
+//
+// The lookup is a binary search on Section::addr, so `sections` is expected
+// to be ordered by ascending address. An address below the first section (or
+// an empty section list) is reported as a fatal error rather than stepping
+// before the beginning of the vector.
+template <class T>
+static Section *findContainingSection(const std::vector<Section *> &sections,
+                                      T *offset) {
+  static_assert(std::is_same<uint64_t, T>::value ||
+                    std::is_same<uint32_t, T>::value,
+                "unexpected type for offset");
+  auto it = llvm::upper_bound(
+      sections, *offset,
+      [](uint64_t value, const Section *sec) { return value < sec->addr; });
+  // upper_bound() yields the first section that starts *after* the address;
+  // the containing section is its predecessor, which exists only if `it` is
+  // not already at the front.
+  if (it == sections.begin())
+    fatal("address 0x" + Twine::utohexstr(*offset) +
+          " is not contained in any section");
+  --it;
+  *offset -= (*it)->addr;
+  return *it;
+}
+
 // Find the subsection corresponding to the greatest section offset that is <=
 // that of the given offset.
 //
@ -475,13 +519,6 @@ void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
    relocation_info relInfo = relInfos[i];
    bool isSubtrahend =
        target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND);
-    if (isSubtrahend && StringRef(sec.sectname) == section_names::ehFrame) {
-      // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
-      // adds local "EH_Frame1" and "func.eh". Ignore them because they have
-      // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
-      ++i;
-      continue;
-    }
    int64_t pairedAddend = 0;
    if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
      pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
@ -637,7 +674,8 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
  }
  assert(!isWeakDefCanBeHidden &&
         "weak_def_can_be_hidden on already-hidden symbol?");
-  bool includeInSymtab = !name.startswith("l") && !name.startswith("L");
+  bool includeInSymtab =
+      !name.startswith("l") && !name.startswith("L") && !isEhFrameSection(isec);
  return make<Defined>(
      name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
      /*isExternal=*/false, /*isPrivateExtern=*/false, includeInSymtab,
@ -730,20 +768,14 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
    Subsections &subsections = sections[i]->subsections;
    if (subsections.empty())
      continue;
-    if (sections[i]->name == section_names::ehFrame) {
-      // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
-      // adds local "EH_Frame1" and "func.eh". Ignore them because they have
-      // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
-      continue;
-    }
    std::vector<uint32_t> &symbolIndices = symbolsBySection[i];
    uint64_t sectionAddr = sectionHeaders[i].addr;
    uint32_t sectionAlign = 1u << sectionHeaders[i].align;

-    // Record-based sections have already been split into subsections during
+    // Some sections have already been split into subsections during
    // parseSections(), so we simply need to match Symbols to the corresponding
    // subsection here.
-    if (getRecordSize(sections[i]->segname, sections[i]->name)) {
+    if (sections[i]->doneSplitting) {
      for (size_t j = 0; j < symbolIndices.size(); ++j) {
        uint32_t symIndex = symbolIndices[j];
        const NList &sym = nList[symIndex];
@ -760,6 +792,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
      }
      continue;
    }
+    sections[i]->doneSplitting = true;

    // Calculate symbol sizes and create subsections by splitting the sections
    // along symbol boundaries.
@ -930,6 +963,8 @@ template <class LP> void ObjFile::parse() {
  }
  if (compactUnwindSection)
    registerCompactUnwind(*compactUnwindSection);
+  if (config->parseEhFrames && ehFrameSection)
+    registerEhFrames(*ehFrameSection);
 }

 template <class LP> void ObjFile::parseLazy() {
@ -1003,6 +1038,12 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
    // of the corresponding relocations.) We rely on `relocateCompactUnwind()`
    // to correctly handle these truncated input sections.
    isec->data = isec->data.slice(target->wordSize);
+    uint32_t encoding = read32le(isec->data.data() + sizeof(uint32_t));
+    // llvm-mc omits CU entries for functions that need DWARF encoding, but
+    // `ld -r` doesn't. We can ignore them because we will re-synthesize these
+    // CU entries from the DWARF info during the output phase.
+    if ((encoding & target->modeDwarfEncoding) == target->modeDwarfEncoding)
+      continue;

    ConcatInputSection *referentIsec;
    for (auto it = isec->relocs.begin(); it != isec->relocs.end();) {
@ -1053,6 +1094,252 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
  }
 }

+// The subset of a Common Information Entry that is needed to interpret the
+// FDEs that refer to it. Filled in by parseCIE().
+struct CIE {
+  // Referent of the reloc at the personality pointer ('P' augmentation);
+  // null when the CIE has no personality.
+  macho::Symbol *personalitySymbol = nullptr;
+  // Set when the augmentation string contains 'L'.
+  bool fdesHaveLsda = false;
+  // Set when the augmentation string contains 'z'.
+  bool fdesHaveAug = false;
+};
+
+// Parse the CIE stored in `isec`. `off` is the position within the entry at
+// which the version byte is read (registerEhFrames passes the offset just past
+// the length and CIE-offset fields). Returns the personality symbol, if any,
+// and which optional fields the CIE's FDEs carry. Only the pointer encodings
+// listed below are accepted; any other encoding is a fatal error.
+static CIE parseCIE(const InputSection *isec, const EhReader &reader,
+                    size_t off) {
+  // Handling the full generality of possible DWARF encodings would be a major
+  // pain. We instead take advantage of our knowledge of how llvm-mc encodes
+  // DWARF and handle just that.
+  constexpr uint8_t expectedPersonalityEnc =
+      dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_sdata4;
+  constexpr uint8_t expectedPointerEnc =
+      dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_absptr;
+
+  CIE cie;
+  uint8_t version = reader.readByte(&off);
+  if (version != 1 && version != 3)
+    fatal("Expected CIE version of 1 or 3, got " + Twine(version));
+  StringRef aug = reader.readString(&off);
+  reader.skipLeb128(&off); // skip code alignment
+  reader.skipLeb128(&off); // skip data alignment
+  reader.skipLeb128(&off); // skip return address register
+  // NOTE(review): the next skip happens even if `aug` contains no 'z' --
+  // presumably fine for llvm-mc output; confirm.
+  reader.skipLeb128(&off); // skip aug data length
+  // Offset of the personality pointer within the CIE; zero doubles as "no 'P'
+  // augmentation seen".
+  uint64_t personalityAddrOff = 0;
+  // Each augmentation character describes one piece of augmentation data, in
+  // order.
+  for (char c : aug) {
+    switch (c) {
+    case 'z':
+      cie.fdesHaveAug = true;
+      break;
+    case 'P': {
+      uint8_t personalityEnc = reader.readByte(&off);
+      if (personalityEnc != expectedPersonalityEnc)
+        reader.failOn(off, "unexpected personality encoding 0x" +
+                               Twine::utohexstr(personalityEnc));
+      // The pointer is sdata4 (checked above), hence 4 bytes. Its value is
+      // not read; only its position is kept so that the reloc covering it can
+      // be looked up below.
+      personalityAddrOff = off;
+      off += 4;
+      break;
+    }
+    case 'L': {
+      cie.fdesHaveLsda = true;
+      uint8_t lsdaEnc = reader.readByte(&off);
+      if (lsdaEnc != expectedPointerEnc)
+        reader.failOn(off, "unexpected LSDA encoding 0x" +
+                               Twine::utohexstr(lsdaEnc));
+      break;
+    }
+    case 'R': {
+      uint8_t pointerEnc = reader.readByte(&off);
+      if (pointerEnc != expectedPointerEnc)
+        reader.failOn(off, "unexpected pointer encoding 0x" +
+                               Twine::utohexstr(pointerEnc));
+      break;
+    }
+    default:
+      // Other augmentation characters are ignored.
+      break;
+    }
+  }
+  if (personalityAddrOff != 0) {
+    // The personality symbol is taken from the reloc whose offset matches the
+    // personality pointer, not from the section data.
+    auto personalityRelocIt =
+        llvm::find_if(isec->relocs, [=](const macho::Reloc &r) {
+          return r.offset == personalityAddrOff;
+        });
+    if (personalityRelocIt == isec->relocs.end())
+      reader.failOn(off, "Failed to locate relocation for personality symbol");
+    cie.personalitySymbol = personalityRelocIt->referent.get<macho::Symbol *>();
+  }
+  return cie;
+}
+
+// EH frame target addresses may be encoded as pcrel offsets. However, instead
+// of using an actual pcrel reloc, ld64 emits subtractor relocations instead.
+// This function recovers the target address from the subtractors, essentially
+// performing the inverse operation of EhRelocator.
+//
+// `relocIt` must point at the subtrahend (SUBTRACTOR) reloc of the pair; its
+// minuend (UNSIGNED) partner is expected to follow it immediately.
+//
+// By default, we expect the pair to write `target_addr - PC` to `PC`: the
+// subtrahend symbol stands for `PC` and the minuend denotes the target.
+//
+// If `Invert` is set, we instead expect `PC - target_addr` to be written to
+// `PC`: the minuend stands for `PC` and the subtrahend symbol is the target.
+//
+// Returns the target symbol. This may be null when Invert is false, the
+// minuend refers to a section, and findSymbolAtOffset() returns null.
+template <bool Invert = false>
+Defined *
+getTargetSymbolFromSubtraction(const InputSection *isec,
+                               std::vector<macho::Reloc>::iterator relocIt) {
+  const macho::Reloc &subtrahend = *relocIt;
+  const macho::Reloc &minuend = *std::next(relocIt);
+  assert(target->hasAttr(subtrahend.type, RelocAttrBits::SUBTRAHEND));
+  assert(target->hasAttr(minuend.type, RelocAttrBits::UNSIGNED));
+  // Note: pcSym may *not* be exactly at the PC; there's usually a non-zero
+  // addend.
+  auto *pcSym = cast<Defined>(subtrahend.referent.get<macho::Symbol *>());
+  Defined *targetSym =
+      cast_or_null<Defined>(minuend.referent.dyn_cast<macho::Symbol *>());
+  // The minuend may refer to a section rather than a symbol, in which case
+  // the lookup above yields null and the symbol has to be found by offset.
+  // (This used to test `pcSym`, which comes from cast<> and is never null, so
+  // the fallback could not run.)
+  if (!targetSym) {
+    auto *targetIsec =
+        cast<ConcatInputSection>(minuend.referent.get<InputSection *>());
+    targetSym = findSymbolAtOffset(targetIsec, minuend.addend);
+  }
+  if (Invert)
+    std::swap(pcSym, targetSym);
+  // After the swap `pcSym` may be null (no symbol found for a section-based
+  // minuend); treat that like any other malformed relocation instead of
+  // dereferencing it.
+  if (!pcSym || pcSym->isec != isec ||
+      pcSym->value - (Invert ? -1 : 1) * minuend.addend != subtrahend.offset)
+    fatal("invalid FDE relocation in __eh_frame");
+  return targetSym;
+}
+
+// Resolve an address from the input object file to the symbol located there:
+// narrow the address down to a section, then to a subsection, then look up
+// the symbol at the offset that remains.
+Defined *findSymbolAtAddress(const std::vector<Section *> &sections,
+                             uint64_t addr) {
+  uint64_t offset = addr;
+  // findContainingSection() makes `offset` relative to the section it returns.
+  Section *containingSec = findContainingSection(sections, &offset);
+  auto *subsec = findContainingSubsection(*containingSec, &offset);
+  return findSymbolAtOffset(cast<ConcatInputSection>(subsec), offset);
+}
+
+// For symbols that don't have compact unwind info, associate them with the more
+// general-purpose (and verbose) DWARF unwind info found in __eh_frame.
+//
+// This requires us to parse the contents of __eh_frame. See EhFrame.h for a
+// description of its format.
+//
+// While parsing, we also look for what MC calls "abs-ified" relocations -- they
+// are relocations which are implicitly encoded as offsets in the section data.
+// We convert them into explicit Reloc structs so that the EH frames can be
+// handled just like a regular ConcatInputSection later in our output phase.
+//
+// We also need to handle the case where our input object file has explicit
+// relocations. This is the case when e.g. it's the output of `ld -r`. We only
+// look for the "abs-ified" relocation if an explicit relocation is absent.
+//
+// For every FDE that is kept, this records an entry in `fdes` and points the
+// function symbol's `unwindEntry` at the FDE. FDEs whose function symbol is
+// missing, belongs to another file, or already has an unwind entry are marked
+// dead instead.
+void ObjFile::registerEhFrames(Section &ehFrameSection) {
+  // CIEs parsed so far, keyed by their input section. FDEs look their CIE up
+  // here.
+  DenseMap<const InputSection *, CIE> cieMap;
+  for (const Subsection &subsec : ehFrameSection.subsections) {
+    auto *isec = cast<ConcatInputSection>(subsec.isec);
+    uint64_t isecOff = subsec.offset;
+
+    // Subtractor relocs require the subtrahend to be a symbol reloc. Ensure
+    // that all EH frames have an associated symbol so that we can generate
+    // subtractor relocs that reference them.
+    if (isec->symbols.size() == 0)
+      isec->symbols.push_back(make<Defined>(
+          "EH_Frame", isec->getFile(), isec, /*value=*/0, /*size=*/0,
+          /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
+          /*includeInSymtab=*/false, /*isThumb=*/false,
+          /*isReferencedDynamically=*/false, /*noDeadStrip=*/false));
+    else if (isec->symbols[0]->value != 0)
+      fatal("found symbol at unexpected offset in __eh_frame");
+
+    EhReader reader(this, isec->data, subsec.offset, target->wordSize);
+    size_t dataOff = 0; // Offset from the start of the EH frame.
+    reader.skipValidLength(&dataOff); // readLength() already validated this.
+    // cieOffOff is the offset from the start of the EH frame to the cieOff
+    // value, which is itself an offset from the current PC to a CIE.
+    const size_t cieOffOff = dataOff;
+
+    EhRelocator ehRelocator(isec);
+    auto cieOffRelocIt = llvm::find_if(
+        isec->relocs, [=](const Reloc &r) { return r.offset == cieOffOff; });
+    InputSection *cieIsec = nullptr;
+    if (cieOffRelocIt != isec->relocs.end()) {
+      // We already have an explicit relocation for the CIE offset.
+      cieIsec =
+          getTargetSymbolFromSubtraction</*Invert=*/true>(isec, cieOffRelocIt)
+              ->isec;
+      dataOff += sizeof(uint32_t);
+    } else {
+      // If we haven't found a relocation, then the CIE offset is most likely
+      // embedded in the section data (AKA an "abs-ified" reloc.). Parse that
+      // and generate a Reloc struct.
+      uint32_t cieMinuend = reader.readU32(&dataOff);
+      // A zero CIE-offset field marks this entry as a CIE itself.
+      if (cieMinuend == 0)
+        cieIsec = isec;
+      else {
+        // Convert the backwards offset into a position within the section.
+        uint32_t cieOff = isecOff + dataOff - cieMinuend;
+        cieIsec = findContainingSubsection(ehFrameSection, &cieOff);
+        if (cieIsec == nullptr)
+          fatal("failed to find CIE");
+      }
+      if (cieIsec != isec)
+        ehRelocator.makeNegativePcRel(cieOffOff, cieIsec->symbols[0],
+                                      /*length=*/2);
+    }
+    // This entry is a CIE: remember what it says about its FDEs and move on.
+    if (cieIsec == isec) {
+      cieMap[cieIsec] = parseCIE(isec, reader, dataOff);
+      continue;
+    }
+
+    // From here on the entry is an FDE.
+    // Offset of the function address within the EH frame.
+    const size_t funcAddrOff = dataOff;
+    // The stored value is relative to the field's own position, so add that
+    // position back to get an address within the input file.
+    uint64_t funcAddr = reader.readPointer(&dataOff) + ehFrameSection.addr +
+                        isecOff + funcAddrOff;
+    // NOTE(review): readPointer() returns uint64_t; the value is narrowed to
+    // 32 bits here -- confirm that this is intended.
+    uint32_t funcLength = reader.readPointer(&dataOff);
+    size_t lsdaAddrOff = 0; // Offset of the LSDA address within the EH frame.
+    // A CIE must precede the FDEs that point to it (see EhFrame.h), so it must
+    // have been parsed already.
+    assert(cieMap.count(cieIsec));
+    const CIE &cie = cieMap[cieIsec];
+    Optional<uint64_t> lsdaAddrOpt;
+    if (cie.fdesHaveAug) {
+      reader.skipLeb128(&dataOff); // skip the augmentation data length
+      lsdaAddrOff = dataOff;
+      if (cie.fdesHaveLsda) {
+        uint64_t lsdaOff = reader.readPointer(&dataOff);
+        if (lsdaOff != 0) // FIXME possible to test this?
+          lsdaAddrOpt = ehFrameSection.addr + isecOff + lsdaAddrOff + lsdaOff;
+      }
+    }
+
+    // Look for explicit relocs covering the function-address and LSDA fields.
+    // NOTE(review): `it++` followed by the loop's `++it` assumes that every
+    // matching subtrahend reloc is followed by its minuend partner; also, an
+    // LSDA reloc is only searched for when the embedded LSDA value is
+    // non-zero -- confirm both against `ld -r` output.
+    auto funcAddrRelocIt = isec->relocs.end();
+    auto lsdaAddrRelocIt = isec->relocs.end();
+    for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
+      if (it->offset == funcAddrOff)
+        funcAddrRelocIt = it++; // Found subtrahend; skip over minuend reloc
+      else if (lsdaAddrOpt && it->offset == lsdaAddrOff)
+        lsdaAddrRelocIt = it++; // Found subtrahend; skip over minuend reloc
+    }
+
+    Defined *funcSym;
+    if (funcAddrRelocIt != isec->relocs.end()) {
+      funcSym = getTargetSymbolFromSubtraction(isec, funcAddrRelocIt);
+    } else {
+      funcSym = findSymbolAtAddress(sections, funcAddr);
+      ehRelocator.makePcRel(funcAddrOff, funcSym, target->p2WordSize);
+    }
+    // The symbol has been coalesced, or already has a compact unwind entry.
+    if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry) {
+      // We must prune unused FDEs for correctness, so we cannot rely on
+      // -dead_strip being enabled.
+      isec->live = false;
+      continue;
+    }
+
+    InputSection *lsdaIsec = nullptr;
+    if (lsdaAddrRelocIt != isec->relocs.end()) {
+      lsdaIsec = getTargetSymbolFromSubtraction(isec, lsdaAddrRelocIt)->isec;
+    } else if (lsdaAddrOpt) {
+      uint64_t lsdaAddr = *lsdaAddrOpt;
+      Section *sec = findContainingSection(sections, &lsdaAddr);
+      lsdaIsec =
+          cast<ConcatInputSection>(findContainingSubsection(*sec, &lsdaAddr));
+      ehRelocator.makePcRel(lsdaAddrOff, lsdaIsec, target->p2WordSize);
+    }
+
+    fdes[isec] = {funcLength, cie.personalitySymbol, lsdaIsec};
+    funcSym->unwindEntry = isec;
+    // The queued relocs are only attached to FDEs that are kept.
+    ehRelocator.commit();
+  }
+}
+
 // The path can point to either a dylib or a .tbd file.
 static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
  Optional<MemoryBufferRef> mbref = readFile(path);
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@ -60,7 +60,8 @@ struct Subsection {
 using Subsections = std::vector<Subsection>;
 class InputFile;

-struct Section {
+class Section {
+public:
  InputFile *file;
  StringRef segname;
  StringRef name;
@ -76,6 +77,13 @@ struct Section {
  Section &operator=(const Section &) = delete;
  Section(Section &&) = delete;
  Section &operator=(Section &&) = delete;
+
+private:
+  // Whether we have already split this section into individual subsections.
+  // For sections that cannot be split (e.g. literal sections), this is always
+  // false.
+  bool doneSplitting = false;
+  friend class ObjFile;
 };

 // Represents a call graph profile edge.
@ -135,6 +143,12 @@ private:
  static int idCount;
 };

+struct FDE { // Data kept for one FDE; ObjFile::fdes maps the FDE's subsection to this record.
+  uint32_t funcLength; // Later copied into the compact unwind entry's functionLength.
+  Symbol *personality; // Filled from the owning CIE's personalitySymbol.
+  InputSection *lsda; // Section holding the function's LSDA; nullptr when the FDE has none.
+};
+
 // .o file
 class ObjFile final : public InputFile {
 public:
@ -146,10 +160,11 @@ public:
  static bool classof(const InputFile *f) { return f->kind() == ObjKind; }

  llvm::DWARFUnit *compileUnit = nullptr;
+  Section *addrSigSection = nullptr;
  const uint32_t modTime;
  std::vector<ConcatInputSection *> debugSections;
  std::vector<CallGraphEntry> callGraph;
-  Section *addrSigSection = nullptr;
+  llvm::DenseMap<ConcatInputSection *, FDE> fdes;

 private:
  template <class LP> void parseLazy();
@ -164,7 +179,9 @@ private:
  void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
                        const SectionHeader &, Section &);
  void parseDebugInfo();
+  void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
  void registerCompactUnwind(Section &compactUnwindSection);
+  void registerEhFrames(Section &ehFrameSection);
 };

 // command-line -sectcreate file
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@ -268,6 +268,11 @@ bool macho::isClassRefsSection(const InputSection *isec) {
         isec->getSegName() == segment_names::data;
 }

+bool macho::isEhFrameSection(const InputSection *isec) {
+  // A section qualifies only when both its section name and its segment name
+  // match; bail out early on a section-name mismatch.
+  if (isec->getName() != section_names::ehFrame)
+    return false;
+  return isec->getSegName() == segment_names::text;
+}
+
 std::string lld::toString(const InputSection *isec) {
  return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str();
 }
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@ -273,6 +273,7 @@ inline bool isWordLiteralSection(uint32_t flags) {
 bool isCodeSection(const InputSection *);
 bool isCfStringSection(const InputSection *);
 bool isClassRefsSection(const InputSection *);
+bool isEhFrameSection(const InputSection *);

 extern std::vector<ConcatInputSection *> inputSections;

--- a/lld/MachO/Relocations.h
+++ b/lld/MachO/Relocations.h
@ -61,6 +61,13 @@ struct Reloc {
  // gives the destination that this relocation refers to.
  int64_t addend = 0;
  llvm::PointerUnion<Symbol *, InputSection *> referent = nullptr;
+
+  Reloc() = default; // Keeps Reloc default-constructible now that another constructor is declared.
+
+  Reloc(uint8_t type, bool pcrel, uint8_t length, uint32_t offset, // Builds a Reloc from explicit field values.
+        int64_t addend, llvm::PointerUnion<Symbol *, InputSection *> referent)
+      : type(type), pcrel(pcrel), length(length), offset(offset),
+        addend(addend), referent(referent) {} // Each argument initializes the member of the same name; no other work is done.
 };

 bool validateSymbolRelocation(const Symbol *, const InputSection *,
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@ -183,6 +183,7 @@ public:
  uint64_t value;
  // size is only calculated for regular (non-bitcode) symbols.
  uint64_t size;
+  // This can be a subsection of either __compact_unwind or __eh_frame.
  ConcatInputSection *unwindEntry = nullptr;
 };

--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@ -14,6 +14,7 @@

 #include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"

 #include <cstddef>
@ -37,6 +38,7 @@ public:
    pageZeroSize = LP::pageZeroSize;
    headerSize = sizeof(typename LP::mach_header);
    wordSize = LP::wordSize;
+    p2WordSize = llvm::CTLog2<LP::wordSize>();
  }

  virtual ~TargetInfo() = default;
@ -85,12 +87,17 @@ public:
  size_t stubSize;
  size_t stubHelperHeaderSize;
  size_t stubHelperEntrySize;
+  uint8_t p2WordSize;
  size_t wordSize;

  size_t thunkSize = 0;
  uint64_t forwardBranchRange = 0;
  uint64_t backwardBranchRange = 0;

+  uint32_t modeDwarfEncoding;
+  uint8_t subtractorRelocType;
+  uint8_t unsignedRelocType;
+
  // We contrive this value as sufficiently far from any valid address that it
  // will always be out-of-range for any architecture. UINT64_MAX is not a
  // good choice because it is (a) only 1 away from wrapping to 0, and (b) the
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@ -28,6 +28,7 @@

 using namespace llvm;
 using namespace llvm::MachO;
+using namespace llvm::support::endian;
 using namespace lld;
 using namespace lld::macho;

@ -222,7 +223,8 @@ void UnwindInfoSectionImpl::prepareRelocations() {
  // entries to the GOT. Hence the use of a MapVector for
  // UnwindInfoSection::symbols.
  for (const Defined *d : make_second_range(symbols))
-    if (d->unwindEntry)
+    if (d->unwindEntry &&
+        d->unwindEntry->getName() == section_names::compactUnwind)
      prepareRelocations(d->unwindEntry);
 }

@ -331,6 +333,18 @@ void UnwindInfoSectionImpl::relocateCompactUnwind(
    if (!d->unwindEntry)
      return;

+    // If we have DWARF unwind info, create a CU entry that points to it.
+    if (d->unwindEntry->getName() == section_names::ehFrame) {
+      cu.encoding = target->modeDwarfEncoding | d->unwindEntry->outSecOff;
+      const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry];
+      cu.functionLength = fde.funcLength;
+      cu.personality = fde.personality;
+      cu.lsda = fde.lsda;
+      return;
+    }
+
+    assert(d->unwindEntry->getName() == section_names::compactUnwind);
+
    auto buf = reinterpret_cast<const uint8_t *>(d->unwindEntry->data.data()) -
               target->wordSize;
    cu.functionLength =
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@ -950,8 +950,14 @@ template <class LP> void Writer::createOutputSections() {
    StringRef segname = it.first.first;
    ConcatOutputSection *osec = it.second;
    assert(segname != segment_names::ld);
-    if (osec->isNeeded())
+    if (osec->isNeeded()) {
+      // See comment in ObjFile::splitEhFrames()
+      if (osec->name == section_names::ehFrame &&
+          segname == segment_names::text)
+        osec->align = target->wordSize;
+
      getOrCreateOutputSegment(segname)->addOutputSection(osec);
+    }
  }

  for (SyntheticSection *ssec : syntheticSections) {
--- a/lld/test/MachO/Inputs/eh-frame-x86_64-r.o
+++ b/lld/test/MachO/Inputs/eh-frame-x86_64-r.o
--- a/lld/test/MachO/eh-frame.s
+++ b/lld/test/MachO/eh-frame.s
@ -0,0 +1,161 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; mkdir %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %s -o %t/eh-frame-x86_64.o
+# RUN: %lld -lSystem -lc++ %t/eh-frame-x86_64.o -o %t/eh-frame-x86_64
+# RUN: llvm-objdump --macho --syms --indirect-symbols --unwind-info \
+# RUN:   --dwarf=frames %t/eh-frame-x86_64 | FileCheck %s -D#BASE=0x100000000 -D#DWARF_ENC=4
+# RUN: llvm-nm -m %t/eh-frame-x86_64 | FileCheck %s --check-prefix NO-EH-SYMS
+# RUN: llvm-readobj --section-headers %t/eh-frame-x86_64 | FileCheck %s --check-prefix=ALIGN -D#ALIGN=3
+
+## Test that we correctly handle the output of `ld -r`, which emits EH frames
+## using subtractor relocations instead of implicitly encoding the offsets.
+## In order to keep this test cross-platform, we check in ld64's output rather
+## than invoking ld64 directly. NOTE: whenever this test is updated, the
+## checked-in copy of `ld -r`'s output should be updated too!
+# COM: ld -r %t/eh-frame-x86_64.o -o %S/Inputs/eh-frame-x86_64-r.o
+# RUN: %lld -lSystem -lc++ %S/Inputs/eh-frame-x86_64-r.o -o %t/eh-frame-x86_64-r
+# RUN: llvm-objdump --macho --syms --indirect-symbols --unwind-info \
+# RUN:   --dwarf=frames %t/eh-frame-x86_64-r | FileCheck %s -D#BASE=0x100000000 -D#DWARF_ENC=4
+# RUN: llvm-nm -m %t/eh-frame-x86_64-r | FileCheck %s --check-prefix NO-EH-SYMS
+# RUN: llvm-readobj --section-headers %t/eh-frame-x86_64-r | FileCheck %s --check-prefix=ALIGN -D#ALIGN=3
+
+# ALIGN:      Name: __eh_frame
+# ALIGN-NEXT: Segment: __TEXT
+# ALIGN-NEXT: Address:
+# ALIGN-NEXT: Size:
+# ALIGN-NEXT: Offset:
+# ALIGN-NEXT: Alignment: [[#ALIGN]]
+
+# NO-EH-SYMS-NOT: __eh_frame
+
+# CHECK: Indirect symbols for (__DATA_CONST,__got) 2 entries
+# CHECK: address                         index  name
+# CHECK: 0x[[#%x,GXX_PERSONALITY_GOT:]]  {{.*}}  ___gxx_personality_v0
+# CHECK: 0x[[#%x,MY_PERSONALITY_GOT:]]
+# CHECK: SYMBOL TABLE:
+# CHECK-DAG: [[#%x,F:]]              l   F __TEXT,__text _f
+# CHECK-DAG: [[#%x,NO_UNWIND:]]      l   F __TEXT,__text _no_unwind
+# CHECK-DAG: [[#%x,G:]]              l   F __TEXT,__text _g
+# CHECK-DAG: [[#%x,H:]]              l   F __TEXT,__text _h
+# CHECK-DAG: [[#%x,EXCEPT0:]]        l   O __TEXT,__gcc_except_tab GCC_except_table0
+# CHECK-DAG: [[#%x,EXCEPT1:]]        l   O __TEXT,__gcc_except_tab GCC_except_table1
+# CHECK-DAG: [[#%x,EXCEPT2:]]        l   O __TEXT,custom_except custom_except_table2
+# CHECK-DAG: [[#%x,MY_PERSONALITY:]] g   F __TEXT,__text _my_personality
+# CHECK: Contents of __unwind_info section:
+# CHECK:   Version:                                   0x1
+# CHECK:   Number of personality functions in array:  0x2
+# CHECK:   Number of indices in array:                0x2
+# CHECK:   Personality functions: (count = 2)
+# CHECK:     personality[1]: 0x[[#%.8x,GXX_PERSONALITY_GOT - BASE]]
+# CHECK:     personality[2]: 0x[[#%.8x,MY_PERSONALITY_GOT - BASE]]
+# CHECK:   LSDA descriptors:
+# CHECK:     [0]: function offset=0x[[#%.8x,F - BASE]], LSDA offset=0x[[#%.8x,EXCEPT0 - BASE]]
+# CHECK:     [1]: function offset=0x[[#%.8x,G - BASE]], LSDA offset=0x[[#%.8x,EXCEPT1 - BASE]]
+# CHECK:     [2]: function offset=0x[[#%.8x,H - BASE]], LSDA offset=0x[[#%.8x,EXCEPT2 - BASE]]
+# CHECK:   Second level indices:
+# CHECK:     Second level index[0]:
+# CHECK:       [0]: function offset=0x[[#%.8x,F - BASE]],              encoding[{{.*}}]=0x52{{.*}}
+# CHECK:       [1]: function offset=0x[[#%.8x,NO_UNWIND - BASE]],      encoding[{{.*}}]=0x00000000
+# CHECK:       [2]: function offset=0x[[#%.8x,G - BASE]],              encoding[{{.*}}]=0x1[[#%x,DWARF_ENC]][[#%.6x, G_DWARF_OFF:]]
+# CHECK:       [3]: function offset=0x[[#%.8x,H - BASE]],              encoding[{{.*}}]=0x2[[#%x,DWARF_ENC]][[#%.6x, H_DWARF_OFF:]]
+# CHECK:       [4]: function offset=0x[[#%.8x,MY_PERSONALITY - BASE]], encoding[{{.*}}]=0x00000000
+
+# CHECK: .debug_frame contents:
+# CHECK: .eh_frame contents:
+
+# CHECK: [[#%.8x,CIE1_OFF:]] {{.*}} CIE
+# CHECK:   Format:                DWARF32
+# CHECK:   Version:               1
+# CHECK:   Augmentation:          "zPLR"
+# CHECK:   Code alignment factor: 1
+# CHECK:   Data alignment factor: -8
+# CHECK:   Return address column:
+# CHECK:   Personality Address:   [[#%.16x,GXX_PERSONALITY_GOT]]
+# CHECK:   Augmentation data:     9B {{(([[:xdigit:]]{2} ){4})}}10 10
+
+# CHECK: [[#%.8x,G_DWARF_OFF]] {{.*}} [[#%.8x,G_DWARF_OFF + 4 - CIE1_OFF]] FDE cie=[[#CIE1_OFF]] pc=[[#%x,G]]
+# CHECK:   Format:       DWARF32
+# CHECK:   LSDA Address: [[#%.16x,EXCEPT1]]
+# CHECK:   DW_CFA_def_cfa_offset: +8
+# CHECK:   0x[[#%x,G]]:
+
+# CHECK: [[#%.8x,CIE2_OFF:]] {{.*}} CIE
+# CHECK:   Format:                DWARF32
+# CHECK:   Version:               1
+# CHECK:   Augmentation:          "zPLR"
+# CHECK:   Code alignment factor: 1
+# CHECK:   Data alignment factor: -8
+# CHECK:   Return address column:
+# CHECK:   Personality Address:   [[#%.16x,MY_PERSONALITY_GOT]]
+# CHECK:   Augmentation data:     9B {{(([[:xdigit:]]{2} ){4})}}10 10
+
+# CHECK: [[#%.8x,H_DWARF_OFF]] {{.*}} [[#%.8x,H_DWARF_OFF + 4 - CIE2_OFF]] FDE cie=[[#CIE2_OFF]] pc=[[#%x,H]]
+# CHECK:   Format:       DWARF32
+# CHECK:   LSDA Address: [[#%.16x,EXCEPT2]]
+# CHECK:   DW_CFA_def_cfa_offset: +8
+# CHECK:   0x[[#%x,H]]:
+
+.globl _my_personality, _main
+
+.text
+## _f's unwind info can be encoded with compact unwind, so we shouldn't see an
+## FDE entry for it in the output file.
+.p2align 2
+_f:
+  .cfi_startproc
+  .cfi_personality 155, ___gxx_personality_v0
+  .cfi_lsda 16, Lexception0
+  .cfi_def_cfa_offset 8
+  ret
+  .cfi_endproc
+
+.p2align 2
+_no_unwind:
+  ret
+
+.p2align 2
+_g:
+  .cfi_startproc
+  .cfi_personality 155, ___gxx_personality_v0
+  .cfi_lsda 16, Lexception1
+  .cfi_def_cfa_offset 8
+  ## cfi_escape cannot be encoded in compact unwind, so we must keep _g's FDE
+  .cfi_escape 0x2e, 0x10
+  ret
+  .cfi_endproc
+
+.p2align 2
+_h:
+  .cfi_startproc
+  .cfi_personality 155, _my_personality
+  .cfi_lsda 16, Lexception2
+  .cfi_def_cfa_offset 8
+  ## cfi_escape cannot be encoded in compact unwind, so we must keep _h's FDE
+  .cfi_escape 0x2e, 0x10
+  ret
+  .cfi_endproc
+
+.p2align 2
+_my_personality:
+  ret
+
+.p2align 2
+_main:
+  ret
+
+.section __TEXT,__gcc_except_tab
+GCC_except_table0:
+Lexception0:
+  .byte 255
+
+GCC_except_table1:
+Lexception1:
+  .byte 255
+
+.section __TEXT,custom_except
+custom_except_table2:
+Lexception2:
+  .byte 255
+
+.subsections_via_symbols
--- a/lld/test/MachO/invalid/eh-frame.s
+++ b/lld/test/MachO/invalid/eh-frame.s
@ -0,0 +1,83 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/too-small-1.s -o %t/too-small-1.o
+# RUN: not %lld -lSystem -dylib %t/too-small-1.o -o /dev/null 2>&1 | FileCheck %s --check-prefix TOO-SMALL-1
+# TOO-SMALL-1: error: {{.*}}too-small-1.o:(__eh_frame+0x0): CIE/FDE too small
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/too-small-2.s -o %t/too-small-2.o
+# RUN: not %lld -lSystem -dylib %t/too-small-2.o -o /dev/null 2>&1 | FileCheck %s --check-prefix TOO-SMALL-2
+# TOO-SMALL-2: error: {{.*}}too-small-2.o:(__eh_frame+0x0): CIE/FDE extends past the end of the section
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/personality-enc.s -o %t/personality-enc.o
+# RUN: not %lld -lSystem -dylib %t/personality-enc.o -o /dev/null 2>&1 | FileCheck %s --check-prefix PERS-ENC
+# PERS-ENC: error: {{.*}}personality-enc.o:(__eh_frame+0x12): unexpected personality encoding 0xb
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/pointer-enc.s -o %t/pointer-enc.o
+# RUN: not %lld -lSystem -dylib %t/pointer-enc.o -o /dev/null 2>&1 | FileCheck %s --check-prefix PTR-ENC
+# PTR-ENC: error: {{.*}}pointer-enc.o:(__eh_frame+0x11): unexpected pointer encoding 0x12
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/string-err.s -o %t/string-err.o
+# RUN: not %lld -lSystem -dylib %t/string-err.o -o /dev/null 2>&1 | FileCheck %s --check-prefix STR
+# STR: error: {{.*}}string-err.o:(__eh_frame+0x9): corrupted CIE (failed to read string)
+
+#--- too-small-1.s
+.p2align 3
+.section __TEXT,__eh_frame
+.short 0x3
+
+.subsections_via_symbols
+
+#--- too-small-2.s
+.p2align 3
+.section __TEXT,__eh_frame
+.long 0x3  # length
+
+.subsections_via_symbols
+
+#--- personality-enc.s
+.p2align 3
+.section __TEXT,__eh_frame
+
+.long 0x14   # length
+.long 0      # CIE offset
+.byte 1      # version
+.asciz "zPR" # aug string
+.byte 0x01   # code alignment
+.byte 0x78   # data alignment
+.byte 0x10   # return address register
+.byte 0x01   # aug length
+.byte 0x0b   # personality encoding
+.long 0xffff # personality pointer
+.byte 0x10   # pointer encoding
+.space 1     # pad to alignment
+
+.subsections_via_symbols
+
+#--- pointer-enc.s
+.p2align 3
+.section __TEXT,__eh_frame
+
+.long 0x14  # length
+.long 0     # CIE offset
+.byte 1     # version
+.asciz "zR" # aug string
+.byte 0x01  # code alignment
+.byte 0x78  # data alignment
+.byte 0x10  # return address register
+.byte 0x01  # aug length
+.byte 0x12  # pointer encoding
+.space 7    # pad to alignment
+
+.subsections_via_symbols
+
+#--- string-err.s
+.p2align 3
+.section __TEXT,__eh_frame
+
+.long 0x7   # length
+.long 0     # CIE offset
+.byte 1     # version
+.ascii "zR" # invalid aug string
+
+.subsections_via_symbols
--- a/lld/test/MachO/obj-file-with-stabs.s
+++ b/lld/test/MachO/obj-file-with-stabs.s
@ -1,27 +1,33 @@
 # REQUIRES: x86
-## FIXME: This yaml is from an object file produced with 'ld -r'
-##        Replace this with "normal" .s test format once lld supports `-r`
+## FIXME: This yaml is from an object file produced with 'ld -r':
+##
+##   echo "int main() {return 1;}" > test.c
+##   clang -c -g -o test.o test.c
+##   ld -r -o test2.o test.o -no_data_in_code_info
+##
+## Replace this with "normal" .s test format once lld supports `-r`

 # RUN: yaml2obj %s -o %t.o
-# RUN: %lld -lSystem -platform_version macos 11.3 11.0 -arch x86_64 %t.o -o %t
+# RUN: %lld -lSystem -arch x86_64 %t.o -o %t
+
 --- !mach-o
 FileHeader:
  magic:           0xFEEDFACF
-  cputype:         0x01000007
-  cpusubtype:      0x00000003
-  filetype:        0x00000001
-  ncmds:           2
-  sizeofcmds:      384
-  flags:           0x00002000
-  reserved:        0x00000000
+  cputype:         0x1000007
+  cpusubtype:      0x3
+  filetype:        0x1
+  ncmds:           3
+  sizeofcmds:      288
+  flags:           0x2000
+  reserved:        0x0
 LoadCommands:
  - cmd:             LC_SEGMENT_64
-    cmdsize:         312
+    cmdsize:         232
    segname:         ''
    vmaddr:          0
-    vmsize:          120
-    fileoff:         448
-    filesize:        120
+    vmsize:          56
+    fileoff:         352
+    filesize:        56
    maxprot:         7
    initprot:        7
    nsects:          2
@ -29,57 +35,33 @@ LoadCommands:
    Sections:
      - sectname:        __text
        segname:         __TEXT
-        addr:            0x0000000000000000
+        addr:            0x0
        size:            18
-        offset:          0x000001C0
+        offset:          0x160
        align:           4
-        reloff:          0x00000000
+        reloff:          0x0
        nreloc:          0
        flags:           0x80000400
-        reserved1:       0x00000000
-        reserved2:       0x00000000
-        reserved3:       0x00000000
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
        content:         554889E5C745FC00000000B8010000005DC3
-      - sectname:        __eh_frame
-        segname:         __TEXT
-        addr:            0x0000000000000018
-        size:            64
-        offset:          0x000001D8
+      - sectname:        __compact_unwind
+        segname:         __LD
+        addr:            0x18
+        size:            32
+        offset:          0x178
        align:           3
-        reloff:          0x00000238
-        nreloc:          4
-        flags:           0x00000000
-        reserved1:       0x00000000
-        reserved2:       0x00000000
-        reserved3:       0x00000000
-        content:         1400000000000000017A520001781001100C0708900100002400000004000000F8FFFFFFFFFFFFFF120000000000000000410E108602430D0600000000000000
+        reloff:          0x198
+        nreloc:          1
+        flags:           0x2000000
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+        content:         '0000000000000000120000000000000100000000000000000000000000000000'
        relocations:
-          - address:         0x0000001C
-            symbolnum:       0
-            pcrel:           false
-            length:          2
-            extern:          true
-            type:            5
-            scattered:       false
-            value:           0
-          - address:         0x0000001C
-            symbolnum:       1
-            pcrel:           false
-            length:          2
-            extern:          true
-            type:            0
-            scattered:       false
-            value:           0
-          - address:         0x00000020
-            symbolnum:       1
-            pcrel:           false
-            length:          3
-            extern:          true
-            type:            5
-            scattered:       false
-            value:           0
-          - address:         0x00000020
-            symbolnum:       10
+          - address:         0x0
+            symbolnum:       8
            pcrel:           false
            length:          3
            extern:          true
@ -88,39 +70,72 @@ LoadCommands:
            value:           0
  - cmd:             LC_SYMTAB
    cmdsize:         24
-    symoff:          608
-    nsyms:           11
-    stroff:          784
-    strsize:         72
+    symoff:          416
+    nsyms:           9
+    stroff:          560
+    strsize:         48
+  - cmd:             LC_BUILD_VERSION
+    cmdsize:         32
+    platform:        1
+    minos:           659200
+    sdk:             0
+    ntools:          1
+    Tools:
+      - tool:            3
+        version:         46596096
 LinkEditData:
  NameList:
-    - n_strx:          8      ## N_STAB sym (in got)
-      n_type:          0x0E
-      n_sect:          2
+    - n_strx:          8
+      n_type:          0x64 ## N_SO STAB
+      n_sect:          0
      n_desc:          0
-      n_value:         24
-    - n_strx:          18
-      n_type:          0x0E
-      n_sect:          2
+      n_value:         0
+    - n_strx:          14
+      n_type:          0x64 ## N_SO STAB
+      n_sect:          0
      n_desc:          0
-      n_value:         48
+      n_value:         0
+    - n_strx:          21
+      n_type:          0x66 ## N_OSO STAB
+      n_sect:          3
+      n_desc:          1
+      n_value:         1651001352
    - n_strx:          1
-      n_type:          0x4E
+      n_type:          0x2E ## N_BNSYM STAB
+      n_sect:          1
+      n_desc:          0
+      n_value:         0
+    - n_strx:          41
+      n_type:          0x24 ## N_FUN STAB
+      n_sect:          1
+      n_desc:          0
+      n_value:         0
+    - n_strx:          1
+      n_type:          0x24 ## N_FUN STAB
+      n_sect:          0
+      n_desc:          0
+      n_value:         18
+    - n_strx:          1
+      n_type:          0x4E ## N_ENSYM STAB
      n_sect:          1
      n_desc:          0
      n_value:         18
-    - n_strx:          2          ## _main
-      n_type:          0x0F
+    - n_strx:          1
+      n_type:          0x64 ## N_SO STAB
+      n_sect:          1
+      n_desc:          0
+      n_value:         0
+    - n_strx:          2
+      n_type:          0xF
      n_sect:          1
      n_desc:          0
      n_value:         0
  StringTable:
    - ' '
    - _main
-    - EH_Frame1
-    - func.eh
-    - '/Users/vyng/'
-    - test.cc
-    - '/Users/vyng/test.o'
+    - '/tmp/'
+    - test.c
+    - '/private/tmp/test.o'
    - _main
+    - ''
 ...
--- a/lld/test/MachO/tools/generate-cfi-funcs.py
+++ b/lld/test/MachO/tools/generate-cfi-funcs.py
@ -24,9 +24,6 @@ def print_function(name):
  have_lsda = (random.random() < lsda_odds)
  frame_size = random.randint(4, 64) * 16
  frame_offset = -random.randint(0, (frame_size/16 - 4)) * 16
-  reg_count = random.randint(0, 5)
-  reg_combo = random.randint(0, factorial(reg_count) - 1)
-  regs_saved = saved_regs_combined[reg_count][reg_combo]
  global func_size_low, func_size_high
  func_size = random.randint(func_size_low, func_size_high) * 0x10
  func_size_high += 1
@ -34,13 +31,13 @@ def print_function(name):
    func_size_low += 1

  print("""\
-### %s regs=%d frame=%d lsda=%s size=%d
+### %s frame=%d lsda=%s size=%d
    .section __TEXT,__text,regular,pure_instructions
    .p2align 4, 0x90
    .globl %s
 %s:
    .cfi_startproc""" % (
-        name, reg_count, frame_size, have_lsda, func_size, name, name))
+        name, frame_size, have_lsda, func_size, name, name))
  if have_lsda:
    global lsda_n
    lsda_n += 1
@ -53,8 +50,6 @@ def print_function(name):
    .cfi_offset %%rbp, %d
    movq %%rsp, %%rbp
    .cfi_def_cfa_register %%rbp""" % (frame_size, frame_offset + 6*8))
-  for i in range(reg_count):
-    print(".cfi_offset %s, %d" % (regs_saved[i], frame_offset+(i*8)))
  print("""\
    .fill %d
    popq %%rbp