[lld-macho] Map file should map symbols to their original bitcode file

... instead of mapping them to the intermediate object file.
This matches ld64.

Reviewed By: #lld-macho, Roger

Differential Revision: https://reviews.llvm.org/D136380
This commit is contained in:
Jez Ng 2022-10-21 22:48:25 -04:00
parent da374d180d
commit b945733026
7 changed files with 142 additions and 29 deletions

View File

@ -108,7 +108,7 @@ void BitcodeCompiler::add(BitcodeFile &f) {
// load the ObjFile emitted by LTO compilation.
if (r.Prevailing)
replaceSymbol<Undefined>(sym, sym->getName(), sym->getFile(),
RefState::Strong);
RefState::Strong, /*wasBitcodeSymbol=*/true);
// TODO: set the other resolution configs properly
}

View File

@ -45,32 +45,41 @@ using namespace llvm::sys;
using namespace lld;
using namespace lld::macho;
using Symbols = std::vector<Defined *>;
// Returns a pair where the left element is a container of all live Symbols and
// the right element is a container of all dead symbols.
static std::pair<Symbols, Symbols> getSymbols() {
Symbols liveSymbols, deadSymbols;
struct MapInfo {
SmallVector<InputFile *> files;
SmallVector<Defined *> liveSymbols;
SmallVector<Defined *> deadSymbols;
};
static MapInfo gatherMapInfo() {
MapInfo info;
for (InputFile *file : inputFiles)
if (isa<ObjFile>(file))
for (Symbol *sym : file->symbols)
if (isa<ObjFile>(file) || isa<BitcodeFile>(file)) {
bool hasEmittedSymbol = false;
for (Symbol *sym : file->symbols) {
if (auto *d = dyn_cast_or_null<Defined>(sym))
if (d->isec && d->getFile() == file) {
if (d->isLive()) {
assert(!shouldOmitFromOutput(d->isec));
liveSymbols.push_back(d);
info.liveSymbols.push_back(d);
} else {
deadSymbols.push_back(d);
info.deadSymbols.push_back(d);
}
hasEmittedSymbol = true;
}
parallelSort(liveSymbols.begin(), liveSymbols.end(),
}
if (hasEmittedSymbol)
info.files.push_back(file);
}
parallelSort(info.liveSymbols.begin(), info.liveSymbols.end(),
[](Defined *a, Defined *b) {
return a->getVA() != b->getVA() ? a->getVA() < b->getVA()
: a->getName() < b->getName();
});
parallelSort(
deadSymbols.begin(), deadSymbols.end(),
info.deadSymbols.begin(), info.deadSymbols.end(),
[](Defined *a, Defined *b) { return a->getName() < b->getName(); });
return {std::move(liveSymbols), std::move(deadSymbols)};
return info;
}
// Construct a map from symbols to their stringified representations.
@ -128,16 +137,16 @@ void macho::writeMapFile() {
os << format("# Arch: %s\n",
getArchitectureName(config->arch()).str().c_str());
MapInfo info = gatherMapInfo();
// Dump table of object files.
os << "# Object files:\n";
os << format("[%3u] %s\n", 0, (const char *)"linker synthesized");
uint32_t fileIndex = 1;
DenseMap<lld::macho::InputFile *, uint32_t> readerToFileOrdinal;
for (InputFile *file : inputFiles) {
if (isa<ObjFile>(file)) {
os << format("[%3u] %s\n", fileIndex, file->getName().str().c_str());
readerToFileOrdinal[file] = fileIndex++;
}
for (InputFile *file : info.files) {
os << format("[%3u] %s\n", fileIndex, file->getName().str().c_str());
readerToFileOrdinal[file] = fileIndex++;
}
// Dump table of sections
@ -153,13 +162,11 @@ void macho::writeMapFile() {
}
// Dump table of symbols
auto [liveSymbols, deadSymbols] = getSymbols();
DenseMap<Symbol *, std::string> liveSymbolStrings =
getSymbolStrings(liveSymbols);
getSymbolStrings(info.liveSymbols);
os << "# Symbols:\n";
os << "# Address\tSize \tFile Name\n";
for (Defined *sym : liveSymbols) {
for (Defined *sym : info.liveSymbols) {
assert(sym->isLive());
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), sym->size,
readerToFileOrdinal[sym->getFile()],
@ -168,10 +175,10 @@ void macho::writeMapFile() {
if (config->deadStrip) {
DenseMap<Symbol *, std::string> deadSymbolStrings =
getSymbolStrings(deadSymbols);
getSymbolStrings(info.deadSymbols);
os << "# Dead Stripped Symbols:\n";
os << "# \tSize \tFile Name\n";
for (Defined *sym : deadSymbols) {
for (Defined *sym : info.deadSymbols) {
assert(!sym->isLive());
os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", sym->size,
readerToFileOrdinal[sym->getFile()],

View File

@ -108,6 +108,11 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
} else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
overridesWeakDef = !isWeakDef && dysym->isWeakDef();
dysym->unreference();
} else if (auto *undef = dyn_cast<Undefined>(s)) {
// Preserve the original bitcode file name (instead of using the object
// file name).
if (undef->wasBitcodeSymbol)
file = undef->getFile();
}
// Defined symbols take priority over other types of symbols, so in case
// of a name conflict, we fall through to the replaceSymbol() call below.
@ -142,7 +147,8 @@ Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file,
RefState refState = isWeakRef ? RefState::Weak : RefState::Strong;
if (wasInserted)
replaceSymbol<Undefined>(s, name, file, refState);
replaceSymbol<Undefined>(s, name, file, refState,
/*wasBitcodeSymbol=*/false);
else if (auto *lazy = dyn_cast<LazyArchive>(s))
lazy->fetchArchiveMember();
else if (isa<LazyObject>(s))

View File

@ -105,6 +105,10 @@ uint64_t Defined::getVA() const {
return isec->getVA(value);
}
ObjFile *Defined::getObjectFile() const {
return isec ? dyn_cast_or_null<ObjFile>(isec->getFile()) : nullptr;
}
void Defined::canonicalize() {
if (unwindEntry)
unwindEntry = unwindEntry->canonical();

View File

@ -133,6 +133,10 @@ public:
uint64_t getVA() const override;
// Returns the object file that this symbol was defined in. This value differs
// from `getFile()` if the symbol originated from a bitcode file.
ObjFile *getObjectFile() const;
std::string getSourceLocation();
// Ensure this symbol's pointers to InputSections point to their canonical
@ -199,8 +203,10 @@ enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 };
class Undefined : public Symbol {
public:
Undefined(StringRefZ name, InputFile *file, RefState refState)
: Symbol(UndefinedKind, name, file), refState(refState) {
Undefined(StringRefZ name, InputFile *file, RefState refState,
bool wasBitcodeSymbol)
: Symbol(UndefinedKind, name, file), refState(refState),
wasBitcodeSymbol(wasBitcodeSymbol) {
assert(refState != RefState::Unreferenced);
}
@ -209,6 +215,7 @@ public:
static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
RefState refState : 2;
bool wasBitcodeSymbol;
};
// On Unix, it is traditionally allowed to write variable definitions without

View File

@ -1157,8 +1157,7 @@ void SymtabSection::emitStabs() {
if (defined->wasIdenticalCodeFolded)
continue;
InputSection *isec = defined->isec;
ObjFile *file = dyn_cast_or_null<ObjFile>(isec->getFile());
ObjFile *file = defined->getObjectFile();
if (!file || !file->compileUnit)
continue;

View File

@ -0,0 +1,90 @@
; REQUIRES: x86
; RUN: rm -rf %t; split-file %s %t
;; Verify that we map symbols to their original bitcode input files, not the
;; intermediate object files. Also verify that we don't emit these intermediate
;; object files if no symbol needs to reference them. (Intermediate object files
;; may still be referenced if they contain compiler-synthesized symbols, such
;; as outlined functions. TODO: Test this case.)
; RUN: opt -module-summary %t/foo.ll -o %t/foo.thinlto.o
; RUN: opt -module-summary %t/bar.ll -o %t/bar.thinlto.o
; RUN: %lld -dylib %t/foo.thinlto.o %t/bar.thinlto.o -o %t/foobar.thinlto -map \
; RUN: %t/foobar.thinlto.map
; RUN: FileCheck %s --check-prefix=FOOBAR < %t/foobar.thinlto.map
; RUN: %lld -dylib %t/bar.thinlto.o %t/foo.thinlto.o -o %t/barfoo.thinlto -map \
; RUN: %t/barfoo.thinlto.map
; RUN: FileCheck %s --check-prefix=BARFOO < %t/barfoo.thinlto.map
; RUN: llvm-as %t/foo.ll -o %t/foo.o
; RUN: llvm-as %t/bar.ll -o %t/bar.o
; RUN: %lld -dylib %t/foo.o %t/bar.o -o %t/foobar -map %t/foobar.map
; RUN: FileCheck %s --check-prefix=LTO < %t/foobar.map
; FOOBAR: # Path: {{.*}}{{/|\\}}map-file.ll.tmp/foobar.thinlto
; FOOBAR-NEXT: # Arch: x86_64
; FOOBAR-NEXT: # Object files:
; FOOBAR-NEXT: [ 0] linker synthesized
; FOOBAR-NEXT: [ 1] {{.*}}{{/|\\}}map-file.ll.tmp/foo.thinlto.o
; FOOBAR-NEXT: [ 2] {{.*}}{{/|\\}}map-file.ll.tmp/bar.thinlto.o
; FOOBAR-NEXT: # Sections:
; FOOBAR: # Symbols:
; FOOBAR-NEXT: # Address Size File Name
; FOOBAR-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 1] _foo
; FOOBAR-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 2] _bar
; FOOBAR-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 2] _maybe_weak
; BARFOO: # Path: {{.*}}{{/|\\}}map-file.ll.tmp/barfoo.thinlto
; BARFOO-NEXT: # Arch: x86_64
; BARFOO-NEXT: # Object files:
; BARFOO-NEXT: [ 0] linker synthesized
; BARFOO-NEXT: [ 1] {{.*}}{{/|\\}}map-file.ll.tmp/bar.thinlto.o
; BARFOO-NEXT: [ 2] {{.*}}{{/|\\}}map-file.ll.tmp/foo.thinlto.o
; BARFOO-NEXT: # Sections:
; BARFOO: # Symbols:
; BARFOO-NEXT: # Address Size File Name
; BARFOO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 1] _bar
; BARFOO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 1] _maybe_weak
; BARFOO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 2] _foo
; LTO: # Path: {{.*}}{{/|\\}}map-file.ll.tmp/foobar
; LTO-NEXT: # Arch: x86_64
; LTO-NEXT: # Object files:
; LTO-NEXT: [ 0] linker synthesized
; LTO-NEXT: [ 1] {{.*}}{{/|\\}}map-file.ll.tmp/foo.o
; LTO-NEXT: [ 2] {{.*}}{{/|\\}}map-file.ll.tmp/bar.o
; LTO-NEXT: # Sections:
; LTO: # Symbols:
; LTO-NEXT: # Address Size File Name
; LTO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 1] _foo
; LTO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 2] _bar
; LTO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 2] _maybe_weak
;--- foo.ll
target triple = "x86_64-apple-darwin"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
define void @foo() {
ret void
}
;; This is weak in foo.ll but non-weak in bar.ll, so the definition in bar.ll
;; will always prevail. Check that the map file maps `maybe_weak` to bar.ll
;; regardless of the object file input order.
define weak_odr void @maybe_weak() {
ret void
}
;--- bar.ll
target triple = "x86_64-apple-darwin"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
define void @bar() {
ret void
}
define void @maybe_weak() {
ret void
}