[lld-macho] Show source information for undefined references

The error used to look like this:

  ld64.lld: error: undefined symbol: _foo
  >>> referenced by /path/to/bar.o:(symbol _baz+0x4)

If DWARF line information is available, we now show where in the source
the references are coming from:

  ld64.lld: error: undefined symbol: _foo
  >>> referenced by bar.cpp:42 (/path/to/bar.cpp:42)
  >>>               /path/to/bar.o:(symbol _baz+0x4)

Differential Revision: https://reviews.llvm.org/D128184
This commit is contained in:
Daniel Bertalan 2022-06-20 18:49:42 -04:00 committed by Nico Weber
parent 5ba0a9571b
commit cd7624f153
11 changed files with 334 additions and 25 deletions

View File

@ -224,11 +224,11 @@ template <class ELFT>
static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym,
InputSectionBase &sec, uint64_t offset) {
// In DWARF, functions and variables are stored to different places.
// First, lookup a function for a given offset.
// First, look up a function for a given offset.
if (Optional<DILineInfo> info = file.getDILineInfo(&sec, offset))
return createFileLineMsg(info->FileName, info->Line);
// If it failed, lookup again as a variable.
// If it failed, look up again as a variable.
if (Optional<std::pair<std::string, unsigned>> fileLine =
file.getVariableLoc(sym.getName()))
return createFileLineMsg(fileLine->first, fileLine->second);

View File

@ -20,15 +20,16 @@ using namespace llvm;
std::unique_ptr<DwarfObject> DwarfObject::create(ObjFile *obj) {
auto dObj = std::make_unique<DwarfObject>();
bool hasDwarfInfo = false;
// LLD only needs to extract the source file path from the debug info, so we
// initialize DwarfObject with just the sections necessary to get that path.
// The debugger will locate the debug info via the object file paths that we
// emit in our STABS symbols, so we don't need to process & emit them
// ourselves.
// LLD only needs to extract the source file path and line numbers from the
// debug info, so we initialize DwarfObject with just the sections necessary
// to get that path. The debugger will locate the debug info via the object
// file paths that we emit in our STABS symbols, so we don't need to process &
// emit them ourselves.
for (const InputSection *isec : obj->debugSections) {
if (StringRef *s =
StringSwitch<StringRef *>(isec->getName())
.Case(section_names::debugInfo, &dObj->infoSection.Data)
.Case(section_names::debugLine, &dObj->lineSection.Data)
.Case(section_names::debugAbbrev, &dObj->abbrevSection)
.Case(section_names::debugStr, &dObj->strSection)
.Default(nullptr)) {

View File

@ -37,12 +37,17 @@ public:
llvm::StringRef getAbbrevSection() const override { return abbrevSection; }
llvm::StringRef getStrSection() const override { return strSection; }
llvm::DWARFSection const &getLineSection() const override {
return lineSection;
}
// Returns an instance of DwarfObject if the given object file has the
// relevant DWARF debug sections.
static std::unique_ptr<DwarfObject> create(ObjFile *);
private:
llvm::DWARFSection infoSection;
llvm::DWARFSection lineSection;
llvm::StringRef abbrevSection;
llvm::StringRef strSection;
};

View File

@ -998,6 +998,8 @@ void ObjFile::parseDebugInfo() {
if (!dObj)
return;
// We do not re-use the context from getDwarf() here as that function
// constructs an expensive DWARFCache object.
auto *ctx = make<DWARFContext>(
std::move(dObj), "",
[&](Error err) {
@ -1373,6 +1375,31 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
}
}
std::string ObjFile::sourceFile() const {
SmallString<261> dir(compileUnit->getCompilationDir());
StringRef sep = sys::path::get_separator();
// We don't use `path::append` here because we want an empty `dir` to result
// in an absolute path. `append` would give us a relative path for that case.
if (!dir.endswith(sep))
dir += sep;
return (dir + compileUnit->getUnitDIE().getShortName()).str();
}
// Returns the DWARF cache for this object file, creating it on first use.
// Initialization is guarded by `initDwarf`, so the cache is built at most
// once. Returns null when DwarfObject::create() produced no object for this
// file.
lld::DWARFCache *ObjFile::getDwarf() {
  llvm::call_once(initDwarf, [this]() {
    std::unique_ptr<DwarfObject> debugObj = DwarfObject::create(this);
    if (debugObj == nullptr)
      return;

    // Both recoverable errors and warnings coming from the DWARF context are
    // reported as linker warnings prefixed with this file's name.
    auto reportAsWarning = [this](Error e) {
      warn(getName() + ": " + toString(std::move(e)));
    };
    auto context = std::make_unique<DWARFContext>(
        std::move(debugObj), "", reportAsWarning, reportAsWarning);
    dwarfCache = std::make_unique<DWARFCache>(std::move(context));
  });
  return dwarfCache.get();
}
// The path can point to either a dylib or a .tbd file.
static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
Optional<MemoryBufferRef> mbref = readFile(path);

View File

@ -12,6 +12,7 @@
#include "MachOStructs.h"
#include "Target.h"
#include "lld/Common/DWARF.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/CachedHashString.h"
@ -21,6 +22,7 @@
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Object/Archive.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Threading.h"
#include "llvm/TextAPI/TextAPIReader.h"
#include <vector>
@ -159,7 +161,13 @@ public:
static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
std::string sourceFile() const;
// Parses line table information for diagnostics. compileUnit should be used
// for other purposes.
lld::DWARFCache *getDwarf();
llvm::DWARFUnit *compileUnit = nullptr;
std::unique_ptr<lld::DWARFCache> dwarfCache;
Section *addrSigSection = nullptr;
const uint32_t modTime;
std::vector<ConcatInputSection *> debugSections;
@ -167,6 +175,7 @@ public:
llvm::DenseMap<ConcatInputSection *, FDE> fdes;
private:
llvm::once_flag initDwarf;
template <class LP> void parseLazy();
template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
template <class LP>

View File

@ -55,17 +55,21 @@ static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) {
return sym->getVA();
}
// Returns the last symbol in `symbols` whose value is less than or equal to
// `off`, or nullptr if every symbol starts after `off` (or there are none).
// NOTE(review): the binary search presumes `symbols` is ordered by `value` --
// confirm against the code that populates it.
const Defined *InputSection::getContainingSymbol(uint64_t off) const {
  auto startsAfter = [](uint64_t offset, const Defined *candidate) {
    return offset < candidate->value;
  };
  // First symbol that begins strictly past `off`; its predecessor, if any, is
  // the symbol we are looking for.
  auto *firstAfter = llvm::upper_bound(symbols, off, startsAfter);
  return firstAfter == symbols.begin() ? nullptr : *std::prev(firstAfter);
}
std::string InputSection::getLocation(uint64_t off) const {
// First, try to find a symbol that's near the offset. Use it as a reference
// point.
auto *nextSym = llvm::upper_bound(
symbols, off, [](uint64_t a, const Defined *b) { return a < b->value; });
if (nextSym != symbols.begin()) {
auto &sym = *std::prev(nextSym);
if (auto *sym = getContainingSymbol(off))
return (toString(getFile()) + ":(symbol " + sym->getName() + "+0x" +
Twine::utohexstr(off - sym->value) + ")")
.str();
}
// If that fails, use the section itself as a reference point.
for (const Subsection &subsec : section.subsections) {
@ -74,11 +78,61 @@ std::string InputSection::getLocation(uint64_t off) const {
break;
}
}
return (toString(getFile()) + ":(" + getName() + "+0x" +
Twine::utohexstr(off) + ")")
.str();
}
// Returns the source location corresponding to offset `off` within this
// InputSection, or the empty string if none can be determined.
// Format: Source.cpp:123 (/path/to/Source.cpp:123)
//
// Lookup order:
//   1. line-table information for the address (covers code),
//   2. the declaration location of the variable containing `off`,
//   3. the object's source file path, if a compile unit is available.
std::string InputSection::getSourceLocation(uint64_t off) const {
  auto *obj = dyn_cast<ObjFile>(getFile());
  if (!obj)
    return {};

  DWARFCache *dwarf = obj->getDwarf();
  if (!dwarf)
    return {};

  // `off` is relative to this InputSection. The line-table lookup needs an
  // offset relative to the whole section, so compute that separately and keep
  // `off` intact: getContainingSymbol() compares against symbol values in the
  // same InputSection-relative terms that getLocation() uses.
  uint64_t sectionOff = off;
  for (const Subsection &subsec : section.subsections) {
    if (subsec.isec == this) {
      sectionOff += subsec.offset;
      break;
    }
  }

  // Formats "file:line", followed by the full path in parentheses when the
  // path has a directory component.
  auto createMsg = [&](StringRef path, unsigned line) {
    std::string filename = sys::path::filename(path).str();
    std::string lineStr = (":" + Twine(line)).str();
    if (filename == path)
      return filename + lineStr;
    return (filename + lineStr + " (" + path + lineStr + ")").str();
  };

  // First, look up a function for a given offset.
  if (Optional<DILineInfo> li = dwarf->getDILineInfo(
          section.addr + sectionOff, object::SectionedAddress::UndefSection))
    return createMsg(li->FileName, li->Line);

  // If it failed, look up again as a variable.
  if (const Defined *sym = getContainingSymbol(off)) {
    // Symbols are generally prefixed with an underscore, which is not included
    // in the debug information.
    StringRef symName = sym->getName();
    if (!symName.empty() && symName[0] == '_')
      symName = symName.substr(1);

    if (Optional<std::pair<std::string, unsigned>> fileLine =
            dwarf->getVariableLoc(symName))
      return createMsg(fileLine->first, fileLine->second);
  }

  // Try to get the source file's name from the DWARF information.
  if (obj->compileUnit)
    return obj->sourceFile();

  return {};
}
void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
align = std::max(align, copy->align);
copy->live = false;

View File

@ -50,7 +50,11 @@ public:
// The offset from the beginning of the file.
uint64_t getVA(uint64_t off) const;
// Return a user-friendly string for use in diagnostics.
// Format: /path/to/object.o:(symbol _func+0x123)
std::string getLocation(uint64_t off) const;
// Return the source line corresponding to an address, or the empty string.
// Format: Source.cpp:123 (/path/to/Source.cpp:123)
std::string getSourceLocation(uint64_t off) const;
// Whether the data at \p off in this InputSection is live.
virtual bool isLive(uint64_t off) const = 0;
virtual void markLive(uint64_t off) = 0;
@ -85,6 +89,8 @@ public:
protected:
const Section &section;
const Defined *getContainingSymbol(uint64_t off) const;
};
// ConcatInputSections are combined into (Concat)OutputSections through simple
@ -292,6 +298,7 @@ constexpr const char compactUnwind[] = "__compact_unwind";
constexpr const char data[] = "__data";
constexpr const char debugAbbrev[] = "__debug_abbrev";
constexpr const char debugInfo[] = "__debug_info";
constexpr const char debugLine[] = "__debug_line";
constexpr const char debugStr[] = "__debug_str";
constexpr const char ehFrame[] = "__eh_frame";
constexpr const char gccExceptTab[] = "__gcc_except_tab";

View File

@ -381,8 +381,11 @@ void macho::reportPendingUndefinedSymbols() {
locations.codeReferences) {
if (i >= maxUndefinedReferences)
break;
// TODO: Get source file/line from debug information.
message += "\n>>> referenced by " + loc.isec->getLocation(loc.offset);
message += "\n>>> referenced by ";
std::string src = loc.isec->getSourceLocation(loc.offset);
if (!src.empty())
message += src + "\n>>> ";
message += loc.isec->getLocation(loc.offset);
++i;
}

View File

@ -834,16 +834,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: LinkEditSection(segment_names::linkEdit, section_names::symbolTable),
stringTableSection(stringTableSection) {}
void SymtabSection::emitBeginSourceStab(DWARFUnit *compileUnit) {
void SymtabSection::emitBeginSourceStab(StringRef sourceFile) {
StabsEntry stab(N_SO);
SmallString<261> dir(compileUnit->getCompilationDir());
StringRef sep = sys::path::get_separator();
// We don't use `path::append` here because we want an empty `dir` to result
// in an absolute path. `append` would give us a relative path for that case.
if (!dir.endswith(sep))
dir += sep;
stab.strx = stringTableSection.addString(
saver().save(dir + compileUnit->getUnitDIE().getShortName()));
stab.strx = stringTableSection.addString(saver().save(sourceFile));
stabs.emplace_back(std::move(stab));
}
@ -938,7 +931,7 @@ void SymtabSection::emitStabs() {
emitEndSourceStab();
lastFile = file;
emitBeginSourceStab(file->compileUnit);
emitBeginSourceStab(file->sourceFile());
emitObjectFileStab(file);
}

View File

@ -435,7 +435,7 @@ public:
uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); }
private:
void emitBeginSourceStab(llvm::DWARFUnit *compileUnit);
void emitBeginSourceStab(StringRef);
void emitEndSourceStab();
void emitObjectFileStab(ObjFile *);
void emitEndFunStab(Defined *);

View File

@ -0,0 +1,210 @@
# REQUIRES: aarch64
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos %s -o %t.o
# RUN: not %lld -arch arm64 %t.o -o /dev/null 2>&1 | FileCheck %s
# CHECK: undefined symbol: _undef
# CHECK-NEXT: >>> referenced by test.c:3
# CHECK-NEXT: >>> {{.*}}.o:(symbol _main+0x0)
# CHECK-NEXT: >>> referenced by test.c:2
# CHECK-NEXT: >>> {{.*}}.o:(symbol _ptr+0x0)
## This is the output of `clang -g2 -O2 -fdebug-compilation-dir=. -fno-ident` called on the following file, with the
## Apple DWARF tables removed:
##
## int undef();
## int (*ptr)() = &undef;
## int main() { return undef(); };
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 12, 0 sdk_version 13, 0
.file 1 "." "test.c"
.globl _main ; -- Begin function main
.p2align 2
_main: ; @main
Lfunc_begin0:
.loc 1 3 0 ; test.c:3:0
.cfi_startproc
; %bb.0:
.loc 1 3 21 prologue_end ; test.c:3:21
b _undef
Ltmp0:
Lfunc_end0:
.cfi_endproc
; -- End function
.section __DATA,__data
.globl _ptr ; @ptr
.p2align 3
_ptr:
.quad _undef
.section __DWARF,__debug_abbrev,regular,debug
Lsection_abbrev:
.byte 1 ; Abbreviation Code
.byte 17 ; DW_TAG_compile_unit
.byte 1 ; DW_CHILDREN_yes
.byte 37 ; DW_AT_producer
.byte 14 ; DW_FORM_strp
.byte 19 ; DW_AT_language
.byte 5 ; DW_FORM_data2
.byte 3 ; DW_AT_name
.byte 14 ; DW_FORM_strp
.ascii "\202|" ; DW_AT_LLVM_sysroot
.byte 14 ; DW_FORM_strp
.ascii "\357\177" ; DW_AT_APPLE_sdk
.byte 14 ; DW_FORM_strp
.byte 16 ; DW_AT_stmt_list
.byte 23 ; DW_FORM_sec_offset
.byte 27 ; DW_AT_comp_dir
.byte 14 ; DW_FORM_strp
.ascii "\341\177" ; DW_AT_APPLE_optimized
.byte 25 ; DW_FORM_flag_present
.byte 17 ; DW_AT_low_pc
.byte 1 ; DW_FORM_addr
.byte 18 ; DW_AT_high_pc
.byte 6 ; DW_FORM_data4
.byte 0 ; EOM(1)
.byte 0 ; EOM(2)
.byte 2 ; Abbreviation Code
.byte 52 ; DW_TAG_variable
.byte 0 ; DW_CHILDREN_no
.byte 3 ; DW_AT_name
.byte 14 ; DW_FORM_strp
.byte 73 ; DW_AT_type
.byte 19 ; DW_FORM_ref4
.byte 63 ; DW_AT_external
.byte 25 ; DW_FORM_flag_present
.byte 58 ; DW_AT_decl_file
.byte 11 ; DW_FORM_data1
.byte 59 ; DW_AT_decl_line
.byte 11 ; DW_FORM_data1
.byte 2 ; DW_AT_location
.byte 24 ; DW_FORM_exprloc
.byte 0 ; EOM(1)
.byte 0 ; EOM(2)
.byte 3 ; Abbreviation Code
.byte 15 ; DW_TAG_pointer_type
.byte 0 ; DW_CHILDREN_no
.byte 73 ; DW_AT_type
.byte 19 ; DW_FORM_ref4
.byte 0 ; EOM(1)
.byte 0 ; EOM(2)
.byte 4 ; Abbreviation Code
.byte 21 ; DW_TAG_subroutine_type
.byte 1 ; DW_CHILDREN_yes
.byte 73 ; DW_AT_type
.byte 19 ; DW_FORM_ref4
.byte 0 ; EOM(1)
.byte 0 ; EOM(2)
.byte 5 ; Abbreviation Code
.byte 24 ; DW_TAG_unspecified_parameters
.byte 0 ; DW_CHILDREN_no
.byte 0 ; EOM(1)
.byte 0 ; EOM(2)
.byte 6 ; Abbreviation Code
.byte 36 ; DW_TAG_base_type
.byte 0 ; DW_CHILDREN_no
.byte 3 ; DW_AT_name
.byte 14 ; DW_FORM_strp
.byte 62 ; DW_AT_encoding
.byte 11 ; DW_FORM_data1
.byte 11 ; DW_AT_byte_size
.byte 11 ; DW_FORM_data1
.byte 0 ; EOM(1)
.byte 0 ; EOM(2)
.byte 7 ; Abbreviation Code
.byte 46 ; DW_TAG_subprogram
.byte 0 ; DW_CHILDREN_no
.byte 17 ; DW_AT_low_pc
.byte 1 ; DW_FORM_addr
.byte 18 ; DW_AT_high_pc
.byte 6 ; DW_FORM_data4
.ascii "\347\177" ; DW_AT_APPLE_omit_frame_ptr
.byte 25 ; DW_FORM_flag_present
.byte 64 ; DW_AT_frame_base
.byte 24 ; DW_FORM_exprloc
.byte 3 ; DW_AT_name
.byte 14 ; DW_FORM_strp
.byte 58 ; DW_AT_decl_file
.byte 11 ; DW_FORM_data1
.byte 59 ; DW_AT_decl_line
.byte 11 ; DW_FORM_data1
.byte 73 ; DW_AT_type
.byte 19 ; DW_FORM_ref4
.byte 63 ; DW_AT_external
.byte 25 ; DW_FORM_flag_present
.ascii "\341\177" ; DW_AT_APPLE_optimized
.byte 25 ; DW_FORM_flag_present
.byte 0 ; EOM(1)
.byte 0 ; EOM(2)
.byte 0 ; EOM(3)
.section __DWARF,__debug_info,regular,debug
Lsection_info:
Lcu_begin0:
.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ; Length of Unit
.long Lset0
Ldebug_info_start0:
.short 4 ; DWARF version number
.set Lset1, Lsection_abbrev-Lsection_abbrev ; Offset Into Abbrev. Section
.long Lset1
.byte 8 ; Address Size (in bytes)
.byte 1 ; Abbrev [1] 0xb:0x69 DW_TAG_compile_unit
.long 0 ; DW_AT_producer
.short 12 ; DW_AT_language
.long 1 ; DW_AT_name
.long 8 ; DW_AT_LLVM_sysroot
.long 60 ; DW_AT_APPLE_sdk
.set Lset2, Lline_table_start0-Lsection_line ; DW_AT_stmt_list
.long Lset2
.long 71 ; DW_AT_comp_dir
; DW_AT_APPLE_optimized
.quad Lfunc_begin0 ; DW_AT_low_pc
.set Lset3, Lfunc_end0-Lfunc_begin0 ; DW_AT_high_pc
.long Lset3
.byte 2 ; Abbrev [2] 0x32:0x15 DW_TAG_variable
.long 73 ; DW_AT_name
.long 71 ; DW_AT_type
; DW_AT_external
.byte 1 ; DW_AT_decl_file
.byte 2 ; DW_AT_decl_line
.byte 9 ; DW_AT_location
.byte 3
.quad _ptr
.byte 3 ; Abbrev [3] 0x47:0x5 DW_TAG_pointer_type
.long 76 ; DW_AT_type
.byte 4 ; Abbrev [4] 0x4c:0x7 DW_TAG_subroutine_type
.long 83 ; DW_AT_type
.byte 5 ; Abbrev [5] 0x51:0x1 DW_TAG_unspecified_parameters
.byte 0 ; End Of Children Mark
.byte 6 ; Abbrev [6] 0x53:0x7 DW_TAG_base_type
.long 77 ; DW_AT_name
.byte 5 ; DW_AT_encoding
.byte 4 ; DW_AT_byte_size
.byte 7 ; Abbrev [7] 0x5a:0x19 DW_TAG_subprogram
.quad Lfunc_begin0 ; DW_AT_low_pc
.set Lset4, Lfunc_end0-Lfunc_begin0 ; DW_AT_high_pc
.long Lset4
; DW_AT_APPLE_omit_frame_ptr
.byte 1 ; DW_AT_frame_base
.byte 111
.long 81 ; DW_AT_name
.byte 1 ; DW_AT_decl_file
.byte 3 ; DW_AT_decl_line
.long 83 ; DW_AT_type
; DW_AT_external
; DW_AT_APPLE_optimized
.byte 0 ; End Of Children Mark
Ldebug_info_end0:
.section __DWARF,__debug_str,regular,debug
Linfo_string:
.byte 0 ; string offset=0
.asciz "test.c" ; string offset=1
.asciz "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk" ; string offset=8
.asciz "MacOSX.sdk" ; string offset=60
.asciz "." ; string offset=71
.asciz "ptr" ; string offset=73
.asciz "int" ; string offset=77
.asciz "main" ; string offset=81
.section __DWARF,__debug_line,regular,debug
Lsection_line:
Lline_table_start0: