[ELF] Move duplicate symbol check after input file parsing

https://discourse.llvm.org/t/parallel-input-file-parsing/60164

To decouple symbol initialization and section initialization, `Defined::section`
assignment should be postponed after input file parsing. To avoid spurious
duplicate definition error due to two definitions in COMDAT groups of the same
signature, we should postpone the duplicate symbol check.

The function is called postScan instead of a more specific name like
checkDuplicateSymbols, because we may merge Symbol::mergeProperties into
postScan. It is placed after compileBitcodeFiles to apply to ET_REL files
produced by LTO. This causes minor diagnostic regression
for skipLinkedOutput configurations: ld.lld --thinlto-index-only a.bc b.o
(bitcode definition prevails) won't detect duplicate symbol error. I think this
is an acceptable compromise. The important cases where (a) both files are
bitcode or (b) --thinlto-index-only is unused are still detected.

Reviewed By: ikudrin

Differential Revision: https://reviews.llvm.org/D119908
This commit is contained in:
Fangrui Song 2022-02-22 10:07:58 -08:00
parent 104d9a6743
commit 88d66f6ed1
10 changed files with 116 additions and 28 deletions

View File

@ -2118,6 +2118,8 @@ static void redirectSymbols(ArrayRef<WrappedSymbol> wrapped) {
map.try_emplace(sym, sym2);
// If both foo@v1 and foo@@v1 are defined and non-weak, report a duplicate
// definition error.
if (sym->isDefined())
sym2->checkDuplicate(cast<Defined>(*sym));
sym2->resolve(*sym);
// Eliminate foo@v1 from the symbol table.
sym->symbolKind = Symbol::PlaceholderKind;
@ -2223,6 +2225,25 @@ static uint32_t getAndFeatures() {
return ret;
}
static void postParseObjectFile(ELFFileBase *file) {
switch (config->ekind) {
case ELF32LEKind:
cast<ObjFile<ELF32LE>>(file)->postParse();
break;
case ELF32BEKind:
cast<ObjFile<ELF32BE>>(file)->postParse();
break;
case ELF64LEKind:
cast<ObjFile<ELF64LE>>(file)->postParse();
break;
case ELF64BEKind:
cast<ObjFile<ELF64BE>>(file)->postParse();
break;
default:
llvm_unreachable("");
}
}
// Do actual linking. Note that when this function is called,
// all linker scripts have already been parsed.
void LinkerDriver::link(opt::InputArgList &args) {
@ -2340,6 +2361,11 @@ void LinkerDriver::link(opt::InputArgList &args) {
for (auto *s : lto::LTO::getRuntimeLibcallSymbols())
handleLibcall(s);
// No more lazy bitcode can be extracted at this point. Do post parse work
// like checking duplicate symbols.
parallelForEach(objectFiles, postParseObjectFile);
parallelForEach(bitcodeFiles, [](BitcodeFile *file) { file->postParse(); });
// Return if there were name resolution errors.
if (errorCount())
return;
@ -2393,6 +2419,7 @@ void LinkerDriver::link(opt::InputArgList &args) {
//
// With this the symbol table should be complete. After this, no new names
// except a few linker-synthesized ones will be added to the symbol table.
const size_t numObjsBeforeLTO = objectFiles.size();
invokeELFT(compileBitcodeFiles, skipLinkedOutput);
// Symbol resolution finished. Report backward reference problems.
@ -2404,6 +2431,11 @@ void LinkerDriver::link(opt::InputArgList &args) {
if (skipLinkedOutput)
return;
// compileBitcodeFiles may have produced lto.tmp object files. After this, no
// more file will be added.
auto newObjectFiles = makeArrayRef(objectFiles).slice(numObjsBeforeLTO);
parallelForEach(newObjectFiles, postParseObjectFile);
// Handle --exclude-libs again because lto.tmp may reference additional
// libcalls symbols defined in an excluded archive. This may override
// versionId set by scanVersionScript().

View File

@ -1149,6 +1149,33 @@ void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) {
}
}
// Called after all ObjFile::parse is called for all ObjFiles. This checks
// duplicate symbols and may do symbol property merge in the future.
template <class ELFT> void ObjFile<ELFT>::postParse() {
ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
const Elf_Sym &eSym = eSyms[i];
const Symbol &sym = *symbols[i];
// !sym.file allows a symbol assignment redefines a symbol without an error.
if (sym.file == this || !sym.file || !sym.isDefined() ||
eSym.st_shndx == SHN_UNDEF || eSym.st_shndx == SHN_COMMON ||
eSym.getBinding() == STB_WEAK)
continue;
uint32_t secIdx = eSym.st_shndx;
if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
secIdx = cantFail(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
else if (secIdx >= SHN_LORESERVE)
secIdx = 0;
if (sections[secIdx] == &InputSection::discarded)
continue;
// Allow absolute symbols with the same value for GNU ld compatibility.
if (!cast<Defined>(sym).section && !sections[secIdx] &&
cast<Defined>(sym).value == eSym.st_value)
continue;
reportDuplicate(sym, this, sections[secIdx], eSym.st_value);
}
}
// The handling of tentative definitions (COMMON symbols) in archives is murky.
// A tentative definition will be promoted to a global definition if there are
// no non-tentative definitions to dominate it. When we hold a tentative
@ -1617,7 +1644,6 @@ createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats,
}
template <class ELFT> void BitcodeFile::parse() {
std::vector<bool> keptComdats;
for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) {
keptComdats.push_back(
s.second == Comdat::NoDeduplicate ||
@ -1646,6 +1672,20 @@ void BitcodeFile::parseLazy() {
}
}
void BitcodeFile::postParse() {
for (auto it : llvm::enumerate(obj->symbols())) {
const Symbol &sym = *symbols[it.index()];
const auto &objSym = it.value();
if (sym.file == this || !sym.isDefined() || objSym.isUndefined() ||
objSym.isCommon() || objSym.isWeak())
continue;
int c = objSym.getComdatIndex();
if (c != -1 && !keptComdats[c])
continue;
reportDuplicate(sym, this, nullptr, 0);
}
}
void BinaryFile::parse() {
ArrayRef<uint8_t> data = arrayRefFromStringRef(mb.getBuffer());
auto *section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
@ -1663,12 +1703,15 @@ void BinaryFile::parse() {
llvm::StringSaver &saver = lld::saver();
symtab->addSymbol(Defined{nullptr, saver.save(s + "_start"), STB_GLOBAL,
STV_DEFAULT, STT_OBJECT, 0, 0, section});
symtab->addSymbol(Defined{nullptr, saver.save(s + "_end"), STB_GLOBAL,
STV_DEFAULT, STT_OBJECT, data.size(), 0, section});
symtab->addSymbol(Defined{nullptr, saver.save(s + "_size"), STB_GLOBAL,
STV_DEFAULT, STT_OBJECT, data.size(), 0, nullptr});
symtab->addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_start"),
STB_GLOBAL, STV_DEFAULT, STT_OBJECT, 0,
0, section});
symtab->addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_end"),
STB_GLOBAL, STV_DEFAULT, STT_OBJECT,
data.size(), 0, section});
symtab->addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_size"),
STB_GLOBAL, STV_DEFAULT, STT_OBJECT,
data.size(), 0, nullptr});
}
InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName,

View File

@ -273,6 +273,8 @@ public:
// Get cached DWARF information.
DWARFCache *getDwarf();
void postParse();
private:
void initializeSections(bool ignoreComdats,
const llvm::object::ELFFile<ELFT> &obj);
@ -315,7 +317,9 @@ public:
static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
template <class ELFT> void parse();
void parseLazy();
void postParse();
std::unique_ptr<llvm::lto::InputFile> obj;
std::vector<bool> keptComdats;
};
// .so file.

View File

@ -102,6 +102,16 @@ Symbol *SymbolTable::addSymbol(const Symbol &newSym) {
return sym;
}
// This variant of addSymbol is used by BinaryFile::parse to check duplicate
// symbol errors.
Symbol *SymbolTable::addAndCheckDuplicate(const Defined &newSym) {
Symbol *sym = insert(newSym.getName());
if (sym->isDefined())
sym->checkDuplicate(newSym);
sym->resolve(newSym);
return sym;
}
Symbol *SymbolTable::find(StringRef name) {
auto it = symMap.find(CachedHashStringRef(name));
if (it == symMap.end())

View File

@ -39,6 +39,7 @@ public:
Symbol *insert(StringRef name);
Symbol *addSymbol(const Symbol &newSym);
Symbol *addAndCheckDuplicate(const Defined &newSym);
void scanVersionScript();

View File

@ -572,21 +572,11 @@ int Symbol::compare(const Symbol *other) const {
return -1;
}
auto *oldSym = cast<Defined>(this);
auto *newSym = cast<Defined>(other);
if (isa_and_nonnull<BitcodeFile>(other->file))
return 0;
if (!oldSym->section && !newSym->section && oldSym->value == newSym->value &&
newSym->binding == STB_GLOBAL)
return -1;
return 0;
}
static void reportDuplicate(const Symbol &sym, InputFile *newFile,
InputSectionBase *errSec, uint64_t errOffset) {
void elf::reportDuplicate(const Symbol &sym, InputFile *newFile,
InputSectionBase *errSec, uint64_t errOffset) {
if (config->allowMultipleDefinition)
return;
const Defined *d = cast<Defined>(&sym);
@ -619,6 +609,13 @@ static void reportDuplicate(const Symbol &sym, InputFile *newFile,
error(msg);
}
void Symbol::checkDuplicate(const Defined &other) const {
if (compare(&other) == 0)
reportDuplicate(*this, other.file,
dyn_cast_or_null<InputSectionBase>(other.section),
other.value);
}
void Symbol::resolveCommon(const CommonSymbol &other) {
int cmp = compare(&other);
if (cmp < 0)
@ -653,10 +650,6 @@ void Symbol::resolveDefined(const Defined &other) {
int cmp = compare(&other);
if (cmp > 0)
replace(other);
else if (cmp == 0)
reportDuplicate(*this, other.file,
dyn_cast_or_null<InputSectionBase>(other.section),
other.value);
}
template <class LazyT>

View File

@ -213,6 +213,8 @@ public:
// non-lazy object causes a runtime error.
void extract() const;
void checkDuplicate(const Defined &other) const;
private:
void resolveUndefined(const Undefined &other);
void resolveCommon(const CommonSymbol &other);
@ -569,6 +571,8 @@ template <typename... T> Defined *makeDefined(T &&...args) {
Defined(std::forward<T>(args)...);
}
void reportDuplicate(const Symbol &sym, InputFile *newFile,
InputSectionBase *errSec, uint64_t errOffset);
void maybeWarnUnorderableSymbol(const Symbol *sym);
bool computeIsPreemptible(const Symbol &sym);
void reportBackrefs();

View File

@ -9,11 +9,11 @@
# RUN: not ld.lld %t.o %t.o -o /dev/null 2>&1 | FileCheck %s
# CHECK: error: {{.*}}.o: STB_LOCAL symbol (2) found at index >= .symtab's sh_info (1)
# CHECK-NEXT: error: {{.*}}.o: STB_LOCAL symbol (2) found at index >= .symtab's sh_info (1)
# CHECK-NEXT: error: duplicate symbol: _start
# CHECK-NEXT: >>> defined at {{.*}}.o:(.text+0x0)
# CHECK-NEXT: >>> defined at {{.*}}.o:(.text+0x0)
# CHECK-EMPTY:
# CHECK-NEXT: error: {{.*}}.o: STB_LOCAL symbol (2) found at index >= .symtab's sh_info (1)
# RUN: ld.lld --noinhibit-exec %t.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=WARN
# WARN: warning: {{.*}}.o: STB_LOCAL symbol (2) found at index >= .symtab's sh_info (1)

View File

@ -8,17 +8,18 @@
;; --thinlto-index-only skips some passes. Test the error is present.
; RUN: not ld.lld %t/a.bc %t/a.bc --thinlto-index-only -o /dev/null 2>&1 | FileCheck %s
; RUN: not ld.lld %t/b.o %t/a.bc --lto-emit-asm -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK2
; RUN: not ld.lld %t/a.bc %t/b.o --thinlto-index-only -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK2
;; --undefined-glob g extracts %t/c.bc which causes a duplicate symbol error.
; RUN: not ld.lld %t/a.bc --start-lib %t/c.bc --undefined-glob g --thinlto-index-only -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK
; RUN: not ld.lld %t/a.bc --start-lib %t/c.bc --undefined-glob g --thinlto-index-only -o /dev/null 2>&1 | FileCheck %s
; CHECK: duplicate symbol: f
; CHECK-NEXT: >>> defined in {{.*}}.bc
; CHECK-NEXT: >>> defined in {{.*}}.bc
; CHECK2: duplicate symbol: f
; CHECK2-NEXT: >>> defined in {{.*}}.o
; CHECK2-NEXT: >>> defined in {{.*}}.bc
; CHECK2-NEXT: >>> defined in {{.*}}
; CHECK2-NEXT: >>> defined in {{.*}}
;--- a.ll
target triple = "x86_64-unknown-linux-gnu"

View File

@ -2,7 +2,7 @@
// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o
// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/Inputs/vs-diagnostics-duplicate2.s -o %t2.o
// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/Inputs/vs-diagnostics-duplicate3.s -o %t3.o
// RUN: not ld.lld --vs-diagnostics %t1.o %t2.o %t3.o -o /dev/null 2>&1 | FileCheck %s
// RUN: not ld.lld --vs-diagnostics --threads=1 %t1.o %t2.o %t3.o -o /dev/null 2>&1 | FileCheck %s
// Case 1. The source locations are unknown for both symbols.
// CHECK: {{.*}}ld.lld{{.*}}: error: duplicate symbol: foo