From 1ebc8ed78ac5df1ad5ba68eb8d8974c1cea1bffa Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Fri, 12 Feb 2016 21:47:28 +0000 Subject: [PATCH] ELF: Add wildcard pattern matching to SECTIONS linker script command. Each rule in SECTIONS commands is something like ".foo *(.baz.*)", which instructs the linker to collect all sections whose name matches ".baz.*" from all files and put them into the .foo section. Previously, we didn't recognize the wildcard character. This patch adds that feature. Performance impact is a bit concerning because a linker script can contain hundreds of SECTIONS rules, and doing pattern matching against each rule would be too expensive. We could merge all patterns into a single DFA so that matching takes O(n) time in the input size. However, it is probably too much at this moment -- we don't know whether the performance of pattern matching matters or not. So I chose to implement the simplest algorithm in this patch. I hope this simple pattern matcher is sufficient. llvm-svn: 260745 --- lld/ELF/LinkerScript.cpp | 59 +++++++++++++++++++++++++--- lld/ELF/LinkerScript.h | 25 +++++++++--- lld/ELF/Writer.cpp | 20 +++++----- lld/test/ELF/linkerscript-sections.s | 14 +++++++ 4 files changed, 98 insertions(+), 20 deletions(-) diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 1e0e855171dd..f43f99debe4f 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -16,6 +16,7 @@ #include "LinkerScript.h" #include "Config.h" #include "Driver.h" +#include "InputSection.h" #include "SymbolTable.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" @@ -23,17 +24,23 @@ #include "llvm/Support/StringSaver.h" using namespace llvm; +using namespace llvm::object; using namespace lld; using namespace lld::elf2; LinkerScript *elf2::Script; -StringRef LinkerScript::getOutputSection(StringRef S) { - return Sections.lookup(S); +template +StringRef LinkerScript::getOutputSection(InputSectionBase *S) { + for (SectionRule &R : 
Sections) + if (R.match(S)) + return R.Dest; + return ""; } -bool LinkerScript::isDiscarded(StringRef S) { - return Sections.lookup(S) == "/DISCARD/"; +template +bool LinkerScript::isDiscarded(InputSectionBase *S) { + return getOutputSection(S) == "/DISCARD/"; } // A compartor to sort output sections. Returns -1 or 1 if both @@ -48,6 +55,33 @@ int LinkerScript::compareSections(StringRef A, StringRef B) { return I < J ? -1 : 1; } +// Returns true if S matches T. S may contain a meta character '*' +// which matches zero or more occurrences of any character. +static bool matchStr(StringRef S, StringRef T) { + for (;;) { + if (S.empty()) + return T.empty(); + if (S[0] == '*') { + S = S.substr(1); + if (S.empty()) + // Fast path. If a pattern is '*', it matches anything. + return true; + for (size_t I = 0, E = T.size(); I < E; ++I) + if (matchStr(S, T.substr(I))) + return true; + return false; + } + if (T.empty() || S[0] != T[0]) + return false; + S = S.substr(1); + T = T.substr(1); + } +} + +template bool SectionRule::match(InputSectionBase *S) { + return matchStr(SectionPattern, S->getSectionName()); +} + class elf2::ScriptParser { public: ScriptParser(BumpPtrAllocator *A, StringRef S, bool B) @@ -352,7 +386,7 @@ void ScriptParser::readOutputSectionDescription() { next(); // Skip input file name. 
expect("("); while (!Error && !skip(")")) - Script->Sections[next()] = OutSec; + Script->Sections.push_back({OutSec, next()}); } } @@ -370,3 +404,18 @@ void LinkerScript::read(MemoryBufferRef MB) { StringRef Path = MB.getBufferIdentifier(); ScriptParser(&Alloc, MB.getBuffer(), isUnderSysroot(Path)).run(); } + +template StringRef LinkerScript::getOutputSection(InputSectionBase *); +template StringRef LinkerScript::getOutputSection(InputSectionBase *); +template StringRef LinkerScript::getOutputSection(InputSectionBase *); +template StringRef LinkerScript::getOutputSection(InputSectionBase *); + +template bool LinkerScript::isDiscarded(InputSectionBase *); +template bool LinkerScript::isDiscarded(InputSectionBase *); +template bool LinkerScript::isDiscarded(InputSectionBase *); +template bool LinkerScript::isDiscarded(InputSectionBase *); + +template bool SectionRule::match(InputSectionBase *); +template bool SectionRule::match(InputSectionBase *); +template bool SectionRule::match(InputSectionBase *); +template bool SectionRule::match(InputSectionBase *); diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index bdfcb7d42562..edda27bdb943 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -20,7 +20,23 @@ namespace lld { namespace elf2 { class ScriptParser; +template class InputSectionBase; +// This class represents each rule in SECTIONS command. +class SectionRule { +public: + SectionRule(StringRef D, StringRef S) : Dest(D), SectionPattern(S) {} + + // Returns true if S should be in Dest section. + template bool match(InputSectionBase *S); + + StringRef Dest; + +private: + StringRef SectionPattern; +}; + +// This is a runner of the linker script. class LinkerScript { friend class ScriptParser; @@ -29,14 +45,13 @@ public: // this object and Config. 
void read(MemoryBufferRef MB); - StringRef getOutputSection(StringRef InputSection); - bool isDiscarded(StringRef InputSection); + template StringRef getOutputSection(InputSectionBase *S); + template bool isDiscarded(InputSectionBase *S); int compareSections(StringRef A, StringRef B); private: - // A map for SECTIONS command. The key is input section name - // and the value is the corresponding output section name. - llvm::DenseMap Sections; + // SECTIONS commands. + std::vector Sections; // Output sections are sorted by this order. std::vector SectionOrder; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index e41b299ec203..0ea780e58e04 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -74,7 +74,7 @@ private: void writeHeader(); void writeSections(); bool isDiscarded(InputSectionBase *IS) const; - StringRef getOutputSectionName(StringRef S) const; + StringRef getOutputSectionName(InputSectionBase *S) const; bool needsInterpSection() const { return !Symtab.getSharedFiles().empty() && !Config->DynamicLinker.empty(); } @@ -726,16 +726,17 @@ void Writer::addCopyRelSymbols(std::vector *> &Syms) { } template -StringRef Writer::getOutputSectionName(StringRef S) const { - StringRef Out = Script->getOutputSection(S); - if (!Out.empty()) - return Out; +StringRef Writer::getOutputSectionName(InputSectionBase *S) const { + StringRef Dest = Script->getOutputSection(S); + if (!Dest.empty()) + return Dest; + StringRef Name = S->getSectionName(); for (StringRef V : {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors."}) - if (S.startswith(V)) + if (Name.startswith(V)) return V.drop_back(); - return S; + return Name; } template @@ -750,7 +751,7 @@ void reportDiscarded(InputSectionBase *IS, template bool Writer::isDiscarded(InputSectionBase *S) const { return !S || !S->isLive() || S == &InputSection::Discarded || - Script->isDiscarded(S->getSectionName()); + Script->isDiscarded(S); } // The beginning and the 
ending of .rel[a].plt section are marked @@ -934,8 +935,7 @@ template bool Writer::createSections() { } OutputSectionBase *Sec; bool IsNew; - std::tie(Sec, IsNew) = - Factory.create(C, getOutputSectionName(C->getSectionName())); + std::tie(Sec, IsNew) = Factory.create(C, getOutputSectionName(C)); if (IsNew) { OwningSections.emplace_back(Sec); OutputSections.push_back(Sec); diff --git a/lld/test/ELF/linkerscript-sections.s b/lld/test/ELF/linkerscript-sections.s index ea9ae2b2726d..9471ccde6829 100644 --- a/lld/test/ELF/linkerscript-sections.s +++ b/lld/test/ELF/linkerscript-sections.s @@ -38,6 +38,20 @@ # RUN: llvm-objdump -section-headers %t3 | \ # RUN: FileCheck -check-prefix=SEC-ORDER %s +# The same test as above but with wildcard patterns. +# RUN: echo "SECTIONS { \ +# RUN: .bss : { *(.bss) } \ +# RUN: other : { *(o*er) } \ +# RUN: .shstrtab : { *(.shstrt*) } \ +# RUN: .symtab : { *(.symtab) } \ +# RUN: .strtab : { *(.strtab) } \ +# RUN: .data : { *(*data) } \ +# RUN: .text : { *(.text) } }" > %t.script +# RUN: cp %t %t.abc +# RUN: ld.lld -o %t3 --script %t.script %t.abc +# RUN: llvm-objdump -section-headers %t3 | \ +# RUN: FileCheck -check-prefix=SEC-ORDER %s + # Idx Name Size # SEC-ORDER: 1 .bss 00000002 {{[0-9a-f]*}} BSS # SEC-ORDER: 2 other 00000003 {{[0-9a-f]*}} DATA