[RuntimeDyld] Added support for relocation of indirect functions
In ELF, symbols of type STT_GNU_IFUNC need to be resolved by calling the function at the symbol's address. This is implemented by adding special stubs for all symbols of that type. Differential Revision: https://reviews.llvm.org/D105465
This commit is contained in:
parent
134bda4b61
commit
a667aa4de0
|
@ -773,6 +773,9 @@ Expected<uint32_t> ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Sym) const {
|
|||
if (isExportedToOtherDSO(ESym))
|
||||
Result |= SymbolRef::SF_Exported;
|
||||
|
||||
if (ESym->getType() == ELF::STT_GNU_IFUNC)
|
||||
Result |= SymbolRef::SF_Indirect;
|
||||
|
||||
if (ESym->getVisibility() == ELF::STV_HIDDEN)
|
||||
Result |= SymbolRef::SF_Hidden;
|
||||
|
||||
|
|
|
@ -310,9 +310,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
|
|||
<< " SID: " << SectionID
|
||||
<< " Offset: " << format("%p", (uintptr_t)Addr)
|
||||
<< " flags: " << *FlagsOrErr << "\n");
|
||||
if (!Name.empty()) // Skip absolute symbol relocations.
|
||||
GlobalSymbolTable[Name] =
|
||||
SymbolTableEntry(SectionID, Addr, *JITSymFlags);
|
||||
// Skip absolute symbol relocations.
|
||||
if (!Name.empty()) {
|
||||
auto Result = GlobalSymbolTable.insert_or_assign(
|
||||
Name, SymbolTableEntry(SectionID, Addr, *JITSymFlags));
|
||||
processNewSymbol(*I, Result.first->getValue());
|
||||
}
|
||||
} else if (SymType == object::SymbolRef::ST_Function ||
|
||||
SymType == object::SymbolRef::ST_Data ||
|
||||
SymType == object::SymbolRef::ST_Unknown ||
|
||||
|
@ -344,9 +347,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
|
|||
<< " SID: " << SectionID
|
||||
<< " Offset: " << format("%p", (uintptr_t)SectOffset)
|
||||
<< " flags: " << *FlagsOrErr << "\n");
|
||||
if (!Name.empty()) // Skip absolute symbol relocations
|
||||
GlobalSymbolTable[Name] =
|
||||
SymbolTableEntry(SectionID, SectOffset, *JITSymFlags);
|
||||
// Skip absolute symbol relocations.
|
||||
if (!Name.empty()) {
|
||||
auto Result = GlobalSymbolTable.insert_or_assign(
|
||||
Name, SymbolTableEntry(SectionID, SectOffset, *JITSymFlags));
|
||||
processNewSymbol(*I, Result.first->getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -632,6 +638,11 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
|
|||
RWDataAlign = std::max(RWDataAlign, CommonAlign);
|
||||
}
|
||||
|
||||
if (!CodeSectionSizes.empty()) {
|
||||
// Add 64 bytes for a potential IFunc resolver stub
|
||||
CodeSectionSizes.push_back(64);
|
||||
}
|
||||
|
||||
// Compute the required allocation space for each different type of sections
|
||||
// (code, read-only data, read-write data) assuming that all sections are
|
||||
// allocated with the max alignment. Note that we cannot compute with the
|
||||
|
|
|
@ -2292,18 +2292,75 @@ RelocationEntry RuntimeDyldELF::computeGOTOffsetRE(uint64_t GOTOffset,
|
|||
return RelocationEntry(GOTSectionID, GOTOffset, Type, SymbolOffset);
|
||||
}
|
||||
|
||||
void RuntimeDyldELF::processNewSymbol(const SymbolRef &ObjSymbol, SymbolTableEntry& Symbol) {
|
||||
// This should never return an error as `processNewSymbol` wouldn't have been
|
||||
// called if getFlags() returned an error before.
|
||||
auto ObjSymbolFlags = cantFail(ObjSymbol.getFlags());
|
||||
|
||||
if (ObjSymbolFlags & SymbolRef::SF_Indirect) {
|
||||
if (IFuncStubSectionID == 0) {
|
||||
// Create a dummy section for the ifunc stubs. It will be actually
|
||||
// allocated in finalizeLoad() below.
|
||||
IFuncStubSectionID = Sections.size();
|
||||
Sections.push_back(
|
||||
SectionEntry(".text.__llvm_IFuncStubs", nullptr, 0, 0, 0));
|
||||
// First 64B are reserverd for the IFunc resolver
|
||||
IFuncStubOffset = 64;
|
||||
}
|
||||
|
||||
IFuncStubs.push_back(IFuncStub{IFuncStubOffset, Symbol});
|
||||
// Modify the symbol so that it points to the ifunc stub instead of to the
|
||||
// resolver function.
|
||||
Symbol = SymbolTableEntry(IFuncStubSectionID, IFuncStubOffset,
|
||||
Symbol.getFlags());
|
||||
IFuncStubOffset += getMaxIFuncStubSize();
|
||||
}
|
||||
}
|
||||
|
||||
Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
|
||||
ObjSectionToIDMap &SectionMap) {
|
||||
if (IsMipsO32ABI)
|
||||
if (!PendingRelocs.empty())
|
||||
return make_error<RuntimeDyldError>("Can't find matching LO16 reloc");
|
||||
|
||||
// Create the IFunc stubs if necessary. This must be done before processing
|
||||
// the GOT entries, as the IFunc stubs may create some.
|
||||
if (IFuncStubSectionID != 0) {
|
||||
uint8_t *IFuncStubsAddr = MemMgr.allocateCodeSection(
|
||||
IFuncStubOffset, 1, IFuncStubSectionID, ".text.__llvm_IFuncStubs");
|
||||
if (!IFuncStubsAddr)
|
||||
return make_error<RuntimeDyldError>(
|
||||
"Unable to allocate memory for IFunc stubs!");
|
||||
Sections[IFuncStubSectionID] =
|
||||
SectionEntry(".text.__llvm_IFuncStubs", IFuncStubsAddr, IFuncStubOffset,
|
||||
IFuncStubOffset, 0);
|
||||
|
||||
createIFuncResolver(IFuncStubsAddr);
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Creating IFunc stubs SectionID: "
|
||||
<< IFuncStubSectionID << " Addr: "
|
||||
<< Sections[IFuncStubSectionID].getAddress() << '\n');
|
||||
for (auto &IFuncStub : IFuncStubs) {
|
||||
auto &Symbol = IFuncStub.OriginalSymbol;
|
||||
LLVM_DEBUG(dbgs() << "\tSectionID: " << Symbol.getSectionID()
|
||||
<< " Offset: " << format("%p", Symbol.getOffset())
|
||||
<< " IFuncStubOffset: "
|
||||
<< format("%p\n", IFuncStub.StubOffset));
|
||||
createIFuncStub(IFuncStubSectionID, 0, IFuncStub.StubOffset,
|
||||
Symbol.getSectionID(), Symbol.getOffset());
|
||||
}
|
||||
|
||||
IFuncStubSectionID = 0;
|
||||
IFuncStubOffset = 0;
|
||||
IFuncStubs.clear();
|
||||
}
|
||||
|
||||
// If necessary, allocate the global offset table
|
||||
if (GOTSectionID != 0) {
|
||||
// Allocate memory for the section
|
||||
size_t TotalSize = CurrentGOTIndex * getGOTEntrySize();
|
||||
uint8_t *Addr = MemMgr.allocateDataSection(TotalSize, getGOTEntrySize(),
|
||||
GOTSectionID, ".got", false);
|
||||
GOTSectionID, ".got", false);
|
||||
if (!Addr)
|
||||
return make_error<RuntimeDyldError>("Unable to allocate memory for GOT!");
|
||||
|
||||
|
@ -2326,7 +2383,7 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
|
|||
|
||||
section_iterator RelocatedSection = *RelSecOrErr;
|
||||
ObjSectionToIDMap::iterator i = SectionMap.find(*RelocatedSection);
|
||||
assert (i != SectionMap.end());
|
||||
assert(i != SectionMap.end());
|
||||
SectionToGOTMap[i->second] = GOTSectionID;
|
||||
}
|
||||
}
|
||||
|
@ -2362,6 +2419,110 @@ bool RuntimeDyldELF::isCompatibleFile(const object::ObjectFile &Obj) const {
|
|||
return Obj.isELF();
|
||||
}
|
||||
|
||||
void RuntimeDyldELF::createIFuncResolver(uint8_t *Addr) const {
|
||||
if (Arch == Triple::x86_64) {
|
||||
// The adddres of the GOT1 entry is in %r11, the GOT2 entry is in %r11+8
|
||||
// (see createIFuncStub() for details)
|
||||
// The following code first saves all registers that contain the original
|
||||
// function arguments as those registers are not saved by the resolver
|
||||
// function. %r11 is saved as well so that the GOT2 entry can be updated
|
||||
// afterwards. Then it calls the actual IFunc resolver function whose
|
||||
// address is stored in GOT2. After the resolver function returns, all
|
||||
// saved registers are restored and the return value is written to GOT1.
|
||||
// Finally, jump to the now resolved function.
|
||||
// clang-format off
|
||||
const uint8_t StubCode[] = {
|
||||
0x57, // push %rdi
|
||||
0x56, // push %rsi
|
||||
0x52, // push %rdx
|
||||
0x51, // push %rcx
|
||||
0x41, 0x50, // push %r8
|
||||
0x41, 0x51, // push %r9
|
||||
0x41, 0x53, // push %r11
|
||||
0x41, 0xff, 0x53, 0x08, // call *0x8(%r11)
|
||||
0x41, 0x5b, // pop %r11
|
||||
0x41, 0x59, // pop %r9
|
||||
0x41, 0x58, // pop %r8
|
||||
0x59, // pop %rcx
|
||||
0x5a, // pop %rdx
|
||||
0x5e, // pop %rsi
|
||||
0x5f, // pop %rdi
|
||||
0x49, 0x89, 0x03, // mov %rax,(%r11)
|
||||
0xff, 0xe0 // jmp *%rax
|
||||
};
|
||||
// clang-format on
|
||||
static_assert(sizeof(StubCode) <= 64,
|
||||
"maximum size of the IFunc resolver is 64B");
|
||||
memcpy(Addr, StubCode, sizeof(StubCode));
|
||||
} else {
|
||||
report_fatal_error(
|
||||
"IFunc resolver is not supported for target architecture");
|
||||
}
|
||||
}
|
||||
|
||||
void RuntimeDyldELF::createIFuncStub(unsigned IFuncStubSectionID,
|
||||
uint64_t IFuncResolverOffset,
|
||||
uint64_t IFuncStubOffset,
|
||||
unsigned IFuncSectionID,
|
||||
uint64_t IFuncOffset) {
|
||||
auto &IFuncStubSection = Sections[IFuncStubSectionID];
|
||||
auto *Addr = IFuncStubSection.getAddressWithOffset(IFuncStubOffset);
|
||||
|
||||
if (Arch == Triple::x86_64) {
|
||||
// The first instruction loads a PC-relative address into %r11 which is a
|
||||
// GOT entry for this stub. This initially contains the address to the
|
||||
// IFunc resolver. We can use %r11 here as it's caller saved but not used
|
||||
// to pass any arguments. In fact, x86_64 ABI even suggests using %r11 for
|
||||
// code in the PLT. The IFunc resolver will use %r11 to update the GOT
|
||||
// entry.
|
||||
//
|
||||
// The next instruction just jumps to the address contained in the GOT
|
||||
// entry. As mentioned above, we do this two-step jump by first setting
|
||||
// %r11 so that the IFunc resolver has access to it.
|
||||
//
|
||||
// The IFunc resolver of course also needs to know the actual address of
|
||||
// the actual IFunc resolver function. This will be stored in a GOT entry
|
||||
// right next to the first one for this stub. So, the IFunc resolver will
|
||||
// be able to call it with %r11+8.
|
||||
//
|
||||
// In total, two adjacent GOT entries (+relocation) and one additional
|
||||
// relocation are required:
|
||||
// GOT1: Address of the IFunc resolver.
|
||||
// GOT2: Address of the IFunc resolver function.
|
||||
// IFuncStubOffset+3: 32-bit PC-relative address of GOT1.
|
||||
uint64_t GOT1 = allocateGOTEntries(2);
|
||||
uint64_t GOT2 = GOT1 + getGOTEntrySize();
|
||||
|
||||
RelocationEntry RE1(GOTSectionID, GOT1, ELF::R_X86_64_64,
|
||||
IFuncResolverOffset, {});
|
||||
addRelocationForSection(RE1, IFuncStubSectionID);
|
||||
RelocationEntry RE2(GOTSectionID, GOT2, ELF::R_X86_64_64, IFuncOffset, {});
|
||||
addRelocationForSection(RE2, IFuncSectionID);
|
||||
|
||||
const uint8_t StubCode[] = {
|
||||
0x4c, 0x8d, 0x1d, 0x00, 0x00, 0x00, 0x00, // leaq 0x0(%rip),%r11
|
||||
0x41, 0xff, 0x23 // jmpq *(%r11)
|
||||
};
|
||||
assert(sizeof(StubCode) <= getMaxIFuncStubSize() &&
|
||||
"IFunc stub size must not exceed getMaxIFuncStubSize()");
|
||||
memcpy(Addr, StubCode, sizeof(StubCode));
|
||||
|
||||
// The PC-relative value starts 4 bytes from the end of the leaq
|
||||
// instruction, so the addend is -4.
|
||||
resolveGOTOffsetRelocation(IFuncStubSectionID, IFuncStubOffset + 3,
|
||||
GOT1 - 4, ELF::R_X86_64_PC32);
|
||||
} else {
|
||||
report_fatal_error("IFunc stub is not supported for target architecture");
|
||||
}
|
||||
}
|
||||
|
||||
unsigned RuntimeDyldELF::getMaxIFuncStubSize() const {
|
||||
if (Arch == Triple::x86_64) {
|
||||
return 10;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool RuntimeDyldELF::relocationNeedsGot(const RelocationRef &R) const {
|
||||
unsigned RelTy = R.getType();
|
||||
if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be)
|
||||
|
|
|
@ -158,6 +158,40 @@ private:
|
|||
// Map between GOT relocation value and corresponding GOT offset
|
||||
std::map<RelocationValueRef, uint64_t> GOTOffsetMap;
|
||||
|
||||
/// The ID of the current IFunc stub section
|
||||
unsigned IFuncStubSectionID = 0;
|
||||
/// The current offset into the IFunc stub section
|
||||
uint64_t IFuncStubOffset = 0;
|
||||
|
||||
/// An IFunc stub and its original symbol. One record is queued by
/// processNewSymbol() for every indirect symbol and consumed by finalizeLoad()
/// when the stub code is actually emitted.
|
||||
struct IFuncStub {
|
||||
/// The offset of this stub in the IFunc stub section
|
||||
uint64_t StubOffset;
|
||||
/// The symbol table entry of the original symbol. This is a copy taken
/// before the global symbol table entry is redirected to the stub, so its
/// section ID and offset still identify the IFunc resolver function.
|
||||
SymbolTableEntry OriginalSymbol;
|
||||
};
|
||||
|
||||
/// The IFunc stubs
|
||||
SmallVector<IFuncStub, 2> IFuncStubs;
|
||||
|
||||
/// Create the code for the IFunc resolver at the given address. This code
|
||||
/// works together with the stubs created in createIFuncStub() to call the
|
||||
/// resolver function and then jump to the real function address.
|
||||
/// It must not be larger than 64B.
|
||||
void createIFuncResolver(uint8_t *Addr) const;
|
||||
/// Create the code for an IFunc stub for the IFunc that is defined in
|
||||
/// section IFuncSectionID at offset IFuncOffset. The IFunc resolver created
|
||||
/// by createIFuncResolver() is defined in the section IFuncStubSectionID at
|
||||
/// offset IFuncResolverOffset. The code should be written into the section
|
||||
/// with the id IFuncStubSectionID at the offset IFuncStubOffset.
|
||||
void createIFuncStub(unsigned IFuncStubSectionID,
|
||||
uint64_t IFuncResolverOffset, uint64_t IFuncStubOffset,
|
||||
unsigned IFuncSectionID, uint64_t IFuncOffset);
|
||||
/// Return the maximum size of a stub created by createIFuncStub()
|
||||
unsigned getMaxIFuncStubSize() const;
|
||||
|
||||
void processNewSymbol(const SymbolRef &ObjSymbol,
|
||||
SymbolTableEntry &Entry) override;
|
||||
bool relocationNeedsGot(const RelocationRef &R) const override;
|
||||
bool relocationNeedsStub(const RelocationRef &R) const override;
|
||||
|
||||
|
|
|
@ -435,6 +435,10 @@ protected:
|
|||
// Return size of Global Offset Table (GOT) entry
|
||||
virtual size_t getGOTEntrySize() { return 0; }
|
||||
|
||||
// Hook for the subclasses to do further processing when a symbol is added to
|
||||
// the global symbol table. This function may modify the symbol table entry.
|
||||
virtual void processNewSymbol(const SymbolRef &ObjSymbol, SymbolTableEntry& Entry) {}
|
||||
|
||||
// Return true if the relocation R may require allocating a GOT entry.
|
||||
virtual bool relocationNeedsGot(const RelocationRef &R) const {
|
||||
return false;
|
||||
|
|
|
@ -0,0 +1,109 @@
|
|||
# RUN: rm -rf %t && mkdir -p %t
|
||||
# RUN: split-file %s %t
|
||||
# RUN: llvm-mc -triple=x86_64-unknown-linux-gnu -filetype=obj -o %t/test_runner.o %t/test_runner.s
|
||||
# RUN: llvm-mc -triple=x86_64-unknown-linux-gnu -filetype=obj -o %t/func_defs.o %t/func_defs.s
|
||||
# RUN: llvm-rtdyld -triple=x86_64-unknown-linux-gnu -verify -check=%s %t/test_runner.o %t/func_defs.o
|
||||
# RUN: llvm-rtdyld -triple=x86_64-unknown-linux-gnu -execute %t/test_runner.o %t/func_defs.o
|
||||
|
||||
#--- test_runner.s
|
||||
|
||||
# The _main function of this file contains calls to the two external functions
|
||||
# "indirect_func" and "normal_func" that are not yet defined. They are called via
|
||||
# the PLT to simulate how a compiler would emit a call to an external function.
|
||||
# Eventually, indirect_func will resolve to a STT_GNU_IFUNC and normal_func to a
|
||||
# regular function. We include calls to both types of functions in this test to
|
||||
# test that both types of functions are executed correctly when their types are
|
||||
# not known initially.
|
||||
# It also contains a call to a locally defined indirect function. As RuntimeDyld
|
||||
# treats local functions a bit differently than external functions, we also test
|
||||
# that.
|
||||
# Verify that the functions return the expected value. If the external indirect
|
||||
# function call fails, this returns the error code 1. If the external normal
|
||||
# function call fails, it's the error code 2. If the call to the locally
|
||||
# defined indirect function fails, return the error code 3.
|
||||
|
||||
local_real_func:
|
||||
mov $0x56, %eax
|
||||
ret
|
||||
|
||||
local_indirect_func_resolver:
|
||||
lea local_real_func(%rip), %rax
|
||||
ret
|
||||
|
||||
.type local_indirect_func, @gnu_indirect_function
|
||||
.set local_indirect_func, local_indirect_func_resolver
|
||||
|
||||
.global _main
|
||||
_main:
|
||||
call indirect_func@plt
|
||||
cmp $0x12, %eax
|
||||
je 1f
|
||||
mov $1, %eax
|
||||
ret
|
||||
1:
|
||||
|
||||
call normal_func@plt
|
||||
cmp $0x34, %eax
|
||||
je 1f
|
||||
mov $2, %eax
|
||||
ret
|
||||
1:
|
||||
|
||||
call local_indirect_func@plt
|
||||
cmp $0x56, %eax
|
||||
je 1f
|
||||
mov $3, %eax
|
||||
ret
|
||||
1:
|
||||
|
||||
xor %eax, %eax
|
||||
ret
|
||||
|
||||
# Test that the indirect functions have the same addresses in both calls.
|
||||
# rtdyld-check: decode_operand(test_indirect_func_address_1, 4) + next_pc(test_indirect_func_address_1) = decode_operand(test_indirect_func_address_2, 4) + next_pc(test_indirect_func_address_2)
|
||||
test_indirect_func_address_1:
|
||||
lea indirect_func(%rip), %rax
|
||||
|
||||
test_indirect_func_address_2:
|
||||
lea indirect_func(%rip), %rax
|
||||
|
||||
# Test that the locally defined indirect function has the same address in both
# references. Each side must combine the operand of an instruction with the
# next_pc of that same instruction; mixing in the labels of the external
# checks only balances out by coincidence of the instruction layout.
# rtdyld-check: decode_operand(test_local_indirect_func_address_1, 4) + next_pc(test_local_indirect_func_address_1) = decode_operand(test_local_indirect_func_address_2, 4) + next_pc(test_local_indirect_func_address_2)
test_local_indirect_func_address_1:
	lea local_indirect_func(%rip), %rax

test_local_indirect_func_address_2:
	lea local_indirect_func(%rip), %rax
|
||||
|
||||
#--- func_defs.s
|
||||
|
||||
# This file contains the external functions that are called above. The type of
|
||||
# the indirect function is set to @gnu_indirect_function and its value is set
|
||||
# to the value of ifunc_resolver. This is what gcc emits when using
|
||||
# __attribute__((ifunc("ifunc_resolver"))) in C. The resolver function just
|
||||
# returns the address of the real function "real_func".
|
||||
# To test that everything works correctly, the indirect function returns 0x12
|
||||
# and the direct function returns 0x34. This is verified in the _main function
|
||||
# above.
|
||||
|
||||
real_func:
|
||||
mov $0x12, %eax
|
||||
ret
|
||||
|
||||
ifunc_resolver:
|
||||
lea real_func(%rip), %rax
|
||||
ret
|
||||
|
||||
.global indirect_func
|
||||
.type indirect_func, @gnu_indirect_function
|
||||
.set indirect_func, ifunc_resolver
|
||||
|
||||
.global normal_func
|
||||
normal_func:
|
||||
mov $0x34, %eax
|
||||
ret
|
||||
|
||||
# Test that the address of the indirect function is equal even when it is
|
||||
# defined in another object file.
|
||||
# rtdyld-check: decode_operand(test_indirect_func_address_1, 4) + next_pc(test_indirect_func_address_1) = decode_operand(test_indirect_func_address_3, 4) + next_pc(test_indirect_func_address_3)
|
||||
test_indirect_func_address_3:
|
||||
lea indirect_func(%rip), %rax
|
Loading…
Reference in New Issue