[BOLT] Add support for GOTPCRELX relocations
The linker can convert instructions with GOTPCRELX relocations into a form that uses absolute addressing with an immediate operand. BOLT needs to recognize such conversions and symbolize the immediates. Reviewed By: rafauler. Differential Revision: https://reviews.llvm.org/D126747
This commit is contained in:
parent
ffe86e3bdd
commit
1817642684
|
@ -842,6 +842,20 @@ public:
|
|||
return (RI == Relocations.end()) ? nullptr : &RI->second;
|
||||
}
|
||||
|
||||
/// Return the first relocation in the function that starts at an address in
|
||||
/// the [StartOffset, EndOffset) range. Return nullptr if no such relocation
|
||||
/// exists.
|
||||
const Relocation *getRelocationInRange(uint64_t StartOffset,
|
||||
uint64_t EndOffset) const {
|
||||
assert(CurrentState == State::Empty &&
|
||||
"Relocations unavailable in the current function state.");
|
||||
auto RI = Relocations.lower_bound(StartOffset);
|
||||
if (RI != Relocations.end() && RI->first < EndOffset)
|
||||
return &RI->second;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Returns the raw binary encoding of this function.
|
||||
ErrorOr<ArrayRef<uint8_t>> getData() const;
|
||||
|
||||
|
@ -1314,11 +1328,11 @@ public:
|
|||
case ELF::R_X86_64_PC8:
|
||||
case ELF::R_X86_64_PC32:
|
||||
case ELF::R_X86_64_PC64:
|
||||
case ELF::R_X86_64_GOTPCRELX:
|
||||
case ELF::R_X86_64_REX_GOTPCRELX:
|
||||
Relocations[Offset] = Relocation{Offset, Symbol, RelType, Addend, Value};
|
||||
return;
|
||||
case ELF::R_X86_64_PLT32:
|
||||
case ELF::R_X86_64_GOTPCRELX:
|
||||
case ELF::R_X86_64_REX_GOTPCRELX:
|
||||
case ELF::R_X86_64_GOTPCREL:
|
||||
case ELF::R_X86_64_TPOFF32:
|
||||
case ELF::R_X86_64_GOTTPOFF:
|
||||
|
|
|
@ -77,6 +77,9 @@ struct Relocation {
|
|||
/// Return true if relocation type implies the creation of a GOT entry
|
||||
static bool isGOT(uint64_t Type);
|
||||
|
||||
/// Return true if relocation type is GOTPCRELX, a special x86-64 relocation type that allows the linker to modify the instruction.
|
||||
static bool isX86GOTPCRELX(uint64_t Type);
|
||||
|
||||
/// Return true if relocation type is NONE
|
||||
static bool isNone(uint64_t Type);
|
||||
|
||||
|
|
|
@ -562,6 +562,12 @@ bool Relocation::isGOT(uint64_t Type) {
|
|||
return isGOTX86(Type);
|
||||
}
|
||||
|
||||
bool Relocation::isX86GOTPCRELX(uint64_t Type) {
|
||||
if (Arch != Triple::x86_64)
|
||||
return false;
|
||||
return Type == ELF::R_X86_64_GOTPCRELX || Type == ELF::R_X86_64_REX_GOTPCRELX;
|
||||
}
|
||||
|
||||
bool Relocation::isNone(uint64_t Type) { return Type == getNone(); }
|
||||
|
||||
bool Relocation::isRelative(uint64_t Type) {
|
||||
|
|
|
@ -47,54 +47,88 @@ bool X86MCSymbolizer::tryAddingSymbolicOperand(
|
|||
Inst.addOperand(MCOperand::createExpr(Expr));
|
||||
};
|
||||
|
||||
// Check for relocations against the operand.
|
||||
// Check if the operand being added is a displacement part of a compound
|
||||
// memory operand that uses PC-relative addressing. If it is, try to symbolize
|
||||
// it without relocations. Return true on success, false otherwise.
|
||||
auto processPCRelOperandNoRel = [&]() {
|
||||
const int MemOp = BC.MIB->getMemoryOperandNo(Inst);
|
||||
if (MemOp == -1)
|
||||
return false;
|
||||
|
||||
const unsigned DispOp = MemOp + X86::AddrDisp;
|
||||
if (Inst.getNumOperands() != DispOp)
|
||||
return false;
|
||||
|
||||
const MCOperand &Base = Inst.getOperand(MemOp + X86::AddrBaseReg);
|
||||
if (Base.getReg() != BC.MRI->getProgramCounter())
|
||||
return false;
|
||||
|
||||
const MCOperand &Scale = Inst.getOperand(MemOp + X86::AddrScaleAmt);
|
||||
const MCOperand &Index = Inst.getOperand(MemOp + X86::AddrIndexReg);
|
||||
if (Scale.getImm() != 0 && Index.getReg() != MCRegister::NoRegister)
|
||||
return false;
|
||||
|
||||
const MCSymbol *TargetSymbol;
|
||||
uint64_t TargetOffset;
|
||||
std::tie(TargetSymbol, TargetOffset) =
|
||||
BC.handleAddressRef(Value, Function, /*IsPCRel=*/true);
|
||||
|
||||
addOperand(TargetSymbol, TargetOffset);
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
// Check for GOTPCRELX relocations first. Because these relocations allow the
|
||||
// linker to modify the instruction, we have to check the offset range
|
||||
// corresponding to the instruction, not the offset of the operand.
|
||||
// Note that if there is GOTPCRELX relocation against the instruction, there
|
||||
// will be no other relocation in this range, since GOTPCRELX applies only to
|
||||
// certain instruction types.
|
||||
const uint64_t InstOffset = InstAddress - Function.getAddress();
|
||||
if (const Relocation *Relocation =
|
||||
Function.getRelocationAt(InstOffset + ImmOffset)) {
|
||||
uint64_t SymbolValue = Relocation->Value - Relocation->Addend;
|
||||
if (Relocation->isPCRelative())
|
||||
SymbolValue += InstAddress + ImmOffset;
|
||||
const Relocation *Relocation =
|
||||
Function.getRelocationInRange(InstOffset, InstOffset + InstSize);
|
||||
if (Relocation && Relocation::isX86GOTPCRELX(Relocation->Type)) {
|
||||
// If the operand is PC-relative, convert it without using the relocation
|
||||
// information. For GOTPCRELX, it is safe to use the absolute address
|
||||
// instead of extracting the addend from the relocation, as non-standard
|
||||
// forms will be rejected by linker conversion process and the operand
|
||||
// will always reference GOT which we don't rewrite.
|
||||
if (processPCRelOperandNoRel())
|
||||
return true;
|
||||
|
||||
// Process reference to the symbol.
|
||||
BC.handleAddressRef(SymbolValue, Function, Relocation->isPCRelative());
|
||||
// The linker converted the PC-relative address to an absolute one.
|
||||
// Symbolize this address.
|
||||
BC.handleAddressRef(Value, Function, /*IsPCRel=*/false);
|
||||
const BinaryData *Target = BC.getBinaryDataAtAddress(Value);
|
||||
assert(Target &&
|
||||
"BinaryData should exist at converted GOTPCRELX destination");
|
||||
|
||||
uint64_t Addend = Relocation->Addend;
|
||||
// Real addend for pc-relative targets is adjusted with a delta from
|
||||
// the relocation placement to the next instruction.
|
||||
if (Relocation->isPCRelative())
|
||||
Addend += InstOffset + InstSize - Relocation->Offset;
|
||||
|
||||
addOperand(Relocation->Symbol, Addend);
|
||||
addOperand(Target->getSymbol(), /*Addend=*/0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if the operand being added is a displacement part of a compound
|
||||
// memory operand that uses PC-relative addressing. If it is, try to symbolize
|
||||
// it without relocations.
|
||||
const int MemOp = BC.MIB->getMemoryOperandNo(Inst);
|
||||
if (MemOp == -1)
|
||||
return false;
|
||||
// Check for relocations against the operand.
|
||||
if (!Relocation || Relocation->Offset != InstOffset + ImmOffset)
|
||||
Relocation = Function.getRelocationAt(InstOffset + ImmOffset);
|
||||
|
||||
const unsigned DispOp = MemOp + X86::AddrDisp;
|
||||
if (Inst.getNumOperands() != DispOp)
|
||||
return false;
|
||||
if (!Relocation)
|
||||
return processPCRelOperandNoRel();
|
||||
|
||||
const MCOperand &Base = Inst.getOperand(MemOp + X86::AddrBaseReg);
|
||||
if (Base.getReg() != BC.MRI->getProgramCounter())
|
||||
return false;
|
||||
uint64_t SymbolValue = Relocation->Value - Relocation->Addend;
|
||||
if (Relocation->isPCRelative())
|
||||
SymbolValue += InstAddress + ImmOffset;
|
||||
|
||||
const MCOperand &Scale = Inst.getOperand(MemOp + X86::AddrScaleAmt);
|
||||
const MCOperand &Index = Inst.getOperand(MemOp + X86::AddrIndexReg);
|
||||
if (Scale.getImm() != 0 && Index.getReg() != MCRegister::NoRegister)
|
||||
return false;
|
||||
// Process reference to the symbol.
|
||||
BC.handleAddressRef(SymbolValue, Function, Relocation->isPCRelative());
|
||||
|
||||
const MCSymbol *TargetSymbol;
|
||||
uint64_t TargetOffset;
|
||||
std::tie(TargetSymbol, TargetOffset) =
|
||||
BC.handleAddressRef(Value, Function, /*IsPCRel*/ true);
|
||||
uint64_t Addend = Relocation->Addend;
|
||||
// Real addend for pc-relative targets is adjusted with a delta from
|
||||
// the relocation placement to the next instruction.
|
||||
if (Relocation->isPCRelative())
|
||||
Addend += InstOffset + InstSize - Relocation->Offset;
|
||||
|
||||
addOperand(TargetSymbol, TargetOffset);
|
||||
addOperand(Relocation->Symbol, Addend);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1,46 +1,69 @@
|
|||
# This reproduces a bug with misinterpreting the gotpcrelx reloc
|
||||
|
||||
# Here we use llvm-mc -relax-relocations to produce R_X86_64_REX_GOTPCRELX
|
||||
# and ld.lld to consume it and optimize it, transforming a CMP <mem, reg>
|
||||
# into CMP <imm, reg>.
|
||||
# Then we check that BOLT correctly updates the imm operand that references
|
||||
# a function address. Currently XFAIL as we do not support it.
|
||||
|
||||
# REQUIRES: system-linux
|
||||
# XFAIL: *
|
||||
|
||||
## Check that BOLT correctly handles different types of instructions with
|
||||
## R_X86_64_GOTPCRELX or R_X86_64_REX_GOTPCRELX relocations and different
|
||||
## kinds of handling of the relocation by the linker (no relaxation, pic, and
|
||||
## non-pic).
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux \
|
||||
# RUN: -relax-relocations %s -o %t.o
|
||||
# RUN: llvm-strip --strip-unneeded %t.o
|
||||
# RUN: ld.lld %t.o -o %t.exe -q
|
||||
# RUN: llvm-readobj -r %t.exe | FileCheck --check-prefix=READOBJ %s
|
||||
# RUN: llvm-bolt %t.exe -relocs -o %t.out -lite=0
|
||||
# RUN: ld.lld %t.o -o %t.pie.exe -q -pie
|
||||
# RUN: ld.lld %t.o -o %t.no-relax.exe -q --no-relax
|
||||
# RUN: llvm-bolt %t.exe -relocs -o %t.out -print-cfg -print-only=_start \
|
||||
# RUN: |& FileCheck --check-prefix=BOLT %s
|
||||
# RUN: llvm-bolt %t.pie.exe -o /dev/null -print-cfg -print-only=_start \
|
||||
# RUN: |& FileCheck --check-prefix=PIE-BOLT %s
|
||||
# RUN: llvm-bolt %t.no-relax.exe -o /dev/null -print-cfg -print-only=_start \
|
||||
# RUN: |& FileCheck --check-prefix=NO-RELAX-BOLT %s
|
||||
# RUN: llvm-objdump -d --no-show-raw-insn --print-imm-hex \
|
||||
# RUN: %t.out | FileCheck --check-prefix=DISASM %s
|
||||
|
||||
# Check that R_X86_64_REX_GOTPCRELX is present in the input binary
|
||||
# READOBJ: 0x[[#%X,]] R_X86_64_REX_GOTPCRELX foo 0x[[#%X,]]
|
||||
|
||||
# DISASM: Disassembly of section .text:
|
||||
# DISASM-EMPTY:
|
||||
# DISASM-NEXT: <_start>:
|
||||
# DISASM-NEXT: leaq 0x[[#%x,ADDR:]], %rax
|
||||
# DISASM-NEXT: cmpq 0x[[#ADDR]], %rax
|
||||
|
||||
.text
|
||||
.globl _start
|
||||
.type _start, %function
|
||||
_start:
|
||||
.cfi_startproc
|
||||
leaq foo, %rax
|
||||
cmpq foo@GOTPCREL(%rip), %rax
|
||||
je b
|
||||
c:
|
||||
mov $1, %rdi
|
||||
callq foo
|
||||
b:
|
||||
xorq %rdi, %rdi
|
||||
callq foo
|
||||
# DISASM: Disassembly of section .text:
|
||||
# DISASM-EMPTY:
|
||||
# DISASM-NEXT: <_start>:
|
||||
|
||||
call *foo@GOTPCREL(%rip)
|
||||
# NO-RELAX-BOLT: callq *{{.*}}(%rip)
|
||||
# BOLT: callq foo
|
||||
# PIE-BOLT: callq foo
|
||||
# DISASM-NEXT: callq 0x[[#%x,ADDR:]]
|
||||
|
||||
movq foo@GOTPCREL(%rip), %rdi
|
||||
# NO-RELAX-BOLT-NEXT: movq {{.*}}(%rip), %rdi
|
||||
# BOLT-NEXT: leaq foo(%rip), %rdi
|
||||
# PIE-BOLT-NEXT: leaq foo(%rip), %rdi
|
||||
# DISASM-NEXT: leaq {{.*}}(%rip), %rdi # 0x[[#ADDR]]
|
||||
|
||||
movl foo@GOTPCREL+4(%rip), %edi
|
||||
# NO-RELAX-BOLT-NEXT: movl {{.*}}(%rip), %edi
|
||||
# BOLT-NEXT: movl {{.*}}(%rip), %edi
|
||||
# PIE-BOLT-NEXT: movl {{.*}}(%rip), %edi
|
||||
# DISASM-NEXT: movl {{.*}}(%rip), %edi
|
||||
|
||||
test %rdi, foo@GOTPCREL(%rip)
|
||||
# NO-RELAX-BOLT-NEXT: testq %rdi, DATA{{.*}}(%rip)
|
||||
# BOLT-NEXT: testq $foo, %rdi
|
||||
# PIE-BOLT-NEXT: testq %rdi, DATA{{.*}}(%rip)
|
||||
# DISASM-NEXT: testq $0x[[#ADDR]], %rdi
|
||||
|
||||
cmpq foo@GOTPCREL(%rip), %rax
|
||||
# NO-RELAX-BOLT-NEXT: cmpq DATA{{.*}}(%rip), %rax
|
||||
# BOLT-NEXT: cmpq $foo, %rax
|
||||
# PIE-BOLT-NEXT: cmpq DATA{{.*}}(%rip), %rax
|
||||
# DISASM-NEXT: cmpq $0x[[#ADDR]], %rax
|
||||
|
||||
jmp *foo@GOTPCREL(%rip)
|
||||
# NO-RELAX-BOLT-NEXT: jmpq *DATA{{.*}}(%rip)
|
||||
# BOLT-NEXT: jmp foo
|
||||
# PIE-BOLT-NEXT: jmp foo
|
||||
# DISASM-NEXT: jmp 0x[[#ADDR]]
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _start, .-_start
|
||||
|
|
Loading…
Reference in New Issue