[lld][ELF] Support adrp+ldr GOT optimization for AArch64
This diff adds the first bits of support for the AArch64 relocation relaxations discussed in https://github.com/ARM-software/abi-aa/pull/106. In particular, it handles the following case:
  adrp x0, :got: symbol
  ldr x0, [x0, :got_lo12: symbol]
Test plan: make check-all
Differential revision: https://reviews.llvm.org/D112063
This commit is contained in:
parent
6b8362eb8d
commit
8acc3b4ab0
|
@ -568,6 +568,98 @@ void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
|
|||
llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
|
||||
}
|
||||
|
||||
// Scans the relocation list once and records whether the ADRP+LDR GOT
// relaxation may be attempted on it at all.
//
// The relaxation is allowed only when relaxation is enabled in the
// configuration, the output machine is AArch64, and the GOT relocations in
// `relocs` are strictly paired: every R_AARCH64_ADR_GOT_PAGE is immediately
// followed by an R_AARCH64_LD64_GOT_LO12_NC, and no
// R_AARCH64_LD64_GOT_LO12_NC shows up without such a predecessor.
AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) {
  // Stay pessimistic until the whole list has been validated.
  safeToRelaxAdrpLdr = false;
  if (!config->relax || config->emachine != EM_AARCH64)
    return;

  const size_t numRelocs = relocs.size();
  size_t idx = 0;
  while (idx < numRelocs) {
    const auto type = relocs[idx].type;

    // An LDR GOT relocation that was not consumed as the second half of a
    // pair is unpaired.
    if (type == R_AARCH64_LD64_GOT_LO12_NC)
      return;

    // Relocations unrelated to the GOT pair are simply skipped.
    if (type != R_AARCH64_ADR_GOT_PAGE) {
      ++idx;
      continue;
    }

    // An ADRP GOT relocation must have its LDR partner right behind it.
    if (idx + 1 >= numRelocs ||
        relocs[idx + 1].type != R_AARCH64_LD64_GOT_LO12_NC)
      return;

    // Step over both halves of the pair.
    idx += 2;
  }

  // Every GOT relocation was part of a well-formed pair.
  safeToRelaxAdrpLdr = true;
}
|
||||
|
||||
bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
|
||||
const Relocation &ldrRel, uint64_t secAddr,
|
||||
uint8_t *buf) const {
|
||||
if (!safeToRelaxAdrpLdr)
|
||||
return false;
|
||||
|
||||
// When the definition of sym is not preemptible then we may
|
||||
// be able to relax
|
||||
// ADRP xn, :got: sym
|
||||
// LDR xn, [ xn :got_lo12: sym]
|
||||
// to
|
||||
// ADRP xn, sym
|
||||
// ADD xn, xn, :lo_12: sym
|
||||
|
||||
if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE ||
|
||||
ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
|
||||
return false;
|
||||
// Check if the relocations apply to consecutive instructions.
|
||||
if (adrpRel.offset + 4 != ldrRel.offset)
|
||||
return false;
|
||||
// Check if the relocations reference the same symbol and
|
||||
// skip undefined, preemptible and STT_GNU_IFUNC symbols.
|
||||
if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() ||
|
||||
adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc())
|
||||
return false;
|
||||
// Check if the addends of the both instructions are zero.
|
||||
if (adrpRel.addend != 0 || ldrRel.addend != 0)
|
||||
return false;
|
||||
uint32_t adrpInstr = read32le(buf + adrpRel.offset);
|
||||
uint32_t ldrInstr = read32le(buf + ldrRel.offset);
|
||||
// Check if the first instruction is ADRP and the second instruction is LDR.
|
||||
if ((adrpInstr & 0x9f000000) != 0x90000000 ||
|
||||
(ldrInstr & 0x3b000000) != 0x39000000)
|
||||
return false;
|
||||
// Check the value of the sf bit.
|
||||
if (!(ldrInstr >> 31))
|
||||
return false;
|
||||
uint32_t adrpDestReg = adrpInstr & 0x1f;
|
||||
uint32_t ldrDestReg = ldrInstr & 0x1f;
|
||||
uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f;
|
||||
// Check if ADPR and LDR use the same register.
|
||||
if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg)
|
||||
return false;
|
||||
|
||||
Symbol &sym = *adrpRel.sym;
|
||||
// Check if the address difference is within 4GB range.
|
||||
int64_t val =
|
||||
getAArch64Page(sym.getVA()) - getAArch64Page(secAddr + adrpRel.offset);
|
||||
if (val != llvm::SignExtend64(val, 33))
|
||||
return false;
|
||||
|
||||
Relocation adrpSymRel = {R_AARCH64_PAGE_PC, R_AARCH64_ADR_PREL_PG_HI21,
|
||||
adrpRel.offset, /*addend=*/0, &sym};
|
||||
Relocation addRel = {R_ABS, R_AARCH64_ADD_ABS_LO12_NC, ldrRel.offset,
|
||||
/*addend=*/0, &sym};
|
||||
|
||||
// adrp x_<dest_reg>
|
||||
write32le(buf + adrpSymRel.offset, 0x90000000 | adrpDestReg);
|
||||
// add x_<dest reg>, x_<dest reg>
|
||||
write32le(buf + addRel.offset, 0x91000000 | adrpDestReg | (adrpDestReg << 5));
|
||||
|
||||
target->relocate(buf + adrpSymRel.offset, adrpSymRel,
|
||||
SignExtend64(getAArch64Page(sym.getVA()) -
|
||||
getAArch64Page(secAddr + adrpSymRel.offset),
|
||||
64));
|
||||
target->relocate(buf + addRel.offset, addRel, SignExtend64(sym.getVA(), 64));
|
||||
return true;
|
||||
}
|
||||
|
||||
// AArch64 may use security features in variant PLT sequences. These are:
|
||||
// Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target
|
||||
// Indicator (BTI) introduced in armv8.5-a. The additional instructions used
|
||||
|
|
|
@ -1010,25 +1010,35 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
|
|||
const unsigned bits = config->wordsize * 8;
|
||||
const TargetInfo &target = *elf::target;
|
||||
uint64_t lastPPCRelaxedRelocOff = UINT64_C(-1);
|
||||
|
||||
for (const Relocation &rel : relocations) {
|
||||
AArch64Relaxer aarch64relaxer(relocations);
|
||||
for (size_t i = 0, size = relocations.size(); i != size; ++i) {
|
||||
const Relocation &rel = relocations[i];
|
||||
if (rel.expr == R_NONE)
|
||||
continue;
|
||||
uint64_t offset = rel.offset;
|
||||
uint8_t *bufLoc = buf + offset;
|
||||
|
||||
uint64_t addrLoc = getOutputSection()->addr + offset;
|
||||
uint64_t secAddr = getOutputSection()->addr;
|
||||
if (auto *sec = dyn_cast<InputSection>(this))
|
||||
addrLoc += sec->outSecOff;
|
||||
secAddr += sec->outSecOff;
|
||||
const uint64_t addrLoc = secAddr + offset;
|
||||
const uint64_t targetVA =
|
||||
SignExtend64(getRelocTargetVA(file, rel.type, rel.addend, addrLoc,
|
||||
*rel.sym, rel.expr), bits);
|
||||
|
||||
*rel.sym, rel.expr),
|
||||
bits);
|
||||
switch (rel.expr) {
|
||||
case R_RELAX_GOT_PC:
|
||||
case R_RELAX_GOT_PC_NOPIC:
|
||||
target.relaxGot(bufLoc, rel, targetVA);
|
||||
break;
|
||||
case R_AARCH64_GOT_PAGE_PC:
|
||||
if (i + 1 < size && aarch64relaxer.tryRelaxAdrpLdr(
|
||||
rel, relocations[i + 1], secAddr, buf)) {
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
target.relocate(bufLoc, rel, targetVA);
|
||||
break;
|
||||
case R_PPC64_RELAX_GOT_PC: {
|
||||
// The R_PPC64_PCREL_OPT relocation must appear immediately after
|
||||
// R_PPC64_GOT_PCREL34 in the relocations table at the same offset.
|
||||
|
|
|
@ -221,6 +221,16 @@ void addPPC64SaveRestore();
|
|||
uint64_t getPPC64TocBase();
|
||||
uint64_t getAArch64Page(uint64_t expr);
|
||||
|
||||
class AArch64Relaxer {
|
||||
bool safeToRelaxAdrpLdr = true;
|
||||
|
||||
public:
|
||||
explicit AArch64Relaxer(ArrayRef<Relocation> relocs);
|
||||
|
||||
bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
|
||||
uint64_t secAddr, uint8_t *buf) const;
|
||||
};
|
||||
|
||||
extern const TargetInfo *target;
|
||||
TargetInfo *getTarget();
|
||||
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
## This test verifies that the pair adrp + ldr is relaxed/not relaxed
|
||||
## depending on the target symbol properties.
|
||||
|
||||
# REQUIRES: aarch64
|
||||
# RUN: split-file %s %t
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/symbols.s -o %t/symbols.o
|
||||
|
||||
# RUN: ld.lld -shared -T %t/linker.t %t/symbols.o -o %t/symbols.so
|
||||
# RUN: llvm-objdump --no-show-raw-insn -d %t/symbols.so | \
|
||||
# RUN: FileCheck --check-prefix=LIB %s
|
||||
|
||||
## Symbol 'hidden_sym' is nonpreemptible, the relaxation should be applied.
|
||||
LIB: adrp x0
|
||||
LIB-NEXT: add x0
|
||||
|
||||
## Symbol 'global_sym' is preemptible, no relaxations should be applied.
|
||||
LIB-NEXT: adrp x1
|
||||
LIB-NEXT: ldr x1
|
||||
|
||||
## Symbol 'undefined_sym' is undefined, no relaxations should be applied.
|
||||
LIB-NEXT: adrp x2
|
||||
LIB-NEXT: ldr x2
|
||||
|
||||
## Symbol 'ifunc_sym' is STT_GNU_IFUNC, no relaxations should be applied.
|
||||
LIB-NEXT: adrp x3
|
||||
LIB-NEXT: ldr x3
|
||||
|
||||
# RUN: ld.lld -T %t/linker.t -z undefs %t/symbols.o -o %t/symbols
|
||||
# RUN: llvm-objdump --no-show-raw-insn -d %t/symbols | \
|
||||
# RUN: FileCheck --check-prefix=EXE %s
|
||||
|
||||
## Symbol 'global_sym' is nonpreemptible, the relaxation should be applied.
|
||||
EXE: adrp x1
|
||||
EXE-NEXT: add x1
|
||||
|
||||
## The linker script ensures that .rodata and .text are sufficiently (>1MB)
|
||||
## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop.
|
||||
#--- linker.t
|
||||
SECTIONS {
|
||||
.rodata 0x1000: { *(.rodata) }
|
||||
.text 0x300100: { *(.text) }
|
||||
}
|
||||
|
||||
#--- symbols.s
|
||||
.rodata
|
||||
.hidden hidden_sym
|
||||
hidden_sym:
|
||||
.word 10
|
||||
|
||||
.global global_sym
|
||||
global_sym:
|
||||
.word 10
|
||||
|
||||
.text
|
||||
.type ifunc_sym STT_GNU_IFUNC
|
||||
.hidden ifunc_sym
|
||||
ifunc_sym:
|
||||
nop
|
||||
|
||||
.global _start
|
||||
_start:
|
||||
adrp x0, :got:hidden_sym
|
||||
ldr x0, [x0, #:got_lo12:hidden_sym]
|
||||
adrp x1, :got:global_sym
|
||||
ldr x1, [x1, #:got_lo12:global_sym]
|
||||
adrp x2, :got:undefined_sym
|
||||
ldr x2, [x2, #:got_lo12:undefined_sym]
|
||||
adrp x3, :got:ifunc_sym
|
||||
ldr x3, [x3, #:got_lo12:ifunc_sym]
|
|
@ -0,0 +1,117 @@
|
|||
# REQUIRES: aarch64
|
||||
# RUN: split-file %s %t
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
|
||||
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/unpaired.s -o %t/unpaired.o
|
||||
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/lone-ldr.s -o %t/lone-ldr.o
|
||||
|
||||
# RUN: ld.lld %t/a.o -T %t/linker.t -o %t/a
|
||||
# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s
|
||||
|
||||
## Symbol 'x' is nonpreemptible, the relaxation should be applied.
|
||||
## This test verifies the encoding when the register x1 is used.
|
||||
# CHECK: adrp x1
|
||||
# CHECK-NEXT: add x1, x1
|
||||
|
||||
## ADRP contains a nonzero addend, no relaxations should be applied.
|
||||
# CHECK-NEXT: adrp x2
|
||||
# CHECK-NEXT: ldr
|
||||
|
||||
## LDR contains a nonzero addend, no relaxations should be applied.
|
||||
# CHECK-NEXT: adrp x3
|
||||
# CHECK-NEXT: ldr
|
||||
|
||||
## LDR and ADRP use different registers, no relaxations should be applied.
|
||||
# CHECK-NEXT: adrp x4
|
||||
# CHECK-NEXT: ldr
|
||||
|
||||
## LDR and ADRP use different registers, no relaxations should be applied.
|
||||
# CHECK-NEXT: adrp x6
|
||||
# CHECK-NEXT: ldr
|
||||
|
||||
## Symbol 'x' is nonpreemptible, but --no-relax suppresses relaxations.
|
||||
# RUN: ld.lld %t/a.o -T %t/linker.t --no-relax -o %t/no-relax
|
||||
# RUN: llvm-objdump --no-show-raw-insn -d %t/no-relax | \
|
||||
# RUN: FileCheck --check-prefix=X1-NO-RELAX %s
|
||||
|
||||
# X1-NO-RELAX: adrp x1
|
||||
# X1-NO-RELAX-NEXT: ldr
|
||||
|
||||
## Symbol 'x' is nonpreemptible, but the address is not within adrp range.
|
||||
# RUN: ld.lld %t/a.o -T %t/out-of-range.t -o %t/out-of-range
|
||||
# RUN: llvm-objdump --no-show-raw-insn -d %t/out-of-range | \
|
||||
# RUN: FileCheck --check-prefix=X1-NO-RELAX %s
|
||||
|
||||
## Relocations do not appear in pairs, no relaxations should be applied.
|
||||
# RUN: ld.lld %t/unpaired.o -o %t/unpaired
|
||||
# RUN: llvm-objdump --no-show-raw-insn -d %t/unpaired | \
|
||||
# RUN: FileCheck --check-prefix=UNPAIRED %s
|
||||
|
||||
# UNPAIRED: adrp x0
|
||||
# UNPAIRED-NEXT: b
|
||||
# UNPAIRED-NEXT: adrp x0
|
||||
# UNPAIRED: ldr x0
|
||||
|
||||
## Relocations do not appear in pairs, no relaxations should be applied.
|
||||
# RUN: ld.lld %t/lone-ldr.o -o %t/lone-ldr
|
||||
# RUN: llvm-objdump --no-show-raw-insn -d %t/lone-ldr | \
|
||||
# RUN: FileCheck --check-prefix=LONE-LDR %s
|
||||
|
||||
# LONE-LDR: ldr x0
|
||||
|
||||
## This linker script ensures that .rodata and .text are sufficiently (>1M)
|
||||
## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop.
|
||||
#--- linker.t
|
||||
SECTIONS {
|
||||
.rodata 0x1000: { *(.rodata) }
|
||||
.text 0x200100: { *(.text) }
|
||||
}
|
||||
|
||||
## This linker script ensures that .rodata and .text are sufficiently (>4GB)
|
||||
## far apart so that the adrp + ldr pair cannot be relaxed.
|
||||
#--- out-of-range.t
|
||||
SECTIONS {
|
||||
.rodata 0x1000: { *(.rodata) }
|
||||
.text 0x100002000: { *(.text) }
|
||||
}
|
||||
|
||||
#--- a.s
|
||||
.rodata
|
||||
.hidden x
|
||||
x:
|
||||
.word 10
|
||||
.text
|
||||
.global _start
|
||||
_start:
|
||||
adrp x1, :got:x
|
||||
ldr x1, [x1, #:got_lo12:x]
|
||||
adrp x2, :got:x+1
|
||||
ldr x2, [x2, #:got_lo12:x]
|
||||
adrp x3, :got:x
|
||||
ldr x3, [x3, #:got_lo12:x+8]
|
||||
adrp x4, :got:x
|
||||
ldr x5, [x4, #:got_lo12:x]
|
||||
adrp x6, :got:x
|
||||
ldr x6, [x0, #:got_lo12:x]
|
||||
|
||||
#--- unpaired.s
|
||||
.text
|
||||
.hidden x
|
||||
x:
|
||||
nop
|
||||
.global _start
|
||||
_start:
|
||||
adrp x0, :got:x
|
||||
b L
|
||||
adrp x0, :got:x
|
||||
L:
|
||||
ldr x0, [x0, #:got_lo12:x]
|
||||
|
||||
#--- lone-ldr.s
|
||||
.text
|
||||
.hidden x
|
||||
x:
|
||||
nop
|
||||
.global _start
|
||||
_start:
|
||||
ldr x0, [x0, #:got_lo12:x]
|
Loading…
Reference in New Issue