[lld][ELF] Support adrp+ldr GOT optimization for AArch64

This diff adds initial support for the AArch64 relocation relaxations
discussed in https://github.com/ARM-software/abi-aa/pull/106.
In particular, the case of

adrp x0, :got: symbol
ldr x0, [x0, :got_lo12: symbol]

is handled.

Test plan: make check-all

Differential revision: https://reviews.llvm.org/D112063
This commit is contained in:
Alexander Shaposhnikov 2022-01-10 05:20:37 +00:00
parent 6b8362eb8d
commit 8acc3b4ab0
5 changed files with 305 additions and 6 deletions

View File

@ -568,6 +568,98 @@ void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
}
// Decides, once per relocation list, whether the ADRP+LDR GOT relaxation may
// be attempted at all and records the answer in safeToRelaxAdrpLdr.
//
// The relaxation is allowed only if relaxation is enabled in the config, the
// output machine is AArch64, and every R_AARCH64_ADR_GOT_PAGE in `relocs` is
// immediately followed by an R_AARCH64_LD64_GOT_LO12_NC, with no
// R_AARCH64_LD64_GOT_LO12_NC appearing on its own.
AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) {
  if (!config->relax || config->emachine != EM_AARCH64) {
    safeToRelaxAdrpLdr = false;
    return;
  }

  const size_t numRelocs = relocs.size();
  size_t idx = 0;
  while (idx < numRelocs) {
    const auto type = relocs[idx].type;

    // An LDR GOT relocation that was not consumed as the second half of a
    // pair is unpaired: disable the relaxation for the whole list.
    if (type == R_AARCH64_LD64_GOT_LO12_NC) {
      safeToRelaxAdrpLdr = false;
      return;
    }

    // Relocations of any other kind are irrelevant to the pairing check.
    if (type != R_AARCH64_ADR_GOT_PAGE) {
      ++idx;
      continue;
    }

    // An ADRP GOT relocation must have its LDR partner right behind it.
    const bool hasPartner =
        idx + 1 < numRelocs && relocs[idx + 1].type == R_AARCH64_LD64_GOT_LO12_NC;
    if (!hasPartner) {
      safeToRelaxAdrpLdr = false;
      return;
    }

    // Step over both halves of the pair.
    idx += 2;
  }

  // The whole list was scanned without finding an unpaired relocation.
  safeToRelaxAdrpLdr = true;
}
// Tries to relax a GOT-indirect address materialization into a direct one.
//
// When the definition of sym is not preemptible we may be able to relax
//   ADRP xn, :got: sym
//   LDR  xn, [xn, :got_lo12: sym]
// to
//   ADRP xn, sym
//   ADD  xn, xn, :lo12: sym
//
// adrpRel / ldrRel are the two candidate relocations, secAddr is the address
// that relocation offsets are relative to, and buf is the buffer holding the
// instructions (indexed by relocation offset).
//
// Returns true if both instructions were rewritten and relocated in buf; in
// that case the caller must not apply adrpRel and ldrRel again. Returns false
// and leaves buf untouched if any precondition does not hold.
bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
                                     const Relocation &ldrRel, uint64_t secAddr,
                                     uint8_t *buf) const {
  if (!safeToRelaxAdrpLdr)
    return false;

  if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE ||
      ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
    return false;
  // Check if the relocations apply to consecutive instructions.
  if (adrpRel.offset + 4 != ldrRel.offset)
    return false;
  // Check if the relocations reference the same symbol and
  // skip undefined, preemptible and STT_GNU_IFUNC symbols.
  if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() ||
      adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc())
    return false;
  // Check if the addends of both instructions are zero.
  if (adrpRel.addend != 0 || ldrRel.addend != 0)
    return false;

  const uint32_t adrpInstr = read32le(buf + adrpRel.offset);
  const uint32_t ldrInstr = read32le(buf + ldrRel.offset);
  // Check if the first instruction is ADRP.
  if ((adrpInstr & 0x9f000000) != 0x90000000)
    return false;
  // Check if the second instruction is exactly the 64-bit
  // LDR Xt, [Xn, #imm] (unsigned offset) form: size == 0b11, V == 0 and
  // opc == 0b01. A looser class-level match would also accept stores,
  // SIMD&FP loads and narrower loads, which must never be turned into ADD.
  if ((ldrInstr & 0xffc00000) != 0xf9400000)
    return false;

  const uint32_t adrpDestReg = adrpInstr & 0x1f;
  const uint32_t ldrDestReg = ldrInstr & 0x1f;
  const uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f;
  // Check if ADRP and LDR use the same register.
  if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg)
    return false;

  Symbol &sym = *adrpRel.sym;
  // Check if the page difference fits in a signed 33-bit value, i.e. the
  // symbol is within the +/-4GB range reachable by ADRP.
  const int64_t val =
      getAArch64Page(sym.getVA()) - getAArch64Page(secAddr + adrpRel.offset);
  if (val != llvm::SignExtend64(val, 33))
    return false;

  Relocation adrpSymRel = {R_AARCH64_PAGE_PC, R_AARCH64_ADR_PREL_PG_HI21,
                           adrpRel.offset, /*addend=*/0, &sym};
  Relocation addRel = {R_ABS, R_AARCH64_ADD_ABS_LO12_NC, ldrRel.offset,
                       /*addend=*/0, &sym};

  // adrp x_<dest_reg>
  write32le(buf + adrpSymRel.offset, 0x90000000 | adrpDestReg);
  // add x_<dest reg>, x_<dest reg>
  write32le(buf + addRel.offset, 0x91000000 | adrpDestReg | (adrpDestReg << 5));

  // Fill in the immediates of the freshly written instructions. The ADRP
  // value is the page delta computed above (adrpSymRel.offset equals
  // adrpRel.offset); the ADD takes the symbol address.
  target->relocate(buf + adrpSymRel.offset, adrpSymRel, val);
  target->relocate(buf + addRel.offset, addRel, sym.getVA());
  return true;
}
// AArch64 may use security features in variant PLT sequences. These are:
// Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target
// Indicator (BTI) introduced in armv8.5-a. The additional instructions used

View File

@ -1010,25 +1010,35 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
const unsigned bits = config->wordsize * 8;
const TargetInfo &target = *elf::target;
uint64_t lastPPCRelaxedRelocOff = UINT64_C(-1);
for (const Relocation &rel : relocations) {
AArch64Relaxer aarch64relaxer(relocations);
for (size_t i = 0, size = relocations.size(); i != size; ++i) {
const Relocation &rel = relocations[i];
if (rel.expr == R_NONE)
continue;
uint64_t offset = rel.offset;
uint8_t *bufLoc = buf + offset;
uint64_t addrLoc = getOutputSection()->addr + offset;
uint64_t secAddr = getOutputSection()->addr;
if (auto *sec = dyn_cast<InputSection>(this))
addrLoc += sec->outSecOff;
secAddr += sec->outSecOff;
const uint64_t addrLoc = secAddr + offset;
const uint64_t targetVA =
SignExtend64(getRelocTargetVA(file, rel.type, rel.addend, addrLoc,
*rel.sym, rel.expr), bits);
*rel.sym, rel.expr),
bits);
switch (rel.expr) {
case R_RELAX_GOT_PC:
case R_RELAX_GOT_PC_NOPIC:
target.relaxGot(bufLoc, rel, targetVA);
break;
case R_AARCH64_GOT_PAGE_PC:
if (i + 1 < size && aarch64relaxer.tryRelaxAdrpLdr(
rel, relocations[i + 1], secAddr, buf)) {
++i;
continue;
}
target.relocate(bufLoc, rel, targetVA);
break;
case R_PPC64_RELAX_GOT_PC: {
// The R_PPC64_PCREL_OPT relocation must appear immediately after
// R_PPC64_GOT_PCREL34 in the relocations table at the same offset.

View File

@ -221,6 +221,16 @@ void addPPC64SaveRestore();
uint64_t getPPC64TocBase();
uint64_t getAArch64Page(uint64_t expr);
// Helper that applies the AArch64 ADRP+LDR GOT relaxation to the relocations
// of one section. Construct it once with the section's relocation list, then
// call tryRelaxAdrpLdr for each candidate pair.
class AArch64Relaxer {
public:
  // Inspects `relocs` and the link configuration to decide whether the
  // relaxation may be attempted for this list at all.
  explicit AArch64Relaxer(ArrayRef<Relocation> relocs);

  // Tries to rewrite the ADRP+LDR pair described by adrpRel/ldrRel inside buf.
  // secAddr is the address that the relocation offsets are relative to.
  // Returns true if the pair was relaxed and relocated, false if it was left
  // untouched.
  bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
                       uint64_t secAddr, uint8_t *buf) const;

private:
  // Cleared by the constructor when relaxation is disabled, the target is not
  // AArch64, or the GOT relocations do not come in ADRP/LDR pairs.
  bool safeToRelaxAdrpLdr = true;
};
extern const TargetInfo *target;
TargetInfo *getTarget();

View File

@ -0,0 +1,70 @@
## This test verifies that the pair adrp + ldr is relaxed/not relaxed
## depending on the target symbol properties.
# REQUIRES: aarch64
## Split this file into linker.t and symbols.s, then assemble symbols.s.
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/symbols.s -o %t/symbols.o
## Case 1: link as a shared object (-shared) and check every adrp + ldr pair
## of _start in order (x0 .. x3).
# RUN: ld.lld -shared -T %t/linker.t %t/symbols.o -o %t/symbols.so
# RUN: llvm-objdump --no-show-raw-insn -d %t/symbols.so | \
# RUN: FileCheck --check-prefix=LIB %s
## Symbol 'hidden_sym' is nonpreemptible, the relaxation should be applied.
LIB: adrp x0
LIB-NEXT: add x0
## Symbol 'global_sym' is preemptible, no relaxations should be applied.
LIB-NEXT: adrp x1
LIB-NEXT: ldr x1
## Symbol 'undefined_sym' is undefined, no relaxations should be applied.
LIB-NEXT: adrp x2
LIB-NEXT: ldr x2
## Symbol 'ifunc_sym' is STT_GNU_IFUNC, no relaxations should be applied.
LIB-NEXT: adrp x3
LIB-NEXT: ldr x3
## Case 2: link the same object as an executable.
## NOTE(review): '-z undefs' is presumably needed so that the reference to
## 'undefined_sym' does not fail the link - confirm.
# RUN: ld.lld -T %t/linker.t -z undefs %t/symbols.o -o %t/symbols
# RUN: llvm-objdump --no-show-raw-insn -d %t/symbols | \
# RUN: FileCheck --check-prefix=EXE %s
## Symbol 'global_sym' is nonpreemptible, the relaxation should be applied.
EXE: adrp x1
EXE-NEXT: add x1
## The linker script ensures that .rodata and .text are sufficiently (>1MB)
## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop.
#--- linker.t
SECTIONS {
.rodata 0x1000: { *(.rodata) }
.text 0x300100: { *(.text) }
}
#--- symbols.s
.rodata
.hidden hidden_sym
hidden_sym:
.word 10
.global global_sym
global_sym:
.word 10
.text
.type ifunc_sym STT_GNU_IFUNC
.hidden ifunc_sym
ifunc_sym:
nop
.global _start
_start:
adrp x0, :got:hidden_sym
ldr x0, [x0, #:got_lo12:hidden_sym]
adrp x1, :got:global_sym
ldr x1, [x1, #:got_lo12:global_sym]
adrp x2, :got:undefined_sym
ldr x2, [x2, #:got_lo12:undefined_sym]
adrp x3, :got:ifunc_sym
ldr x3, [x3, #:got_lo12:ifunc_sym]

View File

@ -0,0 +1,117 @@
## This test verifies that the pair adrp + ldr is relaxed to adrp + add only
## when the addends, registers, address range, relocation pairing and the
## --no-relax option allow it.
# REQUIRES: aarch64
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/unpaired.s -o %t/unpaired.o
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/lone-ldr.s -o %t/lone-ldr.o
# RUN: ld.lld %t/a.o -T %t/linker.t -o %t/a
# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s
## Symbol 'x' is nonpreemptible, the relaxation should be applied.
## This test verifies the encoding when the register x1 is used.
# CHECK: adrp x1
# CHECK-NEXT: add x1, x1
## ADRP contains a nonzero addend, no relaxations should be applied.
# CHECK-NEXT: adrp x2
# CHECK-NEXT: ldr
## LDR contains a nonzero addend, no relaxations should be applied.
# CHECK-NEXT: adrp x3
# CHECK-NEXT: ldr
## LDR loads into a different register than ADRP wrote, no relaxations
## should be applied.
# CHECK-NEXT: adrp x4
# CHECK-NEXT: ldr
## LDR uses a different base register than ADRP wrote, no relaxations
## should be applied.
# CHECK-NEXT: adrp x6
# CHECK-NEXT: ldr
## Symbol 'x' is nonpreemptible, but --no-relax suppresses relaxations.
# RUN: ld.lld %t/a.o -T %t/linker.t --no-relax -o %t/no-relax
# RUN: llvm-objdump --no-show-raw-insn -d %t/no-relax | \
# RUN: FileCheck --check-prefix=X1-NO-RELAX %s
# X1-NO-RELAX: adrp x1
# X1-NO-RELAX-NEXT: ldr
## Symbol 'x' is nonpreemptible, but the address is not within adrp range.
# RUN: ld.lld %t/a.o -T %t/out-of-range.t -o %t/out-of-range
# RUN: llvm-objdump --no-show-raw-insn -d %t/out-of-range | \
# RUN: FileCheck --check-prefix=X1-NO-RELAX %s
## Relocations do not appear in pairs, no relaxations should be applied.
# RUN: ld.lld %t/unpaired.o -o %t/unpaired
# RUN: llvm-objdump --no-show-raw-insn -d %t/unpaired | \
# RUN: FileCheck --check-prefix=UNPAIRED %s
# UNPAIRED: adrp x0
# UNPAIRED-NEXT: b
# UNPAIRED-NEXT: adrp x0
# UNPAIRED: ldr x0
## Relocations do not appear in pairs, no relaxations should be applied.
# RUN: ld.lld %t/lone-ldr.o -o %t/lone-ldr
# RUN: llvm-objdump --no-show-raw-insn -d %t/lone-ldr | \
# RUN: FileCheck --check-prefix=LONE-LDR %s
# LONE-LDR: ldr x0
## This linker script ensures that .rodata and .text are sufficiently (>1MB)
## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop.
#--- linker.t
SECTIONS {
.rodata 0x1000: { *(.rodata) }
.text 0x200100: { *(.text) }
}
## This linker script ensures that .rodata and .text are sufficiently (>4GB)
## far apart so that the adrp + ldr pair cannot be relaxed.
#--- out-of-range.t
SECTIONS {
.rodata 0x1000: { *(.rodata) }
.text 0x100002000: { *(.text) }
}
#--- a.s
.rodata
.hidden x
x:
.word 10
.text
.global _start
_start:
adrp x1, :got:x
ldr x1, [x1, #:got_lo12:x]
adrp x2, :got:x+1
ldr x2, [x2, #:got_lo12:x]
adrp x3, :got:x
ldr x3, [x3, #:got_lo12:x+8]
adrp x4, :got:x
ldr x5, [x4, #:got_lo12:x]
adrp x6, :got:x
ldr x6, [x0, #:got_lo12:x]
#--- unpaired.s
.text
.hidden x
x:
nop
.global _start
_start:
adrp x0, :got:x
b L
adrp x0, :got:x
L:
ldr x0, [x0, #:got_lo12:x]
#--- lone-ldr.s
.text
.hidden x
x:
nop
.global _start
_start:
ldr x0, [x0, #:got_lo12:x]