[AArch64] Split fuse-literals feature
This diff splits the fuse-literals feature and enables fuse-adrp-add by default;
in particular, it adjusts instruction scheduling to place ADRP+ADD pairs together.
This also enables the linker to apply the relaxations described in d2ca58c54b.
Differential revision: https://reviews.llvm.org/D120104
Test plan: make check-all
This commit is contained in:
parent
d16a631c12
commit
626039cdcc
|
@ -250,6 +250,10 @@ def FeatureFuseCryptoEOR : SubtargetFeature<
|
|||
"fuse-crypto-eor", "HasFuseCryptoEOR", "true",
|
||||
"CPU fuses AES/PMULL and EOR operations">;
|
||||
|
||||
def FeatureFuseAdrpAdd : SubtargetFeature<
|
||||
"fuse-adrp-add", "HasFuseAdrpAdd", "true",
|
||||
"CPU fuses adrp+add operations">;
|
||||
|
||||
def FeatureFuseLiterals : SubtargetFeature<
|
||||
"fuse-literals", "HasFuseLiterals", "true",
|
||||
"CPU fuses literal generation operations">;
|
||||
|
@ -660,6 +664,7 @@ def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
|
|||
FeatureFuseAES,
|
||||
FeatureBalanceFPOps,
|
||||
FeatureCustomCheapAsMoveHandling,
|
||||
FeatureFuseAdrpAdd,
|
||||
FeatureFuseLiterals,
|
||||
FeaturePostRAScheduler,
|
||||
FeaturePredictableSelectIsExpensive]>;
|
||||
|
@ -668,11 +673,13 @@ def TuneA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65",
|
|||
"Cortex-A65 ARM processors", [
|
||||
FeatureFuseAES,
|
||||
FeatureFuseAddress,
|
||||
FeatureFuseAdrpAdd,
|
||||
FeatureFuseLiterals]>;
|
||||
|
||||
def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
|
||||
"Cortex-A72 ARM processors", [
|
||||
FeatureFuseAES,
|
||||
FeatureFuseAdrpAdd,
|
||||
FeatureFuseLiterals]>;
|
||||
|
||||
def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
|
||||
|
@ -813,6 +820,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
|
|||
FeatureFuseArithmeticLogic,
|
||||
FeatureFuseCCSelect,
|
||||
FeatureFuseCryptoEOR,
|
||||
FeatureFuseAdrpAdd,
|
||||
FeatureFuseLiterals,
|
||||
FeatureZCRegMove,
|
||||
FeatureZCZeroing]>;
|
||||
|
@ -824,6 +832,7 @@ def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
|
|||
FeatureFuseAddress,
|
||||
FeatureFuseAES,
|
||||
FeatureFuseCCSelect,
|
||||
FeatureFuseAdrpAdd,
|
||||
FeatureFuseLiterals,
|
||||
FeatureLSLFast,
|
||||
FeaturePostRAScheduler,
|
||||
|
@ -840,6 +849,7 @@ def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
|
|||
FeatureFuseAES,
|
||||
FeatureFuseArithmeticLogic,
|
||||
FeatureFuseCCSelect,
|
||||
FeatureFuseAdrpAdd,
|
||||
FeatureFuseLiterals,
|
||||
FeatureLSLFast,
|
||||
FeaturePostRAScheduler,
|
||||
|
@ -1062,7 +1072,7 @@ def ProcessorFeatures {
|
|||
// by default for users targeting generic AArch64. The extensions do not
|
||||
// affect code generated by the compiler and can be used only by explicitly
|
||||
// mentioning the new system register names in assembly.
|
||||
list<SubtargetFeature> Generic = [FeatureFPARMv8, FeatureNEON, FeatureETE];
|
||||
list<SubtargetFeature> Generic = [FeatureFPARMv8, FeatureNEON, FeatureETE, FeatureFuseAdrpAdd];
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -157,16 +157,19 @@ static bool isCryptoEORPair(const MachineInstr *FirstMI,
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool isAdrpAddPair(const MachineInstr *FirstMI,
|
||||
const MachineInstr &SecondMI) {
|
||||
// Assume the 1st instr to be a wildcard if it is unspecified.
|
||||
if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::ADRP) &&
|
||||
SecondMI.getOpcode() == AArch64::ADDXri)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Literal generation.
|
||||
static bool isLiteralsPair(const MachineInstr *FirstMI,
|
||||
const MachineInstr &SecondMI) {
|
||||
// Assume the 1st instr to be a wildcard if it is unspecified.
|
||||
|
||||
// PC relative address.
|
||||
if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::ADRP) &&
|
||||
SecondMI.getOpcode() == AArch64::ADDXri)
|
||||
return true;
|
||||
|
||||
// 32 bit immediate.
|
||||
if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVZWi) &&
|
||||
(SecondMI.getOpcode() == AArch64::MOVKWi &&
|
||||
|
@ -397,6 +400,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
|
|||
return true;
|
||||
if (ST.hasFuseCryptoEOR() && isCryptoEORPair(FirstMI, SecondMI))
|
||||
return true;
|
||||
if (ST.hasFuseAdrpAdd() && isAdrpAddPair(FirstMI, SecondMI))
|
||||
return true;
|
||||
if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI))
|
||||
return true;
|
||||
if (ST.hasFuseAddress() && isAddressLdStPair(FirstMI, SecondMI))
|
||||
|
|
|
@ -204,8 +204,8 @@ public:
|
|||
/// Return true if the CPU supports any kind of instruction fusion.
|
||||
bool hasFusion() const {
|
||||
return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
|
||||
hasFuseAES() || hasFuseArithmeticLogic() ||
|
||||
hasFuseCCSelect() || hasFuseLiterals();
|
||||
hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
|
||||
hasFuseAdrpAdd() || hasFuseLiterals();
|
||||
}
|
||||
|
||||
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
|
||||
|
|
|
@ -56,10 +56,9 @@ define void @baz(i8* %arg) !dbg !6 {
|
|||
; CHECK-NEXT: lsl x8, x0, #4
|
||||
; CHECK-NEXT: .loc 1 0 0 is_stmt 0 // tmp.ll:0:0
|
||||
; CHECK-NEXT: adrp x9, global+202752
|
||||
; CHECK-NEXT: add x9, x9, :lo12:global+202752
|
||||
; CHECK-NEXT: .loc 1 4 1 // tmp.ll:4:1
|
||||
; CHECK-NEXT: and x8, x8, #0x1ff0
|
||||
; CHECK-NEXT: .loc 1 0 0 // tmp.ll:0:0
|
||||
; CHECK-NEXT: add x9, x9, :lo12:global+202752
|
||||
; CHECK-NEXT: .loc 1 5 1 is_stmt 1 // tmp.ll:5:1
|
||||
; CHECK-NEXT: str xzr, [x9, x8]
|
||||
; CHECK-NEXT: .loc 1 6 1 // tmp.ll:6:1
|
||||
|
|
|
@ -457,10 +457,10 @@ define void @caller_in_memory() {
|
|||
; CHECK-NEXT: .cfi_offset w30, -16
|
||||
; CHECK-NEXT: add x8, sp, #8
|
||||
; CHECK-NEXT: bl return_in_memory
|
||||
; CHECK-NEXT: adrp x8, in_memory_store
|
||||
; CHECK-NEXT: ldur q0, [sp, #24]
|
||||
; CHECK-NEXT: ldur q1, [sp, #8]
|
||||
; CHECK-NEXT: adrp x8, in_memory_store
|
||||
; CHECK-NEXT: add x8, x8, :lo12:in_memory_store
|
||||
; CHECK-NEXT: ldur q1, [sp, #8]
|
||||
; CHECK-NEXT: ldur q2, [sp, #56]
|
||||
; CHECK-NEXT: ldur q3, [sp, #40]
|
||||
; CHECK-NEXT: ldr d4, [sp, #72]
|
||||
|
@ -478,14 +478,14 @@ define void @caller_in_memory() {
|
|||
define void @callee_in_memory(%T_IN_MEMORY %a) {
|
||||
; CHECK-LABEL: callee_in_memory:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, in_memory_store
|
||||
; CHECK-NEXT: ldr d0, [sp, #64]
|
||||
; CHECK-NEXT: ldp q1, q2, [sp, #32]
|
||||
; CHECK-NEXT: adrp x8, in_memory_store
|
||||
; CHECK-NEXT: add x8, x8, :lo12:in_memory_store
|
||||
; CHECK-NEXT: str d0, [x8, #64]
|
||||
; CHECK-NEXT: ldr q3, [sp, #16]
|
||||
; CHECK-NEXT: stp q1, q2, [x8, #32]
|
||||
; CHECK-NEXT: ldp q1, q2, [sp, #32]
|
||||
; CHECK-NEXT: str d0, [x8, #64]
|
||||
; CHECK-NEXT: ldr q0, [sp]
|
||||
; CHECK-NEXT: stp q1, q2, [x8, #32]
|
||||
; CHECK-NEXT: stp q0, q3, [x8]
|
||||
; CHECK-NEXT: ret
|
||||
store %T_IN_MEMORY %a, %T_IN_MEMORY* @in_memory_store
|
||||
|
|
|
@ -71,8 +71,8 @@ define [2 x i64] @f4() {
|
|||
;
|
||||
; GISEL-LABEL: f4:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: adrp x9, x2+8
|
||||
; GISEL-NEXT: adrp x8, x2+8
|
||||
; GISEL-NEXT: adrp x9, x2+8
|
||||
; GISEL-NEXT: add x9, x9, :lo12:x2+8
|
||||
; GISEL-NEXT: ldr x0, [x8, :lo12:x2+8]
|
||||
; GISEL-NEXT: ldr x1, [x9, #8]
|
||||
|
|
|
@ -8,8 +8,8 @@ define void @test1() {
|
|||
; CHECK-LABEL: test1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, x
|
||||
; CHECK-NEXT: adrp x10, y
|
||||
; CHECK-NEXT: add x8, x8, :lo12:x
|
||||
; CHECK-NEXT: adrp x10, y
|
||||
; CHECK-NEXT: add x10, x10, :lo12:y
|
||||
; CHECK-NEXT: ldp x8, x9, [x8]
|
||||
; CHECK-NEXT: stp x8, x9, [x10]
|
||||
|
@ -23,8 +23,8 @@ define void @test2() {
|
|||
; CHECK-LABEL: test2:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, x
|
||||
; CHECK-NEXT: adrp x10, y
|
||||
; CHECK-NEXT: add x8, x8, :lo12:x
|
||||
; CHECK-NEXT: adrp x10, y
|
||||
; CHECK-NEXT: add x10, x10, :lo12:y
|
||||
; CHECK-NEXT: ldp x8, x9, [x8, #504]
|
||||
; CHECK-NEXT: stp x8, x9, [x10, #504]
|
||||
|
@ -38,10 +38,10 @@ define void @test3() {
|
|||
; CHECK-LABEL: test3:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, x
|
||||
; CHECK-NEXT: adrp x10, y
|
||||
; CHECK-NEXT: add x8, x8, :lo12:x
|
||||
; CHECK-NEXT: add x10, x10, :lo12:y
|
||||
; CHECK-NEXT: add x8, x8, #512
|
||||
; CHECK-NEXT: adrp x10, y
|
||||
; CHECK-NEXT: add x10, x10, :lo12:y
|
||||
; CHECK-NEXT: add x10, x10, #512
|
||||
; CHECK-NEXT: ldp x8, x9, [x8]
|
||||
; CHECK-NEXT: stp x8, x9, [x10]
|
||||
|
@ -55,8 +55,8 @@ define void @test4() {
|
|||
; CHECK-LABEL: test4:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, x
|
||||
; CHECK-NEXT: adrp x10, y
|
||||
; CHECK-NEXT: add x8, x8, :lo12:x
|
||||
; CHECK-NEXT: adrp x10, y
|
||||
; CHECK-NEXT: add x10, x10, :lo12:y
|
||||
; CHECK-NEXT: ldp x8, x9, [x8, #-512]
|
||||
; CHECK-NEXT: stp x8, x9, [x10, #-512]
|
||||
|
@ -70,10 +70,10 @@ define void @test5() {
|
|||
; CHECK-LABEL: test5:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, x
|
||||
; CHECK-NEXT: adrp x10, y
|
||||
; CHECK-NEXT: add x8, x8, :lo12:x
|
||||
; CHECK-NEXT: add x10, x10, :lo12:y
|
||||
; CHECK-NEXT: sub x8, x8, #520
|
||||
; CHECK-NEXT: adrp x10, y
|
||||
; CHECK-NEXT: add x10, x10, :lo12:y
|
||||
; CHECK-NEXT: sub x10, x10, #520
|
||||
; CHECK-NEXT: ldp x8, x9, [x8]
|
||||
; CHECK-NEXT: stp x8, x9, [x10]
|
||||
|
@ -87,10 +87,10 @@ define void @test6() {
|
|||
; CHECK-LABEL: test6:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, x
|
||||
; CHECK-NEXT: adrp x10, y
|
||||
; CHECK-NEXT: add x8, x8, :lo12:x
|
||||
; CHECK-NEXT: add x10, x10, :lo12:y
|
||||
; CHECK-NEXT: sub x8, x8, #520
|
||||
; CHECK-NEXT: adrp x10, y
|
||||
; CHECK-NEXT: add x10, x10, :lo12:y
|
||||
; CHECK-NEXT: sub x10, x10, #520
|
||||
; CHECK-NEXT: ldp x8, x9, [x8]
|
||||
; CHECK-NEXT: stp x8, x9, [x10]
|
||||
|
@ -104,10 +104,10 @@ define void @test7() {
|
|||
; CHECK-LABEL: test7:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, x
|
||||
; CHECK-NEXT: adrp x10, y
|
||||
; CHECK-NEXT: add x8, x8, :lo12:x
|
||||
; CHECK-NEXT: add x10, x10, :lo12:y
|
||||
; CHECK-NEXT: add x8, x8, #503
|
||||
; CHECK-NEXT: adrp x10, y
|
||||
; CHECK-NEXT: add x10, x10, :lo12:y
|
||||
; CHECK-NEXT: add x10, x10, #503
|
||||
; CHECK-NEXT: ldp x8, x9, [x8]
|
||||
; CHECK-NEXT: stp x8, x9, [x10]
|
||||
|
|
|
@ -9,8 +9,9 @@ define i32 @test_jumptable(i32 %in) {
|
|||
i32 2, label %lbl3
|
||||
i32 4, label %lbl4
|
||||
]
|
||||
; CHECK: adrp [[JTPAGE:x[0-9]+]], LJTI0_0@PAGE
|
||||
|
||||
; CHECK: mov w[[INDEX:[0-9]+]], w0
|
||||
; CHECK: adrp [[JTPAGE:x[0-9]+]], LJTI0_0@PAGE
|
||||
; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], LJTI0_0@PAGEOFF
|
||||
; CHECK: adr [[BASE_BLOCK:x[0-9]+]], LBB0_2
|
||||
; CHECK: ldrb w[[OFFSET:[0-9]+]], [x[[JT]], x[[INDEX]]]
|
||||
|
|
|
@ -55,8 +55,8 @@ entry:
|
|||
; CHECK-LABEL: OUTLINED_FUNCTION_0:
|
||||
; CHECK: .cfi_startproc
|
||||
; CHECK: adrp x1, _ZTIi
|
||||
; CHECK-NEXT: mov x2, xzr
|
||||
; CHECK-NEXT: add x1, x1, :lo12:_ZTIi
|
||||
; CHECK-NEXT: mov x2, xzr
|
||||
; CHECK-NEXT: str w19, [x0]
|
||||
; CHECK-NEXT: b __cxa_throw
|
||||
; CHECK: .cfi_endproc
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=-fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKDONT
|
||||
; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
|
||||
; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=-fuse-adrp-add,-fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKDONT
|
||||
; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-adrp-add,+fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
|
||||
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
|
||||
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
|
||||
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
|
||||
|
|
|
@ -23,11 +23,11 @@ define dso_local void @run_test() local_unnamed_addr #0 {
|
|||
; CHECK-NEXT: .cfi_offset b14, -56
|
||||
; CHECK-NEXT: .cfi_offset b15, -64
|
||||
; CHECK-NEXT: movi v14.2d, #0000000000000000
|
||||
; CHECK-NEXT: adrp x10, B+48
|
||||
; CHECK-NEXT: adrp x11, A
|
||||
; CHECK-NEXT: mov x8, xzr
|
||||
; CHECK-NEXT: mov x9, xzr
|
||||
; CHECK-NEXT: adrp x10, B+48
|
||||
; CHECK-NEXT: add x10, x10, :lo12:B+48
|
||||
; CHECK-NEXT: adrp x11, A
|
||||
; CHECK-NEXT: add x11, x11, :lo12:A
|
||||
; CHECK-NEXT: // implicit-def: $q2
|
||||
; CHECK-NEXT: // implicit-def: $q3
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
|
||||
; CHECK-LABEL: vls_sve_and_64xi8:
|
||||
; CHECK-NEXT: adrp x[[ONE:[0-9]+]], .LCPI0_0
|
||||
; CHECK-NEXT: ptrue p0.b, vl64
|
||||
; CHECK-NEXT: add x[[TWO:[0-9]+]], x[[ONE]], :lo12:.LCPI0_0
|
||||
; CHECK-NEXT: ptrue p0.b, vl64
|
||||
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x[[TWO]]]
|
||||
; CHECK-NEXT: and z0.d, z0.d, z1.d
|
||||
|
|
|
@ -578,8 +578,8 @@ define <vscale x 2 x double> @splat_nxv2f64_imm_out_of_range() {
|
|||
; CHECK-LABEL: splat_nxv2f64_imm_out_of_range:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, .LCPI55_0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: add x8, x8, :lo12:.LCPI55_0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8]
|
||||
; CHECK-NEXT: ret
|
||||
%1 = insertelement <vscale x 2 x double> undef, double 3.33, i32 0
|
||||
|
|
Loading…
Reference in New Issue