[RISCV] Return false from isOffsetFoldingLegal instead of reversing the fold in lowering.

When lowering GlobalAddressNodes, we were removing a non-zero offset and
creating a separate ADD.

It already comes out of SelectionDAGBuilder with a separate ADD. The
ADD was being removed by DAGCombiner.

This patch disables the DAG combine so we don't have to reverse it.
Test changes all look to be instruction order changes. Probably due
to different DAG node ordering.

Differential Revision: https://reviews.llvm.org/D126558
This commit is contained in:
Craig Topper 2022-05-27 10:16:32 -07:00
parent 5df2893a9a
commit aaad507546
11 changed files with 58 additions and 64 deletions

View File

@ -1231,6 +1231,15 @@ bool RISCVTargetLowering::shouldSinkOperands(
return true;
}
// Tell the target-independent DAG combiner that folding a GlobalAddress'
// offset into the GlobalAddressSDNode is not legal on RISC-V.
// SelectionDAGBuilder already emits the offset as a separate ADD node;
// returning false here prevents DAGCombiner from merging it, so lowering
// never has to reverse the fold.
bool RISCVTargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
// In order to maximise the opportunity for common subexpression elimination,
// keep a separate ADD node for the global address offset instead of folding
// it in the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.
return false;
}
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
// FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
@ -3578,21 +3587,12 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
SDLoc DL(Op);
EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
int64_t Offset = N->getOffset();
assert(N->getOffset() == 0 && "unexpected offset in global node");
MVT XLenVT = Subtarget.getXLenVT();
const GlobalValue *GV = N->getGlobal();
bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
SDValue Addr = getAddr(N, DAG, IsLocal);
// In order to maximise the opportunity for common subexpression elimination,
// emit a separate ADD node for the global address offset instead of folding
// it in the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.
if (Offset != 0)
return DAG.getNode(ISD::ADD, DL, Ty, Addr,
DAG.getConstant(Offset, DL, XLenVT));
return Addr;
return getAddr(N, DAG, IsLocal);
}
SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
@ -3701,7 +3701,7 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
SDLoc DL(Op);
EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
int64_t Offset = N->getOffset();
assert(N->getOffset() == 0 && "unexpected offset in global node");
MVT XLenVT = Subtarget.getXLenVT();
TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
@ -3724,13 +3724,6 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
break;
}
// In order to maximise the opportunity for common subexpression elimination,
// emit a separate ADD node for the global address offset instead of folding
// it in the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.
if (Offset != 0)
return DAG.getNode(ISD::ADD, DL, Ty, Addr,
DAG.getConstant(Offset, DL, XLenVT));
return Addr;
}

View File

@ -354,6 +354,7 @@ public:
SelectionDAG &DAG) const override;
bool shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;

View File

@ -59,10 +59,10 @@ define dso_local double @fld_fsd_global(double %a, double %b) nounwind {
; RV32IFD-NEXT: fadd.d fa0, fa0, fa1
; RV32IFD-NEXT: lui a0, %hi(G)
; RV32IFD-NEXT: fld ft0, %lo(G)(a0)
; RV32IFD-NEXT: addi a1, a0, %lo(G)
; RV32IFD-NEXT: fsd fa0, %lo(G)(a0)
; RV32IFD-NEXT: addi a0, a0, %lo(G)
; RV32IFD-NEXT: fld ft0, 72(a0)
; RV32IFD-NEXT: fsd fa0, 72(a0)
; RV32IFD-NEXT: fld ft0, 72(a1)
; RV32IFD-NEXT: fsd fa0, 72(a1)
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: fld_fsd_global:
@ -70,10 +70,10 @@ define dso_local double @fld_fsd_global(double %a, double %b) nounwind {
; RV64IFD-NEXT: fadd.d fa0, fa0, fa1
; RV64IFD-NEXT: lui a0, %hi(G)
; RV64IFD-NEXT: fld ft0, %lo(G)(a0)
; RV64IFD-NEXT: addi a1, a0, %lo(G)
; RV64IFD-NEXT: fsd fa0, %lo(G)(a0)
; RV64IFD-NEXT: addi a0, a0, %lo(G)
; RV64IFD-NEXT: fld ft0, 72(a0)
; RV64IFD-NEXT: fsd fa0, 72(a0)
; RV64IFD-NEXT: fld ft0, 72(a1)
; RV64IFD-NEXT: fsd fa0, 72(a1)
; RV64IFD-NEXT: ret
; Use %a and %b in an FP op to ensure floating point registers are used, even
; for the soft float ABI

View File

@ -61,10 +61,10 @@ define dso_local float @flw_fsw_global(float %a, float %b) nounwind {
; RV32IF-NEXT: fadd.s fa0, fa0, fa1
; RV32IF-NEXT: lui a0, %hi(G)
; RV32IF-NEXT: flw ft0, %lo(G)(a0)
; RV32IF-NEXT: addi a1, a0, %lo(G)
; RV32IF-NEXT: fsw fa0, %lo(G)(a0)
; RV32IF-NEXT: addi a0, a0, %lo(G)
; RV32IF-NEXT: flw ft0, 36(a0)
; RV32IF-NEXT: fsw fa0, 36(a0)
; RV32IF-NEXT: flw ft0, 36(a1)
; RV32IF-NEXT: fsw fa0, 36(a1)
; RV32IF-NEXT: ret
;
; RV64IF-LABEL: flw_fsw_global:
@ -72,10 +72,10 @@ define dso_local float @flw_fsw_global(float %a, float %b) nounwind {
; RV64IF-NEXT: fadd.s fa0, fa0, fa1
; RV64IF-NEXT: lui a0, %hi(G)
; RV64IF-NEXT: flw ft0, %lo(G)(a0)
; RV64IF-NEXT: addi a1, a0, %lo(G)
; RV64IF-NEXT: fsw fa0, %lo(G)(a0)
; RV64IF-NEXT: addi a0, a0, %lo(G)
; RV64IF-NEXT: flw ft0, 36(a0)
; RV64IF-NEXT: fsw fa0, 36(a0)
; RV64IF-NEXT: flw ft0, 36(a1)
; RV64IF-NEXT: fsw fa0, 36(a1)
; RV64IF-NEXT: ret
%1 = fadd float %a, %b
%2 = load volatile float, float* @G

View File

@ -61,10 +61,10 @@ define half @flh_fsh_global(half %a, half %b) nounwind {
; RV32IZFH-NEXT: fadd.h fa0, fa0, fa1
; RV32IZFH-NEXT: lui a0, %hi(G)
; RV32IZFH-NEXT: flh ft0, %lo(G)(a0)
; RV32IZFH-NEXT: addi a1, a0, %lo(G)
; RV32IZFH-NEXT: fsh fa0, %lo(G)(a0)
; RV32IZFH-NEXT: addi a0, a0, %lo(G)
; RV32IZFH-NEXT: flh ft0, 18(a0)
; RV32IZFH-NEXT: fsh fa0, 18(a0)
; RV32IZFH-NEXT: flh ft0, 18(a1)
; RV32IZFH-NEXT: fsh fa0, 18(a1)
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: flh_fsh_global:
@ -72,10 +72,10 @@ define half @flh_fsh_global(half %a, half %b) nounwind {
; RV64IZFH-NEXT: fadd.h fa0, fa0, fa1
; RV64IZFH-NEXT: lui a0, %hi(G)
; RV64IZFH-NEXT: flh ft0, %lo(G)(a0)
; RV64IZFH-NEXT: addi a1, a0, %lo(G)
; RV64IZFH-NEXT: fsh fa0, %lo(G)(a0)
; RV64IZFH-NEXT: addi a0, a0, %lo(G)
; RV64IZFH-NEXT: flh ft0, 18(a0)
; RV64IZFH-NEXT: fsh fa0, 18(a0)
; RV64IZFH-NEXT: flh ft0, 18(a1)
; RV64IZFH-NEXT: fsh fa0, 18(a1)
; RV64IZFH-NEXT: ret
%1 = fadd half %a, %b
%2 = load volatile half, half* @G

View File

@ -99,11 +99,11 @@ define dso_local i32* @big_offset_one_use() local_unnamed_addr nounwind {
;
; RV64-LABEL: big_offset_one_use:
; RV64: # %bb.0: # %entry
; RV64-NEXT: lui a0, 4
; RV64-NEXT: addiw a0, a0, 188
; RV64-NEXT: lui a1, %hi(s)
; RV64-NEXT: addi a1, a1, %lo(s)
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: lui a0, %hi(s)
; RV64-NEXT: addi a0, a0, %lo(s)
; RV64-NEXT: lui a1, 4
; RV64-NEXT: addiw a1, a1, 188
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: ret
entry:
ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 5)

View File

@ -42,7 +42,7 @@ define void @test(i32 signext %i) nounwind {
; RV32-NEXT: addi a3, a3, 1
; RV32-NEXT: .LBB0_2: # %bb
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: add a4, a1, a2
; RV32-NEXT: add a4, a2, a1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: sb zero, 0(a4)
; RV32-NEXT: blt a1, a3, .LBB0_2
@ -65,7 +65,7 @@ define void @test(i32 signext %i) nounwind {
; RV64-NEXT: addw a5, a5, a1
; RV64-NEXT: slli a6, a5, 32
; RV64-NEXT: srli a6, a6, 32
; RV64-NEXT: add a6, a6, a3
; RV64-NEXT: add a6, a3, a6
; RV64-NEXT: sb zero, 0(a6)
; RV64-NEXT: addw a5, a5, a0
; RV64-NEXT: addiw a2, a2, 1

View File

@ -54,10 +54,10 @@ define void @test(i32 signext %row, i32 signext %N.in) nounwind {
; RV64-NEXT: blez a1, .LBB0_3
; RV64-NEXT: # %bb.1: # %cond_true.preheader
; RV64-NEXT: li a4, 0
; RV64-NEXT: slli a0, a0, 6
; RV64-NEXT: lui a2, %hi(A)
; RV64-NEXT: addi a2, a2, %lo(A)
; RV64-NEXT: slli a0, a0, 6
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: add a0, a2, a0
; RV64-NEXT: li a2, 4
; RV64-NEXT: li a3, 5
; RV64-NEXT: .LBB0_2: # %cond_true

View File

@ -170,10 +170,10 @@ define dso_local i32 @lw_sw_global(i32 %a) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: lui a2, %hi(G)
; RV32I-NEXT: lw a1, %lo(G)(a2)
; RV32I-NEXT: addi a3, a2, %lo(G)
; RV32I-NEXT: sw a0, %lo(G)(a2)
; RV32I-NEXT: addi a2, a2, %lo(G)
; RV32I-NEXT: lw a3, 36(a2)
; RV32I-NEXT: sw a0, 36(a2)
; RV32I-NEXT: lw a2, 36(a3)
; RV32I-NEXT: sw a0, 36(a3)
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: ret
%1 = load volatile i32, i32* @G

View File

@ -215,10 +215,10 @@ define dso_local i64 @ld_sd_global(i64 %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: lui a2, %hi(G)
; RV64I-NEXT: ld a1, %lo(G)(a2)
; RV64I-NEXT: addi a3, a2, %lo(G)
; RV64I-NEXT: sd a0, %lo(G)(a2)
; RV64I-NEXT: addi a2, a2, %lo(G)
; RV64I-NEXT: ld a3, 72(a2)
; RV64I-NEXT: sd a0, 72(a2)
; RV64I-NEXT: ld a2, 72(a3)
; RV64I-NEXT: sd a0, 72(a3)
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
%1 = load volatile i64, i64* @G

View File

@ -10,12 +10,12 @@ define dso_local i32 @test_zext_i8() nounwind {
; RV32I-LABEL: test_zext_i8:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lui a0, %hi(bytes)
; RV32I-NEXT: lbu a1, %lo(bytes)(a0)
; RV32I-NEXT: addi a0, a0, %lo(bytes)
; RV32I-NEXT: lbu a0, 1(a0)
; RV32I-NEXT: xori a1, a1, 136
; RV32I-NEXT: xori a0, a0, 7
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: addi a1, a0, %lo(bytes)
; RV32I-NEXT: lbu a0, %lo(bytes)(a0)
; RV32I-NEXT: lbu a1, 1(a1)
; RV32I-NEXT: xori a0, a0, 136
; RV32I-NEXT: xori a1, a1, 7
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: beqz a0, .LBB0_2
; RV32I-NEXT: # %bb.1: # %if.then
; RV32I-NEXT: li a0, 1
@ -42,14 +42,14 @@ define dso_local i32 @test_zext_i16() nounwind {
; RV32I-LABEL: test_zext_i16:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lui a0, %hi(shorts)
; RV32I-NEXT: lhu a1, %lo(shorts)(a0)
; RV32I-NEXT: addi a0, a0, %lo(shorts)
; RV32I-NEXT: lhu a0, 2(a0)
; RV32I-NEXT: addi a1, a0, %lo(shorts)
; RV32I-NEXT: lhu a0, %lo(shorts)(a0)
; RV32I-NEXT: lhu a1, 2(a1)
; RV32I-NEXT: lui a2, 16
; RV32I-NEXT: addi a2, a2, -120
; RV32I-NEXT: xor a1, a1, a2
; RV32I-NEXT: xori a0, a0, 7
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: xor a0, a0, a2
; RV32I-NEXT: xori a1, a1, 7
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: beqz a0, .LBB1_2
; RV32I-NEXT: # %bb.1: # %if.then
; RV32I-NEXT: li a0, 1