[RISCV] Return false from isOffsetFoldingLegal instead of reversing the fold in lowering.

When lowering GlobalAddressNodes, we were removing a non-zero offset and
creating a separate ADD.

It already comes out of SelectionDAGBuilder with a separate ADD. The
ADD was being removed by DAGCombiner.

This patch disables the DAG combine so we don't have to reverse it.
Test changes all look to be instruction order changes. Probably due
to different DAG node ordering.

Differential Revision: https://reviews.llvm.org/D126558
This commit is contained in:
Craig Topper 2022-05-27 10:16:32 -07:00
parent 5df2893a9a
commit aaad507546
11 changed files with 58 additions and 64 deletions

View File

@ -1231,6 +1231,15 @@ bool RISCVTargetLowering::shouldSinkOperands(
return true;
}
// Tell the target-independent DAG combiner that folding a GlobalAddress'
// offset into the GlobalAddressSDNode is not legal on RISC-V.
// SelectionDAGBuilder already emits the offset as a separate ADD node;
// returning false here prevents DAGCombiner from merging it, so lowering
// never has to reverse the fold.
bool RISCVTargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
// In order to maximise the opportunity for common subexpression elimination,
// keep a separate ADD node for the global address offset instead of folding
// it in the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.
return false;
}
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
// FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
@ -3578,21 +3587,12 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
SDLoc DL(Op);
EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
int64_t Offset = N->getOffset();
assert(N->getOffset() == 0 && "unexpected offset in global node");
MVT XLenVT = Subtarget.getXLenVT();
const GlobalValue *GV = N->getGlobal();
bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
SDValue Addr = getAddr(N, DAG, IsLocal);
// In order to maximise the opportunity for common subexpression elimination,
// emit a separate ADD node for the global address offset instead of folding
// it in the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.
if (Offset != 0)
return DAG.getNode(ISD::ADD, DL, Ty, Addr,
DAG.getConstant(Offset, DL, XLenVT));
return Addr;
return getAddr(N, DAG, IsLocal);
}
SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
@ -3701,7 +3701,7 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
SDLoc DL(Op);
EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
int64_t Offset = N->getOffset();
assert(N->getOffset() == 0 && "unexpected offset in global node");
MVT XLenVT = Subtarget.getXLenVT();
TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
@ -3724,13 +3724,6 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
break;
}
// In order to maximise the opportunity for common subexpression elimination,
// emit a separate ADD node for the global address offset instead of folding
// it in the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.
if (Offset != 0)
return DAG.getNode(ISD::ADD, DL, Ty, Addr,
DAG.getConstant(Offset, DL, XLenVT));
return Addr;
}

View File

@ -354,6 +354,7 @@ public:
SelectionDAG &DAG) const override;
bool shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;

View File

@ -59,10 +59,10 @@ define dso_local double @fld_fsd_global(double %a, double %b) nounwind {
; RV32IFD-NEXT: fadd.d fa0, fa0, fa1
; RV32IFD-NEXT: lui a0, %hi(G)
; RV32IFD-NEXT: fld ft0, %lo(G)(a0)
; RV32IFD-NEXT: addi a1, a0, %lo(G)
; RV32IFD-NEXT: fsd fa0, %lo(G)(a0)
; RV32IFD-NEXT: addi a0, a0, %lo(G)
; RV32IFD-NEXT: fld ft0, 72(a0)
; RV32IFD-NEXT: fsd fa0, 72(a0)
; RV32IFD-NEXT: fld ft0, 72(a1)
; RV32IFD-NEXT: fsd fa0, 72(a1)
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: fld_fsd_global:
@ -70,10 +70,10 @@ define dso_local double @fld_fsd_global(double %a, double %b) nounwind {
; RV64IFD-NEXT: fadd.d fa0, fa0, fa1
; RV64IFD-NEXT: lui a0, %hi(G)
; RV64IFD-NEXT: fld ft0, %lo(G)(a0)
; RV64IFD-NEXT: addi a1, a0, %lo(G)
; RV64IFD-NEXT: fsd fa0, %lo(G)(a0)
; RV64IFD-NEXT: addi a0, a0, %lo(G)
; RV64IFD-NEXT: fld ft0, 72(a0)
; RV64IFD-NEXT: fsd fa0, 72(a0)
; RV64IFD-NEXT: fld ft0, 72(a1)
; RV64IFD-NEXT: fsd fa0, 72(a1)
; RV64IFD-NEXT: ret
; Use %a and %b in an FP op to ensure floating point registers are used, even
; for the soft float ABI

View File

@ -61,10 +61,10 @@ define dso_local float @flw_fsw_global(float %a, float %b) nounwind {
; RV32IF-NEXT: fadd.s fa0, fa0, fa1
; RV32IF-NEXT: lui a0, %hi(G)
; RV32IF-NEXT: flw ft0, %lo(G)(a0)
; RV32IF-NEXT: addi a1, a0, %lo(G)
; RV32IF-NEXT: fsw fa0, %lo(G)(a0)
; RV32IF-NEXT: addi a0, a0, %lo(G)
; RV32IF-NEXT: flw ft0, 36(a0)
; RV32IF-NEXT: fsw fa0, 36(a0)
; RV32IF-NEXT: flw ft0, 36(a1)
; RV32IF-NEXT: fsw fa0, 36(a1)
; RV32IF-NEXT: ret
;
; RV64IF-LABEL: flw_fsw_global:
@ -72,10 +72,10 @@ define dso_local float @flw_fsw_global(float %a, float %b) nounwind {
; RV64IF-NEXT: fadd.s fa0, fa0, fa1
; RV64IF-NEXT: lui a0, %hi(G)
; RV64IF-NEXT: flw ft0, %lo(G)(a0)
; RV64IF-NEXT: addi a1, a0, %lo(G)
; RV64IF-NEXT: fsw fa0, %lo(G)(a0)
; RV64IF-NEXT: addi a0, a0, %lo(G)
; RV64IF-NEXT: flw ft0, 36(a0)
; RV64IF-NEXT: fsw fa0, 36(a0)
; RV64IF-NEXT: flw ft0, 36(a1)
; RV64IF-NEXT: fsw fa0, 36(a1)
; RV64IF-NEXT: ret
%1 = fadd float %a, %b
%2 = load volatile float, float* @G

View File

@ -61,10 +61,10 @@ define half @flh_fsh_global(half %a, half %b) nounwind {
; RV32IZFH-NEXT: fadd.h fa0, fa0, fa1
; RV32IZFH-NEXT: lui a0, %hi(G)
; RV32IZFH-NEXT: flh ft0, %lo(G)(a0)
; RV32IZFH-NEXT: addi a1, a0, %lo(G)
; RV32IZFH-NEXT: fsh fa0, %lo(G)(a0)
; RV32IZFH-NEXT: addi a0, a0, %lo(G)
; RV32IZFH-NEXT: flh ft0, 18(a0)
; RV32IZFH-NEXT: fsh fa0, 18(a0)
; RV32IZFH-NEXT: flh ft0, 18(a1)
; RV32IZFH-NEXT: fsh fa0, 18(a1)
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: flh_fsh_global:
@ -72,10 +72,10 @@ define half @flh_fsh_global(half %a, half %b) nounwind {
; RV64IZFH-NEXT: fadd.h fa0, fa0, fa1
; RV64IZFH-NEXT: lui a0, %hi(G)
; RV64IZFH-NEXT: flh ft0, %lo(G)(a0)
; RV64IZFH-NEXT: addi a1, a0, %lo(G)
; RV64IZFH-NEXT: fsh fa0, %lo(G)(a0)
; RV64IZFH-NEXT: addi a0, a0, %lo(G)
; RV64IZFH-NEXT: flh ft0, 18(a0)
; RV64IZFH-NEXT: fsh fa0, 18(a0)
; RV64IZFH-NEXT: flh ft0, 18(a1)
; RV64IZFH-NEXT: fsh fa0, 18(a1)
; RV64IZFH-NEXT: ret
%1 = fadd half %a, %b
%2 = load volatile half, half* @G

View File

@ -99,11 +99,11 @@ define dso_local i32* @big_offset_one_use() local_unnamed_addr nounwind {
;
; RV64-LABEL: big_offset_one_use:
; RV64: # %bb.0: # %entry
; RV64-NEXT: lui a0, 4
; RV64-NEXT: addiw a0, a0, 188
; RV64-NEXT: lui a1, %hi(s)
; RV64-NEXT: addi a1, a1, %lo(s)
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: lui a0, %hi(s)
; RV64-NEXT: addi a0, a0, %lo(s)
; RV64-NEXT: lui a1, 4
; RV64-NEXT: addiw a1, a1, 188
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: ret
entry:
ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 5)

View File

@ -42,7 +42,7 @@ define void @test(i32 signext %i) nounwind {
; RV32-NEXT: addi a3, a3, 1
; RV32-NEXT: .LBB0_2: # %bb
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: add a4, a1, a2
; RV32-NEXT: add a4, a2, a1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: sb zero, 0(a4)
; RV32-NEXT: blt a1, a3, .LBB0_2
@ -65,7 +65,7 @@ define void @test(i32 signext %i) nounwind {
; RV64-NEXT: addw a5, a5, a1
; RV64-NEXT: slli a6, a5, 32
; RV64-NEXT: srli a6, a6, 32
; RV64-NEXT: add a6, a6, a3
; RV64-NEXT: add a6, a3, a6
; RV64-NEXT: sb zero, 0(a6)
; RV64-NEXT: addw a5, a5, a0
; RV64-NEXT: addiw a2, a2, 1

View File

@ -54,10 +54,10 @@ define void @test(i32 signext %row, i32 signext %N.in) nounwind {
; RV64-NEXT: blez a1, .LBB0_3
; RV64-NEXT: # %bb.1: # %cond_true.preheader
; RV64-NEXT: li a4, 0
; RV64-NEXT: slli a0, a0, 6
; RV64-NEXT: lui a2, %hi(A)
; RV64-NEXT: addi a2, a2, %lo(A)
; RV64-NEXT: slli a0, a0, 6
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: add a0, a2, a0
; RV64-NEXT: li a2, 4
; RV64-NEXT: li a3, 5
; RV64-NEXT: .LBB0_2: # %cond_true

View File

@ -170,10 +170,10 @@ define dso_local i32 @lw_sw_global(i32 %a) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: lui a2, %hi(G)
; RV32I-NEXT: lw a1, %lo(G)(a2)
; RV32I-NEXT: addi a3, a2, %lo(G)
; RV32I-NEXT: sw a0, %lo(G)(a2)
; RV32I-NEXT: addi a2, a2, %lo(G)
; RV32I-NEXT: lw a3, 36(a2)
; RV32I-NEXT: sw a0, 36(a2)
; RV32I-NEXT: lw a2, 36(a3)
; RV32I-NEXT: sw a0, 36(a3)
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: ret
%1 = load volatile i32, i32* @G

View File

@ -215,10 +215,10 @@ define dso_local i64 @ld_sd_global(i64 %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: lui a2, %hi(G)
; RV64I-NEXT: ld a1, %lo(G)(a2)
; RV64I-NEXT: addi a3, a2, %lo(G)
; RV64I-NEXT: sd a0, %lo(G)(a2)
; RV64I-NEXT: addi a2, a2, %lo(G)
; RV64I-NEXT: ld a3, 72(a2)
; RV64I-NEXT: sd a0, 72(a2)
; RV64I-NEXT: ld a2, 72(a3)
; RV64I-NEXT: sd a0, 72(a3)
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
%1 = load volatile i64, i64* @G

View File

@ -10,12 +10,12 @@ define dso_local i32 @test_zext_i8() nounwind {
; RV32I-LABEL: test_zext_i8:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lui a0, %hi(bytes)
; RV32I-NEXT: lbu a1, %lo(bytes)(a0)
; RV32I-NEXT: addi a0, a0, %lo(bytes)
; RV32I-NEXT: lbu a0, 1(a0)
; RV32I-NEXT: xori a1, a1, 136
; RV32I-NEXT: xori a0, a0, 7
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: addi a1, a0, %lo(bytes)
; RV32I-NEXT: lbu a0, %lo(bytes)(a0)
; RV32I-NEXT: lbu a1, 1(a1)
; RV32I-NEXT: xori a0, a0, 136
; RV32I-NEXT: xori a1, a1, 7
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: beqz a0, .LBB0_2
; RV32I-NEXT: # %bb.1: # %if.then
; RV32I-NEXT: li a0, 1
@ -42,14 +42,14 @@ define dso_local i32 @test_zext_i16() nounwind {
; RV32I-LABEL: test_zext_i16:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lui a0, %hi(shorts)
; RV32I-NEXT: lhu a1, %lo(shorts)(a0)
; RV32I-NEXT: addi a0, a0, %lo(shorts)
; RV32I-NEXT: lhu a0, 2(a0)
; RV32I-NEXT: addi a1, a0, %lo(shorts)
; RV32I-NEXT: lhu a0, %lo(shorts)(a0)
; RV32I-NEXT: lhu a1, 2(a1)
; RV32I-NEXT: lui a2, 16
; RV32I-NEXT: addi a2, a2, -120
; RV32I-NEXT: xor a1, a1, a2
; RV32I-NEXT: xori a0, a0, 7
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: xor a0, a0, a2
; RV32I-NEXT: xori a1, a1, 7
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: beqz a0, .LBB1_2
; RV32I-NEXT: # %bb.1: # %if.then
; RV32I-NEXT: li a0, 1