[RISCV] Ensure the entire stack is aligned to the RVV stack alignment

This patch fixes another bug in the RVV frame lowering. While some frame
objects with non-default stack IDs (such as scalable-vector alloca
instructions) are considered in the target-independent max-alignment
calculations, others (for example, objects created during
calling-convention lowering) are not. This means we would occasionally
align the base of the stack to only 16 bytes, with no way to ensure that
the RVV section contained within it is aligned any higher.
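
As an illustration of the shape of the fix, here is a minimal sketch of
RISCVFrameLowering::processFunctionBeforeFrameFinalized after this change.
The actual change is the single ensureMaxAlignment call (visible in the
first hunk below); the surrounding code is paraphrased and the
assignRVVStackObjectOffsets helper is an assumed name, not the verbatim
in-tree code:

  // Sketch only: paraphrased from the hunk below, not verbatim in-tree code.
  void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
      MachineFunction &MF, RegScavenger *RS) const {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Lay out the scalable-vector (RVV) stack objects and record the size
    // and alignment of the resulting RVV region (helper name assumed here).
    auto [RVVStackSize, RVVStackAlign] = assignRVVStackObjectOffsets(MFI);
    RVFI->setRVVStackSize(RVVStackSize);
    RVFI->setRVVStackAlign(RVVStackAlign);

    // The fix: the whole frame must be aligned to at least the RVV
    // requirement, because some scalable-vector object alignments (e.g.
    // objects created during calling-convention lowering) never reach the
    // target-independent max-alignment bookkeeping.
    MFI.ensureMaxAlignment(RVVStackAlign);
  }

As the updated tests below show, affected functions now realign the stack in
the prologue (e.g. "andi sp, sp, -128") and restore it from the frame pointer
in the epilogue (e.g. "addi sp, s0, -144") instead of undoing the scalable
stack adjustment with a csrr/slli/add sequence.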

Reviewed By: StephenFan

Differential Revision: https://reviews.llvm.org/D125973
Fraser Cormack 2022-05-19 14:47:40 +01:00
parent cb8681a2b3
commit 08c9fb8447
7 changed files with 53 additions and 39 deletions

@@ -962,6 +962,11 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
RVFI->setRVVStackSize(RVVStackSize);
RVFI->setRVVStackAlign(RVVStackAlign);
// Ensure the entire stack is aligned to at least the RVV requirement: some
// scalable-vector object alignments are not considered by the
// target-independent code.
MFI.ensureMaxAlignment(RVVStackAlign);
const RISCVInstrInfo &TII = *MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
// estimateStackSize has been observed to under-estimate the final stack

@@ -290,9 +290,12 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
; RV32-NEXT: .cfi_def_cfa_offset 144
; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi s0, sp, 144
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: andi sp, sp, -128
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a3, a0, a1
@@ -308,9 +311,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
; RV32-NEXT: vmv8r.v v8, v0
; RV32-NEXT: vmv8r.v v16, v24
; RV32-NEXT: call ext2@plt
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, s0, -144
; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 144
; RV32-NEXT: ret
@@ -321,9 +322,12 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
; RV64-NEXT: .cfi_def_cfa_offset 144
; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: addi s0, sp, 144
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: andi sp, sp, -128
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a3, a0, a1
@@ -339,9 +343,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
; RV64-NEXT: vmv8r.v v8, v0
; RV64-NEXT: vmv8r.v v16, v24
; RV64-NEXT: call ext2@plt
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, s0, -144
; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 144
; RV64-NEXT: ret
@@ -356,10 +358,13 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV32-NEXT: .cfi_def_cfa_offset 144
; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi s0, sp, 144
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 48
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: andi sp, sp, -128
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a3, a2, a1
@@ -414,10 +419,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV32-NEXT: addi a1, a1, 128
; RV32-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: call ext3@plt
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 48
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, s0, -144
; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 144
; RV32-NEXT: ret
@@ -428,10 +430,13 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV64-NEXT: .cfi_def_cfa_offset 144
; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: addi s0, sp, 144
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 48
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: andi sp, sp, -128
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a3, a2, a1
@@ -486,10 +491,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV64-NEXT: addi a1, a1, 128
; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: call ext3@plt
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a1, 48
; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, s0, -144
; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 144
; RV64-NEXT: ret
@@ -524,9 +526,12 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
; RV32-NEXT: .cfi_def_cfa_offset 144
; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi s0, sp, 144
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: andi sp, sp, -128
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: addi a1, sp, 128
@@ -563,9 +568,7 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
; RV32-NEXT: li a0, 0
; RV32-NEXT: vmv.v.i v16, 0
; RV32-NEXT: call vector_arg_indirect_stack@plt
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, s0, -144
; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 144
; RV32-NEXT: ret
@@ -576,9 +579,12 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
; RV64-NEXT: .cfi_def_cfa_offset 144
; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: addi s0, sp, 144
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 5
; RV64-NEXT: sub sp, sp, a0
; RV64-NEXT: andi sp, sp, -128
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: addi a1, sp, 128
@@ -615,9 +621,7 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
; RV64-NEXT: li a0, 0
; RV64-NEXT: vmv.v.i v16, 0
; RV64-NEXT: call vector_arg_indirect_stack@plt
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 5
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, s0, -144
; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 144
; RV64-NEXT: ret

@@ -27,9 +27,12 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV32-NEXT: .cfi_def_cfa_offset 144
; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi s0, sp, 144
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: andi sp, sp, -128
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: addi a1, sp, 128
@@ -42,9 +45,7 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV32-NEXT: addi a0, sp, 128
; RV32-NEXT: vmv.v.i v16, 0
; RV32-NEXT: call callee_scalable_vector_split_indirect@plt
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, s0, -144
; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 144
; RV32-NEXT: ret
@@ -55,9 +56,12 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV64-NEXT: .cfi_def_cfa_offset 144
; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: addi s0, sp, 144
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: sub sp, sp, a0
; RV64-NEXT: andi sp, sp, -128
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: addi a1, sp, 128
@@ -70,9 +74,7 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV64-NEXT: addi a0, sp, 128
; RV64-NEXT: vmv.v.i v16, 0
; RV64-NEXT: call callee_scalable_vector_split_indirect@plt
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, s0, -144
; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 144
; RV64-NEXT: ret

@@ -14,10 +14,10 @@
; CHECK-NEXT: sd a0, 32(sp)
; CHECK-NEXT: sd a0, 16(sp)
; CHECK-NEXT: vsetivli a5, 1, e16, m1, ta, mu
; CHECK-NEXT: sd a1, 8(sp)
; CHECK-NEXT: sd a1, 0(sp)
; CHECK-NEXT: addi a1, sp, 24
; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: ld a1, 8(sp)
; CHECK-NEXT: ld a1, 0(sp)
; CHECK-NEXT: call fixedlen_vector_spillslot@plt
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 48

@@ -561,9 +561,12 @@ define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, <vscale x 16 x i64>* %out
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -64
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: addi s0, sp, 64
; CHECK-NEXT: .cfi_def_cfa s0, 0
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: andi sp, sp, -64
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: addi a0, sp, 128
@@ -578,9 +581,7 @@ define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, <vscale x 16 x i64>* %out
; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: vs8r.v v8, (a0)
; CHECK-NEXT: vs8r.v v16, (a1)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, s0, -64
; CHECK-NEXT: addi sp, sp, 64
; CHECK-NEXT: ret
%sv = load <2 x i64>, <2 x i64>* %psv

@@ -18,7 +18,7 @@ define signext i32 @foo(i32 signext %aa) #0 {
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: andi sp, sp, -8
; CHECK-NEXT: andi sp, sp, -16
; CHECK-NEXT: mv s1, sp
; CHECK-NEXT: lw t0, 44(s1)
; CHECK-NEXT: lw a2, 40(s1)

@@ -26,28 +26,30 @@ define <vscale x 16 x i32> @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5,
; CHECK-NEXT: .cfi_def_cfa_offset 80
; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: addi s0, sp, 80
; CHECK-NEXT: .cfi_def_cfa s0, 0
; CHECK-NEXT: csrr t0, vlenb
; CHECK-NEXT: slli t0, t0, 4
; CHECK-NEXT: sub sp, sp, t0
; CHECK-NEXT: addi t0, sp, 64
; CHECK-NEXT: andi sp, sp, -64
; CHECK-NEXT: mv s1, sp
; CHECK-NEXT: addi t0, s1, 64
; CHECK-NEXT: sd t0, 8(sp)
; CHECK-NEXT: csrr t0, vlenb
; CHECK-NEXT: slli t0, t0, 3
; CHECK-NEXT: add t0, sp, t0
; CHECK-NEXT: add t0, s1, t0
; CHECK-NEXT: addi t0, t0, 64
; CHECK-NEXT: sd t0, 0(sp)
; CHECK-NEXT: addi t0, sp, 64
; CHECK-NEXT: addi t0, s1, 64
; CHECK-NEXT: vs8r.v v8, (t0)
; CHECK-NEXT: csrr t0, vlenb
; CHECK-NEXT: slli t0, t0, 3
; CHECK-NEXT: add t0, sp, t0
; CHECK-NEXT: add t0, s1, t0
; CHECK-NEXT: addi t0, t0, 64
; CHECK-NEXT: vs8r.v v8, (t0)
; CHECK-NEXT: vmv8r.v v16, v8
; CHECK-NEXT: call bar@plt
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, s0, -80
; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 80
; CHECK-NEXT: ret