[VENTUS][RISCV][fix] Fix SP stack size calculation error

This commit is contained in:
zhoujing 2023-06-15 18:12:34 +08:00
parent e54daab265
commit c30c837caa
9 changed files with 161 additions and 164 deletions

View File

@ -22,8 +22,8 @@
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/MC/MCDwarf.h"
#include <algorithm>
#include <cmath>
using namespace llvm;
@ -622,7 +622,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
const auto *RMFI = MF.getInfo<RISCVMachineFunctionInfo>();
// Callee-saved registers should be referenced relative to the stack
// pointer (positive offset), otherwise use the frame pointer (negative
@ -647,20 +647,19 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
}
// This goes to the offset calculation of callee saved register, ra/s0
if (FI >= MinCSFI && FI <= MaxCSFI) {
// sp represents SGPR spill, tp represents VGPR spill
// FIXME: we need to define TargetStackID::VGPRSpill?
FrameReg = StackID == TargetStackID::SGPRSpill ? RISCV::X2 : RISCV::X4;
if (FirstSPAdjustAmount)
Offset -= StackOffset::getFixed(FirstSPAdjustAmount);
else
Offset -= StackOffset::getFixed(MFI.getStackSize());
// if (FirstSPAdjustAmount)
// Offset -= StackOffset::getFixed(FirstSPAdjustAmount);
// else
Offset -= StackOffset::getFixed(getStackSize(const_cast<MachineFunction&>(MF)
, RISCVStackID::SGPRSpill));
return Offset;
}
// assert(StackID == TargetStackID::Default &&
// "SGPRSpill stack should not reach here!");
if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) {
assert(0 && "TODO: Add stack realignment support for Ventus?");
// If the per-thread stack was realigned, the frame pointer is set in order
@ -693,9 +692,9 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
if (FrameReg == getFPReg(STI)) {
// assert(0 && "TODO: Add fp support for Ventus?");
Offset -= StackOffset::getFixed(RVFI->getVarArgsSaveSize());
Offset -= StackOffset::getFixed(RMFI->getVarArgsSaveSize());
if (FI >= 0)
Offset -= StackOffset::getFixed(RVFI->getLibCallStackSize());
Offset -= StackOffset::getFixed(RMFI->getLibCallStackSize());
// When using FP to access scalable vector objects, we need to minus
// the frame size.
//
@ -732,7 +731,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
assert(!RI->hasStackRealignment(MF) &&
"Can't index across variable sized realign");
Offset -= StackOffset::get(MFI.getStackSize() +
RVFI->getLibCallStackSize(),0);
RMFI->getLibCallStackSize(),0);
} else {
Offset -= StackOffset::getFixed(MFI.getStackSize());
}
@ -879,8 +878,12 @@ uint64_t RISCVFrameLowering::getStackSize(MachineFunction &MF,
uint64_t StackSize = 0;
for(int I = MFI.getObjectIndexBegin(); I != MFI.getObjectIndexEnd(); I++) {
if(static_cast<unsigned>(MFI.getStackID(I)) == ID)
StackSize += MFI.getObjectSize(I);
if(static_cast<unsigned>(MFI.getStackID(I)) == ID) {
// Need to consider the alignment for different frame index
uint64_t Align = MFI.getObjectAlign(I).value();
StackSize += ceil(double(Align) / 4) * MFI.getObjectSize(I);
}
}
return StackSize;
}
@ -892,6 +895,9 @@ void RISCVFrameLowering::deterMineStackID(MachineFunction &MF) const {
if((MFI.getStackID(I) != RISCVStackID::SGPRSpill) &&
PtrInfo.getAddrSpace() == RISCVAS::PRIVATE_ADDRESS)
MFI.setStackID(I, RISCVStackID::VGPRSpill);
else
// FIXME: other stack?
MFI.setStackID(I, RISCVStackID::SGPRSpill);
}
}
@ -927,9 +933,9 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
if(Reg.id() < RISCV::V0 || Reg.id() > RISCV::V255 ) {
MF->getFrameInfo().setStackID(CS.getFrameIdx(), RISCVStackID::SGPRSpill);
TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
RC, TRI);
MF->getFrameInfo().setStackID(CS.getFrameIdx(), RISCVStackID::SGPRSpill);
}
}

View File

@ -5,15 +5,15 @@
define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(3) nocapture noundef readonly align 4 %B) {
; VENTUS-LABEL: func:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: addi sp, sp, 12
; VENTUS-NEXT: addi tp, tp, 4
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: .cfi_def_cfa_offset 12
; VENTUS-NEXT: sw ra, -4(sp)
; VENTUS-NEXT: sw s0, -8(sp)
; VENTUS-NEXT: sw s1, -12(sp)
; VENTUS-NEXT: .cfi_offset ra, 12
; VENTUS-NEXT: .cfi_offset s0, 8
; VENTUS-NEXT: .cfi_offset s1, 4
; VENTUS-NEXT: .cfi_offset ra, 8
; VENTUS-NEXT: .cfi_offset s0, 4
; VENTUS-NEXT: .cfi_offset s1, 0
; VENTUS-NEXT: .cfi_offset v32.l, 0
; VENTUS-NEXT: lw s0, 0(a0)
; VENTUS-NEXT: lw s1, 4(a0)
@ -33,7 +33,7 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali
; VENTUS-NEXT: lw ra, -4(sp)
; VENTUS-NEXT: lw s0, -8(sp)
; VENTUS-NEXT: lw s1, -12(sp)
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: addi sp, sp, -12
; VENTUS-NEXT: addi tp, tp, -4
; VENTUS-NEXT: ret
entry:

View File

@ -8,12 +8,12 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-LABEL: foo:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: addi tp, tp, 20
; VENTUS-NEXT: addi tp, tp, 4
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, -36(sp)
; VENTUS-NEXT: sw s0, -40(sp)
; VENTUS-NEXT: sw s1, -44(sp)
; VENTUS-NEXT: sw s2, -48(sp)
; VENTUS-NEXT: sw ra, -4(sp)
; VENTUS-NEXT: sw s0, -8(sp)
; VENTUS-NEXT: sw s1, -12(sp)
; VENTUS-NEXT: sw s2, -16(sp)
; VENTUS-NEXT: .cfi_offset ra, 12
; VENTUS-NEXT: .cfi_offset s0, 8
; VENTUS-NEXT: .cfi_offset s1, 4
@ -21,18 +21,17 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-NEXT: lw s0, 0(a0)
; VENTUS-NEXT: lui a0, %hi(foo.b)
; VENTUS-NEXT: addi s1, a0, %lo(foo.b)
; VENTUS-NEXT: addi s2, sp, -32
; VENTUS-NEXT: vmv.v.x v0, s2
; VENTUS-NEXT: vmv.v.x v1, s1
; VENTUS-NEXT: vmv.v.x v2, s0
; VENTUS-NEXT: addi a0, sp, -16
; VENTUS-NEXT: vmv.v.x v0, s1
; VENTUS-NEXT: vmv.v.x v1, s0
; VENTUS-NEXT: addi s2, sp, -16
; VENTUS-NEXT: call bar
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z12get_local_idj
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: li a1, 4
; VENTUS-NEXT: vmv.v.x v0, a1
; VENTUS-NEXT: li a0, 4
; VENTUS-NEXT: vmv.v.x v1, a0
; VENTUS-NEXT: vbltu v0, v1, .LBB0_2
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vbltu v1, v0, .LBB0_2
; VENTUS-NEXT: # %bb.1: # %if.then
; VENTUS-NEXT: slli a0, a0, 2
; VENTUS-NEXT: add s2, s2, a0
@ -53,12 +52,12 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-NEXT: sw zero, 0(a0)
; VENTUS-NEXT: join v0, v0, .LBB0_3
; VENTUS-NEXT: .LBB0_3: # %if.end
; VENTUS-NEXT: lw ra, -36(sp)
; VENTUS-NEXT: lw s0, -40(sp)
; VENTUS-NEXT: lw s1, -44(sp)
; VENTUS-NEXT: lw s2, -48(sp)
; VENTUS-NEXT: lw ra, -4(sp)
; VENTUS-NEXT: lw s0, -8(sp)
; VENTUS-NEXT: lw s1, -12(sp)
; VENTUS-NEXT: lw s2, -16(sp)
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: addi tp, tp, -20
; VENTUS-NEXT: addi tp, tp, -4
; VENTUS-NEXT: ret
entry:
%a = alloca [5 x i32], align 4, addrspace(5)
@ -161,7 +160,6 @@ entry:
define dso_local void @private_memmory(ptr addrspace(5) nocapture noundef %a) local_unnamed_addr {
; VENTUS-LABEL: private_memmory:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vlw.v v0, 0(a0)
; VENTUS-NEXT: lui a1, %hi(global_int)
; VENTUS-NEXT: lw a1, %lo(global_int)(a1)
@ -180,7 +178,6 @@ entry:
define dso_local void @private_memmory_with_offset(ptr addrspace(5) nocapture noundef %a) local_unnamed_addr{
; VENTUS-LABEL: private_memmory_with_offset:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vlw.v v0, 4(a0)
; VENTUS-NEXT: lui a1, %hi(global_int)
; VENTUS-NEXT: lw a1, %lo(global_int)(a1)
@ -200,7 +197,6 @@ entry:
define dso_local void @private_memmory_lh(ptr addrspace(5) nocapture noundef %a) local_unnamed_addr {
; VENTUS-LABEL: private_memmory_lh:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: lui a1, %hi(global_short)
; VENTUS-NEXT: lh a1, %lo(global_short)(a1)
; VENTUS-NEXT: vlh.v v0, 0(a0)
@ -219,7 +215,6 @@ entry:
define dso_local zeroext i16 @private_memmory_lhu(ptr addrspace(5) nocapture noundef readonly %a) local_unnamed_addr {
; VENTUS-LABEL: private_memmory_lhu:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vlhu.v v0, 0(a0)
; VENTUS-NEXT: ret
entry:
@ -231,7 +226,6 @@ entry:
define dso_local zeroext i8 @private_memmory_lbu(ptr addrspace(5) nocapture noundef readonly %a) local_unnamed_addr {
; VENTUS-LABEL: private_memmory_lbu:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vlbu.v v0, 0(a0)
; VENTUS-NEXT: ret
entry:

View File

@ -5,12 +5,12 @@
define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrspace(1) nocapture noundef readonly %B) {
; VENTUS-LABEL: foo_fun:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: addi sp, sp, 4
; VENTUS-NEXT: addi tp, tp, 8
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: sw ra, -8(sp)
; VENTUS-NEXT: .cfi_offset ra, 8
; VENTUS-NEXT: .cfi_offset v32.l, 4
; VENTUS-NEXT: sw ra, -4(sp)
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: .cfi_offset v32.l, 0
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: addi s0, tp, -8
; VENTUS-NEXT: .cfi_def_cfa s0, 0
@ -29,8 +29,8 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
; VENTUS-NEXT: vlw12.v v2, 0(v0)
; VENTUS-NEXT: vadd.vv v1, v2, v1
; VENTUS-NEXT: vsw12.v v1, 0(v0)
; VENTUS-NEXT: lw ra, -8(sp)
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: lw ra, -4(sp)
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: addi tp, tp, -8
; VENTUS-NEXT: ret
entry:

View File

@ -5,11 +5,11 @@
define ventus_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
; VENTUS-LABEL: foo_ker:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, -8(sp)
; VENTUS-NEXT: sw s0, -12(sp)
; VENTUS-NEXT: sw s1, -16(sp)
; VENTUS-NEXT: addi sp, sp, 12
; VENTUS-NEXT: .cfi_def_cfa_offset 12
; VENTUS-NEXT: sw ra, -4(sp)
; VENTUS-NEXT: sw s0, -8(sp)
; VENTUS-NEXT: sw s1, -12(sp)
; VENTUS-NEXT: .cfi_offset ra, 8
; VENTUS-NEXT: .cfi_offset s0, 4
; VENTUS-NEXT: .cfi_offset s1, 0
@ -24,10 +24,10 @@ define ventus_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A
; VENTUS-NEXT: vlw12.v v2, 0(v0)
; VENTUS-NEXT: vadd.vv v1, v2, v1
; VENTUS-NEXT: vsw12.v v1, 0(v0)
; VENTUS-NEXT: lw ra, -8(sp)
; VENTUS-NEXT: lw s0, -12(sp)
; VENTUS-NEXT: lw s1, -16(sp)
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: lw ra, -4(sp)
; VENTUS-NEXT: lw s0, -8(sp)
; VENTUS-NEXT: lw s1, -12(sp)
; VENTUS-NEXT: addi sp, sp, -12
; VENTUS-NEXT: ret
entry:
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
@ -43,12 +43,12 @@ entry:
define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrspace(1) nocapture noundef readonly %B) {
; VENTUS-LABEL: foo_fun:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: addi sp, sp, 4
; VENTUS-NEXT: addi tp, tp, 8
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: sw ra, -8(sp)
; VENTUS-NEXT: .cfi_offset ra, 8
; VENTUS-NEXT: .cfi_offset v32.l, 4
; VENTUS-NEXT: sw ra, -4(sp)
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: .cfi_offset v32.l, 0
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: addi s0, tp, -8
; VENTUS-NEXT: .cfi_def_cfa s0, 0
@ -63,8 +63,8 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
; VENTUS-NEXT: vlw12.v v2, 0(v0)
; VENTUS-NEXT: vadd.vv v1, v2, v1
; VENTUS-NEXT: vsw12.v v1, 0(v0)
; VENTUS-NEXT: lw ra, -8(sp)
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: lw ra, -4(sp)
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: addi tp, tp, -8
; VENTUS-NEXT: ret
entry:

View File

@ -5,10 +5,11 @@
define dso_local ventus_kernel void @_kernel(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B){
; VENTUS-LABEL: _kernel:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: .cfi_def_cfa_offset 0
; VENTUS-NEXT: sw ra, -8(sp)
; VENTUS-NEXT: sw s0, -12(sp)
; VENTUS-NEXT: sw s1, -16(sp)
; VENTUS-NEXT: addi sp, sp, 12
; VENTUS-NEXT: .cfi_def_cfa_offset 12
; VENTUS-NEXT: sw ra, -4(sp)
; VENTUS-NEXT: sw s0, -8(sp)
; VENTUS-NEXT: sw s1, -12(sp)
; VENTUS-NEXT: .cfi_offset ra, 8
; VENTUS-NEXT: .cfi_offset s0, 4
; VENTUS-NEXT: .cfi_offset s1, 0
@ -23,9 +24,10 @@ define dso_local ventus_kernel void @_kernel(ptr addrspace(1) nocapture noundef
; VENTUS-NEXT: vlw12.v v2, 0(v0)
; VENTUS-NEXT: vfadd.vv v1, v1, v2
; VENTUS-NEXT: vsw12.v v1, 0(v0)
; VENTUS-NEXT: lw ra, -8(sp)
; VENTUS-NEXT: lw s0, -12(sp)
; VENTUS-NEXT: lw s1, -16(sp)
; VENTUS-NEXT: lw ra, -4(sp)
; VENTUS-NEXT: lw s0, -8(sp)
; VENTUS-NEXT: lw s1, -12(sp)
; VENTUS-NEXT: addi sp, sp, -12
; VENTUS-NEXT: ret
entry:
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
@ -64,12 +66,11 @@ entry:
; This non-kernel function takes 34 arguments, which is beyond the limit of 32,
; so the remaining two arguments need to be passed via the tp stack
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read, inaccessiblemem: none)
define dso_local i32 @non_kernel(ptr nocapture noundef readonly %a1, ptr nocapture noundef readonly %a2, ptr nocapture noundef readonly %a3, ptr nocapture noundef readonly %a4, ptr nocapture noundef readonly %a5, ptr nocapture noundef readonly %a6, ptr nocapture noundef readonly %a7, ptr nocapture noundef readonly %a8, ptr nocapture noundef readonly %a9, ptr nocapture noundef readonly %a10, ptr nocapture noundef readonly %a11, ptr nocapture noundef readonly %a12, ptr nocapture noundef readonly %a13, ptr nocapture noundef readonly %a14, ptr nocapture noundef readonly %a15, ptr nocapture noundef readonly %a16, ptr nocapture noundef readonly %a17, ptr nocapture noundef readonly %a18, ptr nocapture noundef readonly %a19, ptr nocapture noundef readonly %a20, ptr nocapture noundef readonly %a21, ptr nocapture noundef readonly %a22, ptr nocapture noundef readonly %a23, ptr nocapture noundef readonly %a24, ptr nocapture noundef readonly %a25, ptr nocapture noundef readonly %a26, ptr nocapture noundef readonly %a27, ptr nocapture noundef readonly %a28, ptr nocapture noundef readonly %a29, ptr nocapture noundef readonly %a30, ptr nocapture noundef readonly %a31, ptr nocapture noundef readonly %a32,
ptr addrspace(5) nocapture noundef readonly %0, ptr addrspace(5) nocapture noundef readonly %1) {
define dso_local i32 @non_kernel(ptr nocapture noundef readonly %a1, ptr nocapture noundef readonly %a2, ptr nocapture noundef readonly %a3, ptr nocapture noundef readonly %a4, ptr nocapture noundef readonly %a5, ptr nocapture noundef readonly %a6, ptr nocapture noundef readonly %a7, ptr nocapture noundef readonly %a8, ptr nocapture noundef readonly %a9, ptr nocapture noundef readonly %a10, ptr nocapture noundef readonly %a11, ptr nocapture noundef readonly %a12, ptr nocapture noundef readonly %a13, ptr nocapture noundef readonly %a14, ptr nocapture noundef readonly %a15, ptr nocapture noundef readonly %a16, ptr nocapture noundef readonly %a17, ptr nocapture noundef readonly %a18, ptr nocapture noundef readonly %a19, ptr nocapture noundef readonly %a20, ptr nocapture noundef readonly %a21, ptr nocapture noundef readonly %a22, ptr nocapture noundef readonly %a23, ptr nocapture noundef readonly %a24, ptr nocapture noundef readonly %a25, ptr nocapture noundef readonly %a26, ptr nocapture noundef readonly %a27, ptr nocapture noundef readonly %a28, ptr nocapture noundef readonly %a29, ptr nocapture noundef readonly %a30, ptr nocapture noundef readonly %a31, ptr nocapture noundef readonly %a32,
; VENTUS-LABEL: non_kernel:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vlw.v v49, 0(a0)
; VENTUS-NEXT: vlw.v v48, 0(a1)
; VENTUS-NEXT: vlw.v v48, 0(a0)
; VENTUS-NEXT: vlw.v v49, 0(a1)
; VENTUS-NEXT: vlw12.v v0, 0(v0)
; VENTUS-NEXT: vlw12.v v1, 0(v1)
; VENTUS-NEXT: vlw12.v v2, 0(v2)
@ -132,12 +133,13 @@ ptr addrspace(5) nocapture noundef readonly %0, ptr addrspace(5) nocapture nound
; VENTUS-NEXT: vadd.vv v0, v0, v2
; VENTUS-NEXT: vadd.vv v0, v0, v3
; VENTUS-NEXT: vlw12.v v1, 0(v31)
; VENTUS-NEXT: vlw12.v v2, 0(v49)
; VENTUS-NEXT: vlw12.v v3, 0(v48)
; VENTUS-NEXT: vlw12.v v2, 0(v48)
; VENTUS-NEXT: vlw12.v v3, 0(v49)
; VENTUS-NEXT: vadd.vv v0, v0, v1
; VENTUS-NEXT: vadd.vv v0, v0, v2
; VENTUS-NEXT: vadd.vv v0, v0, v3
; VENTUS-NEXT: ret
ptr addrspace(5) nocapture noundef readonly %0, ptr addrspace(5) nocapture noundef readonly %1) {
entry:
%a33 = load ptr, ptr addrspace(5) %0, align 4
%a34 = load ptr, ptr addrspace(5) %1, align 4

View File

@ -23,10 +23,10 @@ entry:
define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) noundef align 4 %c) {
; VENTUS-LABEL: foo:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, -12(sp)
; VENTUS-NEXT: sw s0, -16(sp)
; VENTUS-NEXT: addi sp, sp, 8
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: sw ra, -4(sp)
; VENTUS-NEXT: sw s0, -8(sp)
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: .cfi_offset s0, 0
; VENTUS-NEXT: lw s0, 8(a0)
@ -37,9 +37,9 @@ define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr add
; VENTUS-NEXT: call bar
; VENTUS-NEXT: vmv.v.x v1, s0
; VENTUS-NEXT: vsw12.v v0, 0(v1)
; VENTUS-NEXT: lw ra, -12(sp)
; VENTUS-NEXT: lw s0, -16(sp)
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: lw ra, -4(sp)
; VENTUS-NEXT: lw s0, -8(sp)
; VENTUS-NEXT: addi sp, sp, -8
; VENTUS-NEXT: ret
entry:
%a.addr = alloca i32, align 4, addrspace(5)

View File

@ -12,9 +12,8 @@ target triple = "riscv32"
define dso_local i32 @printf(ptr noundef %fmt, ...) {
; VENTUS-LABEL: printf:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 32
; VENTUS-NEXT: addi tp, tp, 28
; VENTUS-NEXT: .cfi_def_cfa_offset 28
; VENTUS-NEXT: addi tp, tp, 76
; VENTUS-NEXT: .cfi_def_cfa_offset 76
; VENTUS-NEXT: li a0, 0
; VENTUS-NEXT: vsw.v v7, -28(tp)
; VENTUS-NEXT: vsw.v v6, -24(tp)
@ -45,8 +44,7 @@ define dso_local i32 @printf(ptr noundef %fmt, ...) {
; VENTUS-NEXT: blt a0, a4, .LBB0_1
; VENTUS-NEXT: .LBB0_2: # %for.end
; VENTUS-NEXT: vmv.v.x v0, a1
; VENTUS-NEXT: addi sp, sp, -32
; VENTUS-NEXT: addi tp, tp, -28
; VENTUS-NEXT: addi tp, tp, -76
; VENTUS-NEXT: ret
entry:
%fmt.addr = alloca ptr, align 4

View File

@ -6,35 +6,35 @@
define dso_local i32 @branch(i32 noundef %dim) local_unnamed_addr {
; VENTUS-LABEL: branch:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: addi sp, sp, 4
; VENTUS-NEXT: .cfi_def_cfa_offset 0
; VENTUS-NEXT: sw ra, -16(sp)
; VENTUS-NEXT: sw ra, -4(sp)
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: mv s0, tp
; VENTUS-NEXT: .cfi_def_cfa s0, 0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: li a1, 14
; VENTUS-NEXT: vmv.v.x v1, a1
; VENTUS-NEXT: li a1, 13
; VENTUS-NEXT: li a0, 14
; VENTUS-NEXT: vmv.v.x v1, a0
; VENTUS-NEXT: li a0, 13
; VENTUS-NEXT: vblt v0, v1, .LBB0_5
; VENTUS-NEXT: # %bb.1: # %if.else
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: li a1, 18
; VENTUS-NEXT: bgeu a0, a1, .LBB0_3
; VENTUS-NEXT: vmv.x.s a1, v0
; VENTUS-NEXT: li a0, 18
; VENTUS-NEXT: bgeu a1, a0, .LBB0_3
; VENTUS-NEXT: join v0, v0, .LBB0_2
; VENTUS-NEXT: .LBB0_5:
; VENTUS-NEXT: join v0, v0, .LBB0_2
; VENTUS-NEXT: .LBB0_2: # %cleanup
; VENTUS-NEXT: vmv.v.x v0, a1
; VENTUS-NEXT: lw ra, -16(sp)
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: vmv.v.x v0, a0
; VENTUS-NEXT: lw ra, -4(sp)
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: join v0, v0, .LBB0_4
; VENTUS-NEXT: .LBB0_3: # %if.end3
; VENTUS-NEXT: li a0, 4
; VENTUS-NEXT: vmv.v.x v0, a0
; VENTUS-NEXT: lw ra, -16(sp)
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: lw ra, -4(sp)
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: join v0, v0, .LBB0_4
; VENTUS-NEXT: .LBB0_4:
@ -60,22 +60,21 @@ cleanup: ; preds = %if.else, %entry, %i
define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
; VENTUS-LABEL: loop_branch:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, -12(sp)
; VENTUS-NEXT: sw s1, -16(sp)
; VENTUS-NEXT: addi sp, sp, 8
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: sw ra, -4(sp)
; VENTUS-NEXT: sw s0, -8(sp)
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: .cfi_offset s1, 0
; VENTUS-NEXT: mv s1, a0
; VENTUS-NEXT: .cfi_offset s0, 0
; VENTUS-NEXT: mv s0, a0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: vmv.v.x v1, a0
; VENTUS-NEXT: vbeq v1, v0, .LBB1_4
; VENTUS-NEXT: vmv.v.x v1, zero
; VENTUS-NEXT: vbeq v0, v1, .LBB1_4
; VENTUS-NEXT: # %bb.1: # %for.body.lr.ph
; VENTUS-NEXT: lw a3, 4(s1)
; VENTUS-NEXT: lw a1, 0(s1)
; VENTUS-NEXT: lw a3, 4(s0)
; VENTUS-NEXT: lw a1, 0(s0)
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: slli a4, a0, 2
; VENTUS-NEXT: add a1, a1, a4
; VENTUS-NEXT: lw a2, 0(a1)
@ -91,9 +90,9 @@ define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noun
; VENTUS-NEXT: .LBB1_4:
; VENTUS-NEXT: join v0, v0, .LBB1_3
; VENTUS-NEXT: .LBB1_3: # %for.cond.cleanup
; VENTUS-NEXT: lw ra, -12(sp)
; VENTUS-NEXT: lw s1, -16(sp)
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: lw ra, -4(sp)
; VENTUS-NEXT: lw s0, -8(sp)
; VENTUS-NEXT: addi sp, sp, -8
; VENTUS-NEXT: ret
entry:
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
@ -202,31 +201,30 @@ for.body: ; preds = %for.body.lr.ph, %fo
define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
; VENTUS-LABEL: double_loop:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, -12(sp)
; VENTUS-NEXT: sw s1, -16(sp)
; VENTUS-NEXT: addi sp, sp, 8
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: sw ra, -4(sp)
; VENTUS-NEXT: sw s0, -8(sp)
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: .cfi_offset s1, 0
; VENTUS-NEXT: mv s1, a0
; VENTUS-NEXT: .cfi_offset s0, 0
; VENTUS-NEXT: mv s0, a0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: vmv.v.x v1, a0
; VENTUS-NEXT: vbeq v1, v0, .LBB2_6
; VENTUS-NEXT: vmv.v.x v1, zero
; VENTUS-NEXT: vbeq v0, v1, .LBB2_6
; VENTUS-NEXT: # %bb.1: # %for.cond1.preheader.lr.ph
; VENTUS-NEXT: li a1, 0
; VENTUS-NEXT: lw a4, 4(s1)
; VENTUS-NEXT: lw a2, 0(s1)
; VENTUS-NEXT: slli a5, a0, 2
; VENTUS-NEXT: li a0, 0
; VENTUS-NEXT: lw a4, 4(s0)
; VENTUS-NEXT: lw a2, 0(s0)
; VENTUS-NEXT: vmv.x.s a1, v0
; VENTUS-NEXT: slli a5, a1, 2
; VENTUS-NEXT: add a2, a2, a5
; VENTUS-NEXT: lw a3, 0(a2)
; VENTUS-NEXT: add a4, a4, a5
; VENTUS-NEXT: .LBB2_2: # %for.cond1.preheader
; VENTUS-NEXT: # =>This Loop Header: Depth=1
; VENTUS-NEXT: # Child Loop BB2_3 Depth 2
; VENTUS-NEXT: mv a5, a0
; VENTUS-NEXT: mv a5, a1
; VENTUS-NEXT: .LBB2_3: # %for.body4
; VENTUS-NEXT: # Parent Loop BB2_2 Depth=1
; VENTUS-NEXT: # => This Inner Loop Header: Depth=2
@ -237,15 +235,15 @@ define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noun
; VENTUS-NEXT: bnez a5, .LBB2_3
; VENTUS-NEXT: # %bb.4: # %for.cond1.for.cond.cleanup3_crit_edge
; VENTUS-NEXT: # in Loop: Header=BB2_2 Depth=1
; VENTUS-NEXT: addi a1, a1, 1
; VENTUS-NEXT: bne a1, a0, .LBB2_2
; VENTUS-NEXT: addi a0, a0, 1
; VENTUS-NEXT: bne a0, a1, .LBB2_2
; VENTUS-NEXT: join v0, v0, .LBB2_5
; VENTUS-NEXT: .LBB2_6:
; VENTUS-NEXT: join v0, v0, .LBB2_5
; VENTUS-NEXT: .LBB2_5: # %for.cond.cleanup
; VENTUS-NEXT: lw ra, -12(sp)
; VENTUS-NEXT: lw s1, -16(sp)
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: lw ra, -4(sp)
; VENTUS-NEXT: lw s0, -8(sp)
; VENTUS-NEXT: addi sp, sp, -8
; VENTUS-NEXT: ret
entry:
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
@ -286,24 +284,23 @@ for.body4: ; preds = %for.cond1.preheader
define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
; VENTUS-LABEL: loop_switch:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, -12(sp)
; VENTUS-NEXT: sw s1, -16(sp)
; VENTUS-NEXT: addi sp, sp, 8
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: sw ra, -4(sp)
; VENTUS-NEXT: sw s0, -8(sp)
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: .cfi_offset s1, 0
; VENTUS-NEXT: mv s1, a0
; VENTUS-NEXT: .cfi_offset s0, 0
; VENTUS-NEXT: mv s0, a0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: vmv.v.x v1, a0
; VENTUS-NEXT: vbeq v1, v0, .LBB3_10
; VENTUS-NEXT: vmv.v.x v1, zero
; VENTUS-NEXT: vbeq v0, v1, .LBB3_10
; VENTUS-NEXT: # %bb.1: # %for.body.lr.ph
; VENTUS-NEXT: li a1, 0
; VENTUS-NEXT: lw a2, 4(s1)
; VENTUS-NEXT: lw a5, 0(s1)
; VENTUS-NEXT: slli a3, a0, 2
; VENTUS-NEXT: li a0, 0
; VENTUS-NEXT: lw a2, 4(s0)
; VENTUS-NEXT: lw a5, 0(s0)
; VENTUS-NEXT: vmv.x.s a1, v0
; VENTUS-NEXT: slli a3, a1, 2
; VENTUS-NEXT: add a2, a2, a3
; VENTUS-NEXT: add a3, a5, a3
; VENTUS-NEXT: addi a4, a5, 8
@ -322,20 +319,20 @@ define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noun
; VENTUS-NEXT: sw t1, 0(t0)
; VENTUS-NEXT: .LBB3_4: # %for.inc
; VENTUS-NEXT: # in Loop: Header=BB3_5 Depth=1
; VENTUS-NEXT: addi a1, a1, 1
; VENTUS-NEXT: beq a0, a1, .LBB3_9
; VENTUS-NEXT: addi a0, a0, 1
; VENTUS-NEXT: beq a1, a0, .LBB3_9
; VENTUS-NEXT: join v0, v0, .LBB3_9
; VENTUS-NEXT: .LBB3_5: # %for.body
; VENTUS-NEXT: # =>This Inner Loop Header: Depth=1
; VENTUS-NEXT: beqz a1, .LBB3_4
; VENTUS-NEXT: beqz a0, .LBB3_4
; VENTUS-NEXT: # %bb.6: # %for.body
; VENTUS-NEXT: # in Loop: Header=BB3_5 Depth=1
; VENTUS-NEXT: mv t0, a5
; VENTUS-NEXT: li t1, 2
; VENTUS-NEXT: beq a1, a6, .LBB3_3
; VENTUS-NEXT: beq a0, a6, .LBB3_3
; VENTUS-NEXT: # %bb.7: # %for.body
; VENTUS-NEXT: # in Loop: Header=BB3_5 Depth=1
; VENTUS-NEXT: bne a1, a7, .LBB3_2
; VENTUS-NEXT: bne a0, a7, .LBB3_2
; VENTUS-NEXT: # %bb.8: # %sw.bb4
; VENTUS-NEXT: # in Loop: Header=BB3_5 Depth=1
; VENTUS-NEXT: li t1, 23
@ -344,9 +341,9 @@ define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noun
; VENTUS-NEXT: .LBB3_10:
; VENTUS-NEXT: join v0, v0, .LBB3_9
; VENTUS-NEXT: .LBB3_9: # %for.cond.cleanup
; VENTUS-NEXT: lw ra, -12(sp)
; VENTUS-NEXT: lw s1, -16(sp)
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: lw ra, -4(sp)
; VENTUS-NEXT: lw s0, -8(sp)
; VENTUS-NEXT: addi sp, sp, -8
; VENTUS-NEXT: ret
entry:
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
@ -395,9 +392,9 @@ for.inc: ; preds = %for.inc.sink.split,
define dso_local i32 @_Z13get_global_idj(i32 noundef %dim) local_unnamed_addr {
; VENTUS-LABEL: _Z13get_global_idj:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: addi sp, sp, 4
; VENTUS-NEXT: .cfi_def_cfa_offset 0
; VENTUS-NEXT: sw ra, -16(sp)
; VENTUS-NEXT: sw ra, -4(sp)
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: mv s0, tp
; VENTUS-NEXT: .cfi_def_cfa s0, 0
@ -428,8 +425,8 @@ define dso_local i32 @_Z13get_global_idj(i32 noundef %dim) local_unnamed_addr {
; VENTUS-NEXT: join v0, v0, .LBB4_8
; VENTUS-NEXT: .LBB4_8: # %return
; VENTUS-NEXT: vmv.v.x v0, a0
; VENTUS-NEXT: lw ra, -16(sp)
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: lw ra, -4(sp)
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: ret
entry:
switch i32 %dim, label %return [