[VENTUS][fix] Refactor the tp&sp stack size and frame object's offset calculation

In the traditional LLVM framework, a function's stack uses only a single kind
of stack pointer; for RISCV, these are the sp and s0 registers. In Ventus,
because of the existence of per-thread private memory, we designed a new
per-thread stack, which also uses the MachineFrameInfo APIs. As a result, the
frame objects' offset calculation produces wrong results if we follow the
official RISCV way. This patch assigns a stack ID to every frame object, and
then calculates each object's stack offset using only the frame objects that
share the same stack ID, regardless of the other stack objects.
This commit is contained in:
zhoujingya 2023-11-24 17:59:15 +08:00
parent e4582536e7
commit 650f1199e9
14 changed files with 169 additions and 149 deletions

View File

@ -300,7 +300,8 @@ getNonLibcallCSI(const MachineFunction &MF,
// TODO: For now, we don't define VGPR callee saved registers, when we later
// add VGPR callee saved register, remember to modify here
if (FI >= 0 && (MFI.getStackID(FI) == RISCVStackID::Default ||
MFI.getStackID(FI) == RISCVStackID::SGPRSpill))
MFI.getStackID(FI) == RISCVStackID::SGPRSpill ||
MFI.getStackID(FI) == RISCVStackID::VGPRSpill))
NonLibcallCSI.push_back(CS);
}
@ -505,18 +506,23 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
emitSCSEpilogue(MF, MBB, MBBI, DL);
}
uint64_t RISCVFrameLowering::getExtractedStackOffset(const MachineFunction &MF,
unsigned FI, RISCVStackID::Value Stack) const {
uint64_t RISCVFrameLowering::getStackOffset(const MachineFunction &MF,
unsigned FI,
RISCVStackID::Value Stack) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
uint64_t StackSize = 0;
for(int I = FI + 1; I != MFI.getObjectIndexEnd(); I++) {
if(static_cast<unsigned>(MFI.getStackID(I)) != Stack) {
for (int I = MFI.getObjectIndexBegin(); I != (int)FI + 1; I++) {
if (static_cast<unsigned>(MFI.getStackID(I)) == Stack) {
// Need to consider the alignment for different frame index
uint64_t Size = MFI.getObjectSize(I);
StackSize += Size;
Align Alignment =
MFI.getObjectAlign(I).value() <= 4 ? Align(4) : MFI.getObjectAlign(I);
uint64_t AlignedSize = alignTo(MFI.getObjectSize(I), Alignment);
StackSize += AlignedSize;
}
}
return StackSize;
return alignTo(StackSize, MFI.getObjectAlign(FI).value() <= 4
? Align(4)
: MFI.getObjectAlign(FI));
}
StackOffset
@ -536,33 +542,11 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
StackID == RISCVStackID::SGPRSpill ||
StackID == RISCVStackID::VGPRSpill) &&
"Unexpected stack ID for the frame object.");
uint8_t Stack = MFI.getStackID(FI);
StackOffset Offset =
StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea()
-getExtractedStackOffset(MF, FI, RISCVStackID::Value(Stack))
+ MFI.getOffsetAdjustment());
// Different stacks for sALU and vALU threads.
FrameReg = StackID == RISCVStackID::SGPRSpill ? RISCV::X2 : RISCV::X4;
if (CSI.size()) {
// For callee saved registers
MinCSFI = CSI[0].getFrameIdx();
MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
if (FI >= MinCSFI && FI <= MaxCSFI) {
Offset -= StackOffset::getFixed(RVFI->getVarArgsSaveSize());
return Offset;
}
}
// TODO: This only saves sGPR CSRs, as we haven't define vGPR CSRs
// within getNonLibcallCSI.
// if (FI >= MinCSFI && FI <= MaxCSFI) {
Offset -= StackOffset::getFixed(
getStackSize(const_cast<MachineFunction&>(MF),
(RISCVStackID::Value)StackID));
return Offset;
FrameReg = StackID == RISCVStackID::VGPRSpill ? RISCV::X4 : RISCV::X2;
return -StackOffset::getFixed(
getStackOffset(MF, FI, (RISCVStackID::Value)StackID));
}
void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
@ -702,16 +686,17 @@ uint64_t RISCVFrameLowering::getStackSize(MachineFunction &MF,
RISCVStackID::Value ID) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
uint64_t StackSize = 0;
Align Alignment = Align(4);
for(int I = MFI.getObjectIndexBegin(); I != MFI.getObjectIndexEnd(); I++) {
if(static_cast<unsigned>(MFI.getStackID(I)) == ID) {
// Need to consider the alignment for different frame index
uint64_t Size = ((MFI.getObjectSize(I) + 3) >> 2) * 4;
StackSize += Size;
// FIXME: this code logic maybe not that correct?
StackSize += ((MFI.getObjectSize(I) + 3) >> 2) * 4;
// Get frame object largest alignment
Alignment = std::max(MFI.getObjectAlign(I), Alignment);
}
}
return StackSize;
// FIXME: maybe this alignment is too simple?
return alignTo(StackSize, Alignment);
}
void RISCVFrameLowering::determineStackID(MachineFunction &MF) const {
@ -760,17 +745,17 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
Register Reg = CS.getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
// TODO: Have we allocated stack for vGPR spilling?
if(Reg.id() < RISCV::V0 || Reg.id() > RISCV::V255) {
MF->getFrameInfo().setStackID(CS.getFrameIdx(), RISCVStackID::SGPRSpill);
// FIXME: Right now, no vgpr callee saved register, maybe later needed
TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
RC, TRI);
} else {
assert(Reg.id() >= RISCV::V32 && Reg.id() <= RISCV::V255 && "TODO");
MF->getFrameInfo().setStackID(CS.getFrameIdx(), RISCVStackID::VGPRSpill);
TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
RC, TRI);
}
// else {
// FIXME: Right now, no callee saved register for VGPR
// MF->getFrameInfo().setStackID(CS.getFrameIdx(), RISCVStackID::VGPRSpill);
// }
}
return true;
@ -798,8 +783,7 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
for (auto &CS : NonLibcallCSI) {
Register Reg = CS.getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
if(Reg.id() < RISCV::V0 || Reg.id() > RISCV::V255 )
TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
}

View File

@ -70,14 +70,22 @@ public:
/// Get stack size for different stack ID
uint64_t getStackSize(MachineFunction &MF, RISCVStackID::Value ID) const;
/// Calculate frame object's stack offset
/// Frame Objects:
/// fi#0: id=4 size=48, align=4, at location [SP+8]
/// fi#1: id=1 size=4, align=4, at location [SP+4] \
/// fi#2: id=1 size=4, align=4, at location [SP] \
/// fi#3: id=4 size=4, align=4, at location [SP+16] \
/// As we can see, if we split the stack, different frame offset calculation
/// need to be modified too, when calculate the TP stack offset, we need to
/// extract the stack offset of 'SP' in machine function frame
uint64_t getExtractedStackOffset(const MachineFunction &MF, unsigned FI,
/// need to be modified too. The basic routine is as follows:
/// 1st: Mark all the frame objects and give each of them a stack identifier
/// id; in ventus, they will be: Default, VGPR, SGPR
///
/// 2nd: Calculate the frame offset separately for each stack identifier.
/// Unlike the traditional riscv stack frame offset calculation, we simplify
/// this procedure; we do not have to care about RVV, etc.
uint64_t getStackOffset(const MachineFunction &MF, unsigned FI,
RISCVStackID::Value Stack) const;
/// Before insert prolog/epilog information, set stack ID for each frame index

View File

@ -12,12 +12,14 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: lw t0, 0(a0)
; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: regext zero, zero, 1
@ -25,17 +27,19 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z12get_local_idj
; VENTUS-NEXT: vsll.vi v0, v0, 2
; VENTUS-NEXT: lw t1, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t1, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vadd.vx v0, v0, t1
; VENTUS-NEXT: vlw12.v v0, 0(v0)
; VENTUS-NEXT: regext zero, zero, 64
; VENTUS-NEXT: vsll.vi v1, v33, 2
; VENTUS-NEXT: lw t0, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t0, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vadd.vx v1, v1, t0
; VENTUS-NEXT: vlw12.v v2, 0(v1)
; VENTUS-NEXT: vadd.vv v0, v2, v0
; VENTUS-NEXT: vsw12.v v0, 0(v1)
; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -12
; VENTUS-NEXT: addi tp, tp, -4
; VENTUS-NEXT: ret

View File

@ -14,16 +14,18 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v33, -24(v32) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: lw t0, 0(a0)
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v33, t0
; VENTUS-NEXT: lui t1, %hi(foo.b)
; VENTUS-NEXT: addi t2, t1, %lo(foo.b)
; VENTUS-NEXT: addi t1, tp, -24
; VENTUS-NEXT: addi t1, tp, -20
; VENTUS-NEXT: vmv.v.x v0, t1
; VENTUS-NEXT: sw t2, 16(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw t2, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: vmv.v.x v1, t2
; VENTUS-NEXT: vmv.v.x v2, t0
; VENTUS-NEXT: call bar
@ -37,10 +39,10 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-NEXT: vbltu v1, v0, .LBB0_2
; VENTUS-NEXT: # %bb.1: # %if.then
; VENTUS-NEXT: vsll.vi v0, v0, 2
; VENTUS-NEXT: addi t0, tp, -24
; VENTUS-NEXT: addi t0, tp, -20
; VENTUS-NEXT: vadd.vx v1, v0, t0
; VENTUS-NEXT: vlw.v v1, 0(v1)
; VENTUS-NEXT: lw t1, 16(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t1, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vadd.vx v2, v0, t1
; VENTUS-NEXT: vlw12.v v2, 0(v2)
; VENTUS-NEXT: regext zero, zero, 64
@ -58,8 +60,11 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-NEXT: vadd.vv v0, v33, v0
; VENTUS-NEXT: vsw12.v v1, 0(v0)
; VENTUS-NEXT: .LBB0_3: # %if.end
; VENTUS-NEXT: join
; VENTUS-NEXT: # Label of block must be emitted
; VENTUS-NEXT: join zero, zero, 0
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v33, -24(v32) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -8
; VENTUS-NEXT: addi tp, tp, -24
; VENTUS-NEXT: ret
@ -241,10 +246,9 @@ define dso_local ventus_kernel void @local_memmory1(ptr addrspace(3) nocapture n
; VENTUS-LABEL: local_memmory1:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lw t0, 0(a0)
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: vlw12.v v1, 0(v0)
; VENTUS-NEXT: vadd.vi v1, v1, 1
; VENTUS-NEXT: vsw12.v v1, 0(v0)
; VENTUS-NEXT: lw t1, 0(t0)
; VENTUS-NEXT: addi t1, t1, 1
; VENTUS-NEXT: sw t1, 0(t0)
; VENTUS-NEXT: ret
entry:
%0 = load i32, ptr addrspace(3) %b, align 4

View File

@ -8,7 +8,7 @@ define dso_local ventus_kernel void @bitcast(float noundef %a, ptr addrspace(5)
; VENTUS-NEXT: lw t0, 8(a0)
; VENTUS-NEXT: flw t1, 0(a0)
; VENTUS-NEXT: lw t2, 4(a0)
; VENTUS-NEXT: vfmv.s.f v0, t1
; VENTUS-NEXT: vmv.v.x v0, t1
; VENTUS-NEXT: vmv.v.x v1, t2
; VENTUS-NEXT: vsw.v v0, 0(v1)
; VENTUS-NEXT: fsw t1, 0(t0)

View File

@ -14,7 +14,8 @@ define i32 @foo(i32 noundef %cond, i32 noundef %a, i32 noundef %b, i32 noundef %
; VENTUS-NEXT: # %bb.1:
; VENTUS-NEXT: vrsub.vi v3, v3, 0
; VENTUS-NEXT: .LBB0_2: # %entry
; VENTUS-NEXT: join
; VENTUS-NEXT: # Label of block must be emitted
; VENTUS-NEXT: join zero, zero, 0
; VENTUS-NEXT: vmadd.vv v2, v1, v3
; VENTUS-NEXT: vadd.vx v0, v2, zero
; VENTUS-NEXT: ret

View File

@ -12,8 +12,12 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 8
; VENTUS-NEXT: .cfi_offset v33.l, 4
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v34, -8(v32) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: .cfi_offset v34.l, 0
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vadd.vx v33, v1, zero
@ -31,6 +35,10 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
; VENTUS-NEXT: vadd.vv v1, v2, v1
; VENTUS-NEXT: vsw12.v v1, 0(v0)
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v34, -8(v32) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: addi tp, tp, -8
; VENTUS-NEXT: ret

View File

@ -10,16 +10,16 @@ define ventus_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: lw t0, 0(a0)
; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vsll.vi v0, v0, 2
; VENTUS-NEXT: lw t1, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t1, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vadd.vx v1, v0, t1
; VENTUS-NEXT: vlw12.v v1, 0(v1)
; VENTUS-NEXT: lw t0, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t0, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vadd.vx v0, v0, t0
; VENTUS-NEXT: vlw12.v v2, 0(v0)
; VENTUS-NEXT: vadd.vv v1, v2, v1
@ -48,8 +48,12 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 8
; VENTUS-NEXT: .cfi_offset v33.l, 4
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v34, -8(v32) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: .cfi_offset v34.l, 0
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vadd.vx v33, v1, zero
@ -67,6 +71,10 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
; VENTUS-NEXT: vadd.vv v1, v2, v1
; VENTUS-NEXT: vsw12.v v1, 0(v0)
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v34, -8(v32) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: addi tp, tp, -8
; VENTUS-NEXT: ret

View File

@ -15,14 +15,14 @@ define dso_local ventus_kernel void @kernel_calling_convention(ptr addrspace(1)
; VENTUS-NEXT: sw ra, -16(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: sw t0, -12(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: lw t0, 8(a0)
; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: lw t0, 0(a0)
; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw t0, -12(sp) # 4-byte Folded Spill
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: lw s0, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw s0, -12(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t0, 0(s0)
; VENTUS-NEXT: lw t2, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t1, 0(t2)
@ -30,12 +30,11 @@ define dso_local ventus_kernel void @kernel_calling_convention(ptr addrspace(1)
; VENTUS-NEXT: vadd.vx v0, v0, t1
; VENTUS-NEXT: vmv.v.x v1, s0
; VENTUS-NEXT: vsw12.v v0, 0(v1)
; VENTUS-NEXT: lw t0, -12(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: vlw12.v v1, 0(v0)
; VENTUS-NEXT: lw t0, 0(t2)
; VENTUS-NEXT: vadd.vx v1, v1, t0
; VENTUS-NEXT: vsw12.v v1, 0(v0)
; VENTUS-NEXT: lw s0, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t0, 0(s0)
; VENTUS-NEXT: lw t2, 0(t2)
; VENTUS-NEXT: add t0, t2, t0
; VENTUS-NEXT: sw t0, 0(s0)
; VENTUS-NEXT: lw ra, -16(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: ret
@ -82,16 +81,14 @@ entry:
define dso_local i32 @non_kernel_calling_convention(ptr nocapture noundef readonly %a1, ptr nocapture noundef readonly %a2, ptr nocapture noundef readonly %a3, ptr nocapture noundef readonly %a4, ptr nocapture noundef readonly %a5, ptr nocapture noundef readonly %a6, ptr nocapture noundef readonly %a7, ptr nocapture noundef readonly %a8, ptr nocapture noundef readonly %a9, ptr nocapture noundef readonly %a10, ptr nocapture noundef readonly %a11, ptr nocapture noundef readonly %a12, ptr nocapture noundef readonly %a13, ptr nocapture noundef readonly %a14, ptr nocapture noundef readonly %a15, ptr nocapture noundef readonly %a16, ptr nocapture noundef readonly %a17, ptr nocapture noundef readonly %a18, ptr nocapture noundef readonly %a19, ptr nocapture noundef readonly %a20, ptr nocapture noundef readonly %a21, ptr nocapture noundef readonly %a22, ptr nocapture noundef readonly %a23, ptr nocapture noundef readonly %a24, ptr nocapture noundef readonly %a25, ptr nocapture noundef readonly %a26, ptr nocapture noundef readonly %a27, ptr nocapture noundef readonly %a28, ptr nocapture noundef readonly %a29, ptr nocapture noundef readonly %a30, ptr nocapture noundef readonly %a31, ptr nocapture noundef readonly %a32, ptr addrspace(3) nocapture noundef readonly %a33, ptr addrspace(5) nocapture noundef readonly %a34) local_unnamed_addr #2 {
; VENTUS-LABEL: non_kernel_calling_convention:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi tp, tp, 28
; VENTUS-NEXT: .cfi_def_cfa_offset 28
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: .cfi_offset v33.l, 4
; VENTUS-NEXT: .cfi_offset v34.l, 0
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v33, -24(v32) # 4-byte Folded Spill
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v34, -28(v32) # 4-byte Folded Spill
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v33, -24(v32)
; VENTUS-NEXT: vlw.v v33, -4(v32)
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v34, -28(v32)
; VENTUS-NEXT: vlw.v v34, -32(v32)
; VENTUS-NEXT: vlw12.v v0, 0(v0)
; VENTUS-NEXT: vlw12.v v1, 0(v1)
; VENTUS-NEXT: vlw12.v v2, 0(v2)
@ -161,7 +158,11 @@ define dso_local i32 @non_kernel_calling_convention(ptr nocapture noundef readon
; VENTUS-NEXT: vadd.vv v0, v0, v1
; VENTUS-NEXT: vadd.vv v0, v0, v2
; VENTUS-NEXT: vadd.vv v0, v0, v3
; VENTUS-NEXT: addi tp, tp, -28
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v33, -24(v32) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v34, -28(v32) # 4-byte Folded Reload
; VENTUS-NEXT: addi tp, tp, -16
; VENTUS-NEXT: ret
entry:
%0 = load i32, ptr %a1, align 4
@ -275,18 +276,18 @@ define dso_local i32 @test_add(ptr nocapture noundef readonly %a, ptr nocapture
; VENTUS-NEXT: vlw12.v v0, 0(v0)
; VENTUS-NEXT: vadd.vi v0, v0, 1
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -8(v32)
; VENTUS-NEXT: vsw.v v0, -4(v32)
; VENTUS-NEXT: vlw12.v v0, 0(v1)
; VENTUS-NEXT: vadd.vi v0, v0, 2
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -4(v32)
; VENTUS-NEXT: addi t0, tp, -8
; VENTUS-NEXT: addi t1, tp, -4
; VENTUS-NEXT: vsw.v v0, -8(v32)
; VENTUS-NEXT: addi t0, tp, -4
; VENTUS-NEXT: addi t1, tp, -8
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: vmv.v.x v1, t1
; VENTUS-NEXT: call add
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vlw.v v1, -8(v32)
; VENTUS-NEXT: vlw.v v1, -4(v32)
; VENTUS-NEXT: vadd.vv v0, v1, v0
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -4

View File

@ -6,11 +6,11 @@
define dso_local ventus_kernel void @fadd(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
; VENTUS-LABEL: fadd:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: lw t1, 0(a0)
; VENTUS-NEXT: flw t0, 4(a0)
; VENTUS-NEXT: flw t1, 0(a0)
; VENTUS-NEXT: fadd.s t0, t1, t0
; VENTUS-NEXT: lw t1, 8(a0)
; VENTUS-NEXT: sw t0, 0(t1)
; VENTUS-NEXT: fsw t0, 0(t1)
; VENTUS-NEXT: ret
entry:
%add1 = fadd float %c, %d
@ -21,11 +21,11 @@ entry:
define dso_local ventus_kernel void @fsub(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
; VENTUS-LABEL: fsub:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: lw t1, 0(a0)
; VENTUS-NEXT: flw t0, 4(a0)
; VENTUS-NEXT: flw t1, 0(a0)
; VENTUS-NEXT: fsub.s t0, t1, t0
; VENTUS-NEXT: lw t1, 8(a0)
; VENTUS-NEXT: sw t0, 0(t1)
; VENTUS-NEXT: fsw t0, 0(t1)
; VENTUS-NEXT: ret
entry:
%sub = fsub float %c, %d
@ -36,11 +36,11 @@ entry:
define dso_local ventus_kernel void @fmul(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
; VENTUS-LABEL: fmul:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: lw t1, 0(a0)
; VENTUS-NEXT: flw t0, 4(a0)
; VENTUS-NEXT: flw t1, 0(a0)
; VENTUS-NEXT: fmul.s t0, t1, t0
; VENTUS-NEXT: lw t1, 8(a0)
; VENTUS-NEXT: sw t0, 0(t1)
; VENTUS-NEXT: fsw t0, 0(t1)
; VENTUS-NEXT: ret
entry:
%mul = fmul float %c, %d
@ -51,11 +51,11 @@ entry:
define dso_local ventus_kernel void @fdiv(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
; VENTUS-LABEL: fdiv:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: lw t1, 0(a0)
; VENTUS-NEXT: flw t0, 4(a0)
; VENTUS-NEXT: flw t1, 0(a0)
; VENTUS-NEXT: fdiv.s t0, t1, t0
; VENTUS-NEXT: lw t1, 8(a0)
; VENTUS-NEXT: sw t0, 0(t1)
; VENTUS-NEXT: fsw t0, 0(t1)
; VENTUS-NEXT: ret
entry:
%div = fdiv float %c, %d
@ -66,12 +66,12 @@ entry:
define dso_local ventus_kernel void @fmadd(float noundef %a, float noundef %b, float noundef %c, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
; VENTUS-LABEL: fmadd:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lw t0, 8(a0)
; VENTUS-NEXT: lw t1, 4(a0)
; VENTUS-NEXT: lw t2, 0(a0)
; VENTUS-NEXT: flw t0, 8(a0)
; VENTUS-NEXT: flw t1, 4(a0)
; VENTUS-NEXT: flw t2, 0(a0)
; VENTUS-NEXT: fmadd.s t0, t2, t1, t0
; VENTUS-NEXT: lw t1, 12(a0)
; VENTUS-NEXT: sw t0, 0(t1)
; VENTUS-NEXT: fsw t0, 0(t1)
; VENTUS-NEXT: ret
entry:
%div = call float @llvm.fma.f32(float %a, float %b, float %c)

View File

@ -16,7 +16,7 @@ define float @fadd_f(float noundef %a) {
; VENTUS-LABEL: fadd_f:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui t0, %hi(global_val)
; VENTUS-NEXT: lw t0, %lo(global_val)(t0)
; VENTUS-NEXT: flw t0, %lo(global_val)(t0)
; VENTUS-NEXT: vmv.v.x v1, t0
; VENTUS-NEXT: vfadd.vv v0, v0, v1
; VENTUS-NEXT: ret
@ -40,7 +40,7 @@ define float @fsub_f(float noundef %a) {
; VENTUS-LABEL: fsub_f:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui t0, %hi(global_val)
; VENTUS-NEXT: lw t0, %lo(global_val)(t0)
; VENTUS-NEXT: flw t0, %lo(global_val)(t0)
; VENTUS-NEXT: vmv.v.x v1, t0
; VENTUS-NEXT: vfsub.vv v0, v0, v1
; VENTUS-NEXT: ret
@ -64,7 +64,7 @@ define float @fmul_f(float noundef %a) {
; VENTUS-LABEL: fmul_f:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui t0, %hi(global_val)
; VENTUS-NEXT: lw t0, %lo(global_val)(t0)
; VENTUS-NEXT: flw t0, %lo(global_val)(t0)
; VENTUS-NEXT: vmv.v.x v1, t0
; VENTUS-NEXT: vfmul.vv v0, v0, v1
; VENTUS-NEXT: ret
@ -88,7 +88,7 @@ define float @fdiv_f(float noundef %a, float noundef %b) {
; VENTUS-LABEL: fdiv_f:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui t0, %hi(global_val)
; VENTUS-NEXT: lw t0, %lo(global_val)(t0)
; VENTUS-NEXT: flw t0, %lo(global_val)(t0)
; VENTUS-NEXT: vmv.v.x v1, t0
; VENTUS-NEXT: vfdiv.vv v0, v0, v1
; VENTUS-NEXT: ret
@ -102,7 +102,7 @@ define float @foo_constant(float noundef %a) {
; VENTUS-LABEL: foo_constant:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui t0, %hi(.LCPI8_0)
; VENTUS-NEXT: lw t0, %lo(.LCPI8_0)(t0)
; VENTUS-NEXT: flw t0, %lo(.LCPI8_0)(t0)
; VENTUS-NEXT: vmv.v.x v1, t0
; VENTUS-NEXT: vfmul.vv v0, v0, v1
; VENTUS-NEXT: ret
@ -193,7 +193,7 @@ define dso_local float @fgt(float noundef %a) {
; VENTUS-LABEL: fgt:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui t0, %hi(.LCPI14_0)
; VENTUS-NEXT: lw t0, %lo(.LCPI14_0)(t0)
; VENTUS-NEXT: flw t0, %lo(.LCPI14_0)(t0)
; VENTUS-NEXT: vmv.v.x v1, t0
; VENTUS-NEXT: vmflt.vv v0, v1, v0
; VENTUS-NEXT: vsll.vi v0, v0, 2
@ -217,7 +217,7 @@ define dso_local float @fge(float noundef %a) {
; VENTUS-LABEL: fge:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui t0, %hi(.LCPI15_0)
; VENTUS-NEXT: lw t0, %lo(.LCPI15_0)(t0)
; VENTUS-NEXT: flw t0, %lo(.LCPI15_0)(t0)
; VENTUS-NEXT: vmv.v.x v1, t0
; VENTUS-NEXT: vmfle.vv v0, v1, v0
; VENTUS-NEXT: vsll.vi v0, v0, 2
@ -332,7 +332,7 @@ define dso_local float @fmadd_f(float noundef %a, float noundef %b, float nounde
; VENTUS-LABEL: fmadd_f:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui t0, %hi(.LCPI24_0)
; VENTUS-NEXT: lw t0, %lo(.LCPI24_0)(t0)
; VENTUS-NEXT: flw t0, %lo(.LCPI24_0)(t0)
; VENTUS-NEXT: vadd.vx v0, v1, zero
; VENTUS-NEXT: vmv.v.x v1, t0
; VENTUS-NEXT: vfmadd.vv v0, v1, v2
@ -360,7 +360,7 @@ define dso_local float @fnmadd_f(float noundef %a, float noundef %b, float nound
; VENTUS-LABEL: fnmadd_f:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui t0, %hi(.LCPI26_0)
; VENTUS-NEXT: lw t0, %lo(.LCPI26_0)(t0)
; VENTUS-NEXT: flw t0, %lo(.LCPI26_0)(t0)
; VENTUS-NEXT: vadd.vx v0, v1, zero
; VENTUS-NEXT: vmv.v.x v1, t0
; VENTUS-NEXT: vfmsub.vv v0, v1, v2
@ -388,7 +388,7 @@ define dso_local float @fmsub_f(float noundef %a, float noundef %b) local_unname
; VENTUS-LABEL: fmsub_f:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui t0, %hi(.LCPI28_0)
; VENTUS-NEXT: lw t0, %lo(.LCPI28_0)(t0)
; VENTUS-NEXT: flw t0, %lo(.LCPI28_0)(t0)
; VENTUS-NEXT: vmv.v.x v2, t0
; VENTUS-NEXT: vfmsub.vv v0, v2, v1
; VENTUS-NEXT: ret
@ -415,7 +415,7 @@ define dso_local float @fnmsub_f(float noundef %a, float noundef %b, float nound
; VENTUS-LABEL: fnmsub_f:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui t0, %hi(.LCPI30_0)
; VENTUS-NEXT: lw t0, %lo(.LCPI30_0)(t0)
; VENTUS-NEXT: flw t0, %lo(.LCPI30_0)(t0)
; VENTUS-NEXT: vadd.vx v0, v1, zero
; VENTUS-NEXT: vmv.v.x v1, t0
; VENTUS-NEXT: vfmadd.vv v0, v1, v2

View File

@ -14,14 +14,11 @@ define dso_local ventus_kernel void @usage(ptr addrspace(1) nocapture noundef al
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: lw t1, 0(a0)
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: vlw12.v v0, 0(v0)
; VENTUS-NEXT: lw t0, 0(t1)
; VENTUS-NEXT: vadd.vx v0, v0, t0
; VENTUS-NEXT: vmv.v.x v1, t1
; VENTUS-NEXT: vsw12.v v0, 0(v1)
; VENTUS-NEXT: lw t0, 0(t0)
; VENTUS-NEXT: lw t2, 0(t1)
; VENTUS-NEXT: add t0, t2, t0
; VENTUS-NEXT: sw t0, 0(t1)
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: barrier x0, x0, 1
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: ret
entry:

View File

@ -13,23 +13,23 @@ target triple = "riscv32"
define dso_local i32 @printf(ptr addrspace(2) noundef %fmt, ...) {
; VENTUS-LABEL: printf:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi tp, tp, 64
; VENTUS-NEXT: .cfi_def_cfa_offset 64
; VENTUS-NEXT: addi tp, tp, 48
; VENTUS-NEXT: .cfi_def_cfa_offset 48
; VENTUS-NEXT: vmv.v.x v8, tp
; VENTUS-NEXT: vsw.v v7, -60(v8)
; VENTUS-NEXT: vsw.v v6, -56(v8)
; VENTUS-NEXT: vsw.v v5, -52(v8)
; VENTUS-NEXT: vsw.v v4, -48(v8)
; VENTUS-NEXT: vsw.v v3, -44(v8)
; VENTUS-NEXT: vsw.v v2, -40(v8)
; VENTUS-NEXT: vsw.v v1, -36(v8)
; VENTUS-NEXT: addi t0, tp, -36
; VENTUS-NEXT: vsw.v v7, -4(v8)
; VENTUS-NEXT: vsw.v v6, -16(v8)
; VENTUS-NEXT: vsw.v v5, -16(v8)
; VENTUS-NEXT: vsw.v v4, -32(v8)
; VENTUS-NEXT: vsw.v v3, -36(v8)
; VENTUS-NEXT: vsw.v v2, -48(v8)
; VENTUS-NEXT: vsw.v v1, -48(v8)
; VENTUS-NEXT: addi t0, tp, -60
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: vsw.v v0, -36(v8)
; VENTUS-NEXT: addi t0, tp, -32
; VENTUS-NEXT: vsw.v v0, -64(v8)
; VENTUS-NEXT: addi t0, tp, -56
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: vsw.v v0, -36(v8)
; VENTUS-NEXT: addi tp, tp, -64
; VENTUS-NEXT: vsw.v v0, -64(v8)
; VENTUS-NEXT: addi tp, tp, -48
; VENTUS-NEXT: ret
entry:
%retval = alloca i32, align 4, addrspace(5)

View File

@ -88,7 +88,7 @@ define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noun
; VENTUS-NEXT: # =>This Inner Loop Header: Depth=1
; VENTUS-NEXT: vlw12.v v4, 0(v3)
; VENTUS-NEXT: vadd.vv v2, v2, v4
; VENTUS-NEXT: vadd.vi v0, v0, -1
; VENTUS-NEXT: vsub12.vi v0, v0, 1
; VENTUS-NEXT: vsw12.v v2, 0(v1)
; VENTUS-NEXT: .Lpcrel_hi3:
; VENTUS-NEXT: auipc t1, %pcrel_hi(.LBB1_3)
@ -135,7 +135,9 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
@ -169,6 +171,7 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
; VENTUS-NEXT: vblt v0, v33, .LBB2_5
; VENTUS-NEXT: # %bb.3: # %if.then2
; VENTUS-NEXT: li t0, 23
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: j .LBB2_6
; VENTUS-NEXT: .LBB2_4: # %if.end7
; VENTUS-NEXT: li t0, 4
@ -177,14 +180,16 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
; VENTUS-NEXT: j .LBB2_7
; VENTUS-NEXT: .LBB2_5:
; VENTUS-NEXT: li t0, 12
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: .LBB2_6: # %cleanup9
; VENTUS-NEXT: # Label of block must be emitted
; VENTUS-NEXT: join zero, zero, 0
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: .LBB2_7: # %cleanup9
; VENTUS-NEXT: # Label of block must be emitted
; VENTUS-NEXT: join zero, zero, 0
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: addi tp, tp, -4
; VENTUS-NEXT: ret
@ -249,7 +254,7 @@ define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noun
; VENTUS-NEXT: # => This Inner Loop Header: Depth=2
; VENTUS-NEXT: vlw12.v v5, 0(v3)
; VENTUS-NEXT: vadd.vv v2, v2, v5
; VENTUS-NEXT: vadd.vi v4, v4, -1
; VENTUS-NEXT: vsub12.vi v4, v4, 1
; VENTUS-NEXT: vsw12.v v2, 0(v1)
; VENTUS-NEXT: .Lpcrel_hi8:
; VENTUS-NEXT: auipc t1, %pcrel_hi(.LBB3_4)