forked from OSchip/llvm-project
[VENTUS][RISCV][fix] Add initial Tp stack size calculation
Because there are two stacks in Ventus, we need to separate the TP stack and the SP stack; this commit adds only very initial support for TP stack size calculation.
This commit is contained in:
parent
180b3d4429
commit
faf6a0bcd9
|
@ -11,6 +11,8 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "RISCVFrameLowering.h"
|
||||
#include "MCTargetDesc/RISCVMCTargetDesc.h"
|
||||
#include "RISCV.h"
|
||||
#include "RISCVMachineFunctionInfo.h"
|
||||
#include "RISCVSubtarget.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
|
@ -295,8 +297,10 @@ getNonLibcallCSI(const MachineFunction &MF,
|
|||
|
||||
for (auto &CS : CSI) {
|
||||
int FI = CS.getFrameIdx();
|
||||
if (FI >= 0 && (MFI.getStackID(FI) == TargetStackID::Default ||
|
||||
MFI.getStackID(FI) == TargetStackID::SGPRSpill))
|
||||
// TODO: For now, we don't define VGPR callee saved registers, when we later
|
||||
// add VGPR callee saved register, remember to modify here
|
||||
if (FI >= 0 && (MFI.getStackID(FI) == RISCVStackID::Default ||
|
||||
MFI.getStackID(FI) == RISCVStackID::SGPRSpill))
|
||||
NonLibcallCSI.push_back(CS);
|
||||
}
|
||||
|
||||
|
@ -306,10 +310,11 @@ getNonLibcallCSI(const MachineFunction &MF,
|
|||
void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
|
||||
auto *RMFI = MF.getInfo<RISCVMachineFunctionInfo>();
|
||||
const RISCVRegisterInfo *RI = STI.getRegisterInfo();
|
||||
const RISCVInstrInfo *TII = STI.getInstrInfo();
|
||||
MachineBasicBlock::iterator MBBI = MBB.begin();
|
||||
bool IsEntryFunction = RMFI->isEntryFunction();
|
||||
|
||||
Register FPReg = getFPReg(STI);
|
||||
Register SPReg = getSPReg(STI);
|
||||
|
@ -336,6 +341,9 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
// Determine the correct frame layout
|
||||
determineFrameLayout(MF);
|
||||
|
||||
// Determine stack ID for each frame index
|
||||
deterMineStackID(MF);
|
||||
|
||||
// If libcalls are used to spill and restore callee-saved registers, the frame
|
||||
// has two sections; the opaque section managed by the libcalls, and the
|
||||
// section managed by MachineFrameInfo which can also hold callee saved
|
||||
|
@ -358,12 +366,15 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
// Calculate the size of the frame managed by the libcall. The libcalls are
|
||||
// implemented such that the stack will always be 16 byte aligned.
|
||||
unsigned LibCallFrameSize = alignTo((STI.getXLen() / 8) * LibCallRegs, 16);
|
||||
RVFI->setLibCallStackSize(LibCallFrameSize);
|
||||
RMFI->setLibCallStackSize(LibCallFrameSize);
|
||||
}
|
||||
|
||||
// FIXME: Need to get 2 stack size for TP and SP!
|
||||
// FIXME: SP stack size calculation need to be later changed
|
||||
// FIXME: TP stack size calculation is also not
|
||||
uint64_t StackSize = MFI.getStackSize();
|
||||
uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize();
|
||||
uint64_t TPStackSize = getTPStackSize(MF);
|
||||
uint64_t RealStackSize = IsEntryFunction ? StackSize :
|
||||
TPStackSize + RMFI->getLibCallStackSize();
|
||||
|
||||
// Early exit if there is no need to allocate on the stack
|
||||
if (RealStackSize == 0 && !MFI.adjustsStack())
|
||||
|
@ -388,11 +399,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
}
|
||||
|
||||
// Allocate space on the local-mem stack and private-mem stack if necessary.
|
||||
if(MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL)
|
||||
if(StackSize)
|
||||
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
|
||||
MachineInstr::FrameSetup, getStackAlign());
|
||||
else
|
||||
RI->adjustReg(MBB, MBBI, DL, TPReg, TPReg, StackOffset::getFixed(StackSize),
|
||||
if(TPStackSize)
|
||||
RI->adjustReg(MBB, MBBI, DL, TPReg, TPReg,
|
||||
StackOffset::getFixed(TPStackSize),
|
||||
MachineInstr::FrameSetup, getStackAlign());
|
||||
|
||||
// Emit ".cfi_def_cfa_offset RealStackSize"
|
||||
|
@ -423,7 +435,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
Offset = FrameIdx * (int64_t) STI.getXLen() / 8;
|
||||
else
|
||||
Offset = MFI.getObjectOffset(Entry.getFrameIdx()) -
|
||||
RVFI->getLibCallStackSize();
|
||||
RMFI->getLibCallStackSize();
|
||||
Register Reg = Entry.getReg();
|
||||
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
|
||||
nullptr, RI->getDwarfRegNum(Reg, true), Offset));
|
||||
|
@ -438,15 +450,15 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
|
||||
MF.getFunction(), "Frame pointer required, but has been reserved."});
|
||||
// The frame pointer does need to be reserved from register allocation.
|
||||
assert(MF.getRegInfo().isReserved(FPReg) && "FP not reserved");
|
||||
// assert(MF.getRegInfo().isReserved(FPReg) && "FP not reserved");
|
||||
|
||||
RI->adjustReg(MBB, MBBI, DL, FPReg, SPReg,
|
||||
StackOffset::getFixed(RealStackSize - RVFI->getVarArgsSaveSize()),
|
||||
MachineInstr::FrameSetup, getStackAlign());
|
||||
RI->adjustReg(MBB, MBBI, DL, FPReg, IsEntryFunction ? SPReg : TPReg,
|
||||
-StackOffset::getFixed(RealStackSize - RMFI->getVarArgsSaveSize()),
|
||||
MachineInstr::FrameSetup, getStackAlign());
|
||||
|
||||
// Emit ".cfi_def_cfa $fp, RVFI->getVarArgsSaveSize()"
|
||||
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
|
||||
nullptr, RI->getDwarfRegNum(FPReg, true), RVFI->getVarArgsSaveSize()));
|
||||
nullptr, RI->getDwarfRegNum(FPReg, true), RMFI->getVarArgsSaveSize()));
|
||||
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
|
||||
.addCFIIndex(CFIIndex)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
|
@ -550,6 +562,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
|
||||
// FIXME: Need to get 2 stack size for TP and SP!
|
||||
uint64_t StackSize = MFI.getStackSize();
|
||||
uint64_t TPStackSize = getTPStackSize(MF);
|
||||
uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize();
|
||||
uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize();
|
||||
|
||||
|
@ -589,11 +602,12 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
|
||||
// FIXME: Allocate space for two stacks, this is depend on the actual use of
|
||||
// these two stacks, not based on calling convention
|
||||
if(MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL)
|
||||
if(StackSize)
|
||||
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-StackSize),
|
||||
MachineInstr::FrameDestroy, getStackAlign());
|
||||
else
|
||||
RI->adjustReg(MBB, MBBI, DL, TPReg, TPReg, StackOffset::getFixed(-StackSize),
|
||||
if(TPStackSize)
|
||||
RI->adjustReg(MBB, MBBI, DL, TPReg, TPReg,
|
||||
StackOffset::getFixed(-TPStackSize),
|
||||
MachineInstr::FrameDestroy, getStackAlign());
|
||||
|
||||
// Emit epilogue for shadow call stack.
|
||||
|
@ -615,8 +629,9 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
|
|||
int MaxCSFI = -1;
|
||||
auto StackID = MFI.getStackID(FI);
|
||||
|
||||
assert((StackID == TargetStackID::Default ||
|
||||
StackID == TargetStackID::SGPRSpill) &&
|
||||
assert((StackID == RISCVStackID::Default ||
|
||||
StackID == RISCVStackID::SGPRSpill ||
|
||||
StackID == RISCVStackID::VGPRSpill) &&
|
||||
"Unexpected stack ID for the frame object.");
|
||||
StackOffset Offset =
|
||||
StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea() +
|
||||
|
@ -855,6 +870,27 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
|
|||
return 0;
|
||||
}
|
||||
|
||||
/// Compute the number of bytes this function needs on the TP stack.
///
/// Sums the sizes of all live frame objects whose stack ID is
/// RISCVStackID::VGPRSpill. Object alignment is not taken into account; this
/// is only an initial estimate of the TP stack size.
///
/// \param MF the machine function whose frame objects are inspected.
/// \returns the accumulated size in bytes, or 0 when no live object is tagged
///          VGPRSpill.
uint64_t RISCVFrameLowering::getTPStackSize(MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint64_t TPStackSize = 0;

  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
       ++I) {
    // Dead (removed) objects occupy no storage, and MachineFrameInfo reports a
    // sentinel size for them, so adding it would corrupt the total.
    if (MFI.isDeadObjectIndex(I))
      continue;
    if (MFI.getStackID(I) == RISCVStackID::VGPRSpill)
      TPStackSize += MFI.getObjectSize(I);
  }

  return TPStackSize;
}
|
||||
|
||||
/// Assign the VGPRSpill stack ID to frame objects that reside in the private
/// address space.
///
/// Every frame index of \p MF is visited. An index is re-tagged as
/// RISCVStackID::VGPRSpill when it is not already tagged SGPRSpill and the
/// pointer info built for it reports RISCVAS::PRIVATE_ADDRESS; all other
/// indices keep their current stack ID.
void RISCVFrameLowering::deterMineStackID(MachineFunction &MF) const {
  llvm::MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  int Idx = FrameInfo.getObjectIndexBegin();
  while (Idx != FrameInfo.getObjectIndexEnd()) {
    // Built first for every index, exactly as before, so the address-space
    // query below always has pointer info to inspect.
    const MachinePointerInfo SlotInfo =
        MachinePointerInfo::getFixedStack(MF, Idx);

    const bool IsSGPRSpill =
        FrameInfo.getStackID(Idx) == RISCVStackID::SGPRSpill;
    if (!IsSGPRSpill && SlotInfo.getAddrSpace() == RISCVAS::PRIVATE_ADDRESS)
      FrameInfo.setStackID(Idx, RISCVStackID::VGPRSpill);

    ++Idx;
  }
}
|
||||
|
||||
bool RISCVFrameLowering::spillCalleeSavedRegisters(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
|
||||
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
|
||||
|
@ -884,9 +920,14 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
|
|||
for (auto &CS : NonLibcallCSI) {
|
||||
// Insert the spill to the stack frame.
|
||||
Register Reg = CS.getReg();
|
||||
|
||||
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
|
||||
TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
|
||||
if(Reg.id() < RISCV::V0 || Reg.id() > RISCV::V255 ) {
|
||||
TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
|
||||
RC, TRI);
|
||||
MF->getFrameInfo().setStackID(CS.getFrameIdx(), RISCVStackID::SGPRSpill);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -914,7 +955,8 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
|
|||
for (auto &CS : NonLibcallCSI) {
|
||||
Register Reg = CS.getReg();
|
||||
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
|
||||
TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
|
||||
if(Reg.id() < RISCV::V0 || Reg.id() > RISCV::V255 )
|
||||
TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
|
||||
assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
|
||||
}
|
||||
|
||||
|
@ -992,14 +1034,16 @@ bool RISCVFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
|
|||
}
|
||||
|
||||
bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
|
||||
switch (ID) {
|
||||
case TargetStackID::Default:
|
||||
case TargetStackID::SGPRSpill:
|
||||
RISCVStackID::Value StackID = (RISCVStackID::Value)ID;
|
||||
switch (StackID) {
|
||||
case RISCVStackID::Default:
|
||||
case RISCVStackID::SGPRSpill:
|
||||
case RISCVStackID::VGPRSpill:
|
||||
return true;
|
||||
case TargetStackID::ScalableVector:
|
||||
case TargetStackID::NoAlloc:
|
||||
case TargetStackID::WasmLocal:
|
||||
case RISCVStackID::ScalableVector:
|
||||
case RISCVStackID::NoAlloc:
|
||||
case RISCVStackID::WasmLocal:
|
||||
return false;
|
||||
}
|
||||
llvm_unreachable("Invalid TargetStackID::Value");
|
||||
llvm_unreachable("Invalid RISCVStackID::Value");
|
||||
}
|
||||
|
|
|
@ -63,6 +63,12 @@ public:
|
|||
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
|
||||
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
|
||||
|
||||
/// Get TP stack size for each stack
|
||||
uint64_t getTPStackSize(MachineFunction &MF) const;
|
||||
|
||||
/// Before insert prolog/epilog information, set stack ID for each frame index
|
||||
void deterMineStackID(MachineFunction &MF) const;
|
||||
|
||||
bool enableShrinkWrapping(const MachineFunction &MF) const override;
|
||||
|
||||
bool isSupportedStackID(TargetStackID::Value ID) const override;
|
||||
|
|
|
@ -849,6 +849,97 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
|
|||
return true;
|
||||
}
|
||||
|
||||
// Select a (Base, Offset) pair for the address Addr. The cases below are tried
// in order and the function always returns true: when nothing more specific
// matches, Addr itself becomes the base and the offset is zero.
// NOTE(review): "Pri" presumably refers to the private address space — confirm
// against the patterns that reference this selector.
bool RISCVDAGToDAGISel::SelectPriAddrRegImm(SDValue Addr, SDValue &Base,
|
||||
SDValue &Offset) {
|
||||
// Give SelectAddrFrameIndex the first chance; if it succeeds we are done.
if (SelectAddrFrameIndex(Addr, Base, Offset))
|
||||
return true;
|
||||
|
||||
SDLoc DL(Addr);
|
||||
MVT VT = Addr.getSimpleValueType();
|
||||
|
||||
// (ADD_LO hi, lo): use the two operands directly as base and offset.
if (Addr.getOpcode() == RISCVISD::ADD_LO) {
|
||||
Base = Addr.getOperand(0);
|
||||
Offset = Addr.getOperand(1);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Base plus a constant that fits in a signed 12-bit immediate.
if (CurDAG->isBaseWithConstantOffset(Addr)) {
|
||||
int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
|
||||
if (isInt<12>(CVal)) {
|
||||
Base = Addr.getOperand(0);
|
||||
if (Base.getOpcode() == RISCVISD::ADD_LO) {
|
||||
SDValue LoOperand = Base.getOperand(1);
|
||||
if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
|
||||
// If the Lo in (ADD_LO hi, lo) is a global variable's address
|
||||
// (its low part, really), then we can rely on the alignment of that
|
||||
// variable to provide a margin of safety before low part can overflow
|
||||
// the 12 bits of the load/store offset. Check if CVal falls within
|
||||
// that margin; if so (low part + CVal) can't overflow.
|
||||
const DataLayout &DL = CurDAG->getDataLayout();
|
||||
Align Alignment = commonAlignment(
|
||||
GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
|
||||
if (CVal == 0 || Alignment > CVal) {
|
||||
int64_t CombinedOffset = CVal + GA->getOffset();
|
||||
Base = Base.getOperand(0);
|
||||
Offset = CurDAG->getTargetGlobalAddress(
|
||||
GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
|
||||
CombinedOffset, GA->getTargetFlags());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A frame-index base is rewritten to a target frame index; the constant
// becomes the offset.
if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
|
||||
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
|
||||
Offset = CurDAG->getTargetConstant(CVal, DL, VT);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle ADD with large immediates.
|
||||
if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
|
||||
int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
|
||||
assert(!isInt<12>(CVal) && "simm12 not already handled?");
|
||||
|
||||
// Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
|
||||
// an ADDI for part of the offset and fold the rest into the load/store.
|
||||
// This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
|
||||
if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
|
||||
int64_t Adj = CVal < 0 ? -2048 : 2047;
|
||||
Base = SDValue(
|
||||
CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
|
||||
CurDAG->getTargetConstant(Adj, DL, VT)),
|
||||
0);
|
||||
Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
|
||||
return true;
|
||||
}
|
||||
|
||||
// For larger immediates, we might be able to save one instruction from
|
||||
// constant materialization by folding the Lo12 bits of the immediate into
|
||||
// the address. We should only do this if the ADD is only used by loads and
|
||||
// stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
|
||||
// separately with the full materialized immediate creating extra
|
||||
// instructions.
|
||||
if (isWorthFoldingAdd(Addr) &&
|
||||
selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
|
||||
Offset)) {
|
||||
// Insert an ADD instruction with the materialized Hi52 bits.
|
||||
Base = SDValue(
|
||||
CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
|
||||
0);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise let selectConstantAddr (defined elsewhere) try the whole address.
if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
|
||||
return true;
|
||||
|
||||
// Fallback: use Addr unchanged as the base with a zero offset.
Base = Addr;
|
||||
Offset = CurDAG->getTargetConstant(0, DL, VT);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// FIXME: This is almost identical to SelectAddrRegImm now, but it will be
|
||||
// modified to support more vALU addressing patterns.
|
||||
bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
|
||||
|
|
|
@ -50,6 +50,7 @@ public:
|
|||
bool SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
|
||||
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
|
||||
bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset);
|
||||
bool SelectPriAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
|
||||
|
||||
bool tryShrinkShlLogicImm(SDNode *Node);
|
||||
|
||||
|
|
|
@ -5740,6 +5740,8 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
|
|||
}
|
||||
int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
|
||||
/*IsImmutable=*/true);
|
||||
// This is essential for calculating stack size for VGPRSpill
|
||||
MFI.setStackID(FI, RISCVStackID::VGPRSpill);
|
||||
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
|
||||
SDValue Val;
|
||||
|
||||
|
@ -5753,9 +5755,11 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
|
|||
ExtType = ISD::NON_EXTLOAD;
|
||||
break;
|
||||
}
|
||||
Val = DAG.getExtLoad(
|
||||
ExtType, DL, LocVT, Chain, FIN,
|
||||
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
|
||||
MachinePointerInfo PtrInfo =
|
||||
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
|
||||
assert(PtrInfo.getAddrSpace() == RISCVAS::PRIVATE_ADDRESS &&
|
||||
"Expecting non-kernel function arguments unpack from private memory");
|
||||
Val = DAG.getExtLoad(ExtType, DL, LocVT, Chain, FIN, PtrInfo, ValVT);
|
||||
return Val;
|
||||
}
|
||||
|
||||
|
@ -5908,7 +5912,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
|
|||
|
||||
// Record the frame index of the first variable argument
|
||||
// which is a value necessary to VASTART.
|
||||
int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
|
||||
int FI = MFI.CreateFixedObject(XLenInBytes, 0, true);
|
||||
RVFI->setVarArgsFrameIndex(FI);
|
||||
|
||||
// If saving an odd number of registers then create an extra stack slot to
|
||||
|
@ -5929,6 +5933,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
|
|||
RegInfo.addLiveIn(ArgRegs[I], Reg);
|
||||
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
|
||||
FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
|
||||
MFI.setStackID(FI, RISCVStackID::VGPRSpill);
|
||||
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
|
||||
SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
|
||||
MachinePointerInfo::getFixedStack(MF, FI));
|
||||
|
|
|
@ -308,8 +308,8 @@ Register RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
|
|||
MF.getInfo<RISCVMachineFunctionInfo>();
|
||||
if(FuncInfo->isEntryFunction())
|
||||
return TFI->hasFP(MF) ? RISCV::X8 : RISCV::X2;
|
||||
// Non-kernel function, we only use Tp
|
||||
return RISCV::X4;
|
||||
// Non-kernel function, we also use X8 for frame pointer
|
||||
return TFI->hasFP(MF) ? RISCV::X8 : RISCV::X4;
|
||||
}
|
||||
|
||||
const uint32_t *
|
||||
|
|
|
@ -6,11 +6,11 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali
|
|||
; VENTUS-LABEL: func:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
; VENTUS-NEXT: addi tp, tp, 4
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 16
|
||||
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw s0, -8(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw s1, -12(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: vsw.v v32, -16(tp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, -4(sp)
|
||||
; VENTUS-NEXT: sw s0, -8(sp)
|
||||
; VENTUS-NEXT: sw s1, -12(sp)
|
||||
; VENTUS-NEXT: .cfi_offset ra, 12
|
||||
; VENTUS-NEXT: .cfi_offset s0, 8
|
||||
; VENTUS-NEXT: .cfi_offset s1, 4
|
||||
|
@ -30,11 +30,11 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali
|
|||
; VENTUS-NEXT: vlw12.v v2, 0(v1)
|
||||
; VENTUS-NEXT: vadd.vv v0, v2, v0
|
||||
; VENTUS-NEXT: vsw12.v v0, 0(v1)
|
||||
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw s0, -8(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw s1, -12(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: vlw.v v32, -16(tp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, -4(sp)
|
||||
; VENTUS-NEXT: lw s0, -8(sp)
|
||||
; VENTUS-NEXT: lw s1, -12(sp)
|
||||
; VENTUS-NEXT: addi sp, sp, -16
|
||||
; VENTUS-NEXT: addi tp, tp, -4
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
|
||||
|
|
|
@ -8,11 +8,12 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
|
|||
; VENTUS-LABEL: foo:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 48
|
||||
; VENTUS-NEXT: addi tp, tp, 20
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 48
|
||||
; VENTUS-NEXT: sw ra, -36(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw s0, -40(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw s1, -44(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw s2, -48(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, -36(sp)
|
||||
; VENTUS-NEXT: sw s0, -40(sp)
|
||||
; VENTUS-NEXT: sw s1, -44(sp)
|
||||
; VENTUS-NEXT: sw s2, -48(sp)
|
||||
; VENTUS-NEXT: .cfi_offset ra, 12
|
||||
; VENTUS-NEXT: .cfi_offset s0, 8
|
||||
; VENTUS-NEXT: .cfi_offset s1, 4
|
||||
|
@ -20,7 +21,7 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
|
|||
; VENTUS-NEXT: lw s0, 0(a0)
|
||||
; VENTUS-NEXT: lui a0, %hi(foo.b)
|
||||
; VENTUS-NEXT: addi s1, a0, %lo(foo.b)
|
||||
; VENTUS-NEXT: addi s2, tp, -32
|
||||
; VENTUS-NEXT: addi s2, sp, -32
|
||||
; VENTUS-NEXT: vmv.v.x v0, s2
|
||||
; VENTUS-NEXT: vmv.v.x v1, s1
|
||||
; VENTUS-NEXT: vmv.v.x v2, s0
|
||||
|
@ -35,7 +36,7 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
|
|||
; VENTUS-NEXT: # %bb.1: # %if.then
|
||||
; VENTUS-NEXT: slli a0, a0, 2
|
||||
; VENTUS-NEXT: add s2, s2, a0
|
||||
; VENTUS-NEXT: vlw.v v0, zero(s2)
|
||||
; VENTUS-NEXT: vlw.v v0, 0(s2)
|
||||
; VENTUS-NEXT: add s1, s1, a0
|
||||
; VENTUS-NEXT: lw a1, 0(s1)
|
||||
; VENTUS-NEXT: add a0, s0, a0
|
||||
|
@ -45,17 +46,19 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
|
|||
; VENTUS-NEXT: vmadd.vv v0, v1, v2
|
||||
; VENTUS-NEXT: vmv.v.x v1, a0
|
||||
; VENTUS-NEXT: vsw12.v v0, 0(v1)
|
||||
; VENTUS-NEXT: j .LBB0_3
|
||||
; VENTUS-NEXT: join v0, v0, .LBB0_3
|
||||
; VENTUS-NEXT: .LBB0_2: # %if.else
|
||||
; VENTUS-NEXT: slli a0, a0, 2
|
||||
; VENTUS-NEXT: add a0, s0, a0
|
||||
; VENTUS-NEXT: sw zero, 0(a0)
|
||||
; VENTUS-NEXT: join v0, v0, .LBB0_3
|
||||
; VENTUS-NEXT: .LBB0_3: # %if.end
|
||||
; VENTUS-NEXT: lw ra, -36(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw s0, -40(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw s1, -44(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw s2, -48(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, -36(sp)
|
||||
; VENTUS-NEXT: lw s0, -40(sp)
|
||||
; VENTUS-NEXT: lw s1, -44(sp)
|
||||
; VENTUS-NEXT: lw s2, -48(sp)
|
||||
; VENTUS-NEXT: addi sp, sp, -48
|
||||
; VENTUS-NEXT: addi tp, tp, -20
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%a = alloca [5 x i32], align 4, addrspace(5)
|
||||
|
@ -159,11 +162,11 @@ define dso_local void @private_memmory(ptr addrspace(5) nocapture noundef %a) lo
|
|||
; VENTUS-LABEL: private_memmory:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: vmv.x.s a0, v0
|
||||
; VENTUS-NEXT: vlw.v v0, zero(a0)
|
||||
; VENTUS-NEXT: vlw.v v0, 0(a0)
|
||||
; VENTUS-NEXT: lui a1, %hi(global_int)
|
||||
; VENTUS-NEXT: lw a1, %lo(global_int)(a1)
|
||||
; VENTUS-NEXT: vadd.vx v0, v0, a1
|
||||
; VENTUS-NEXT: vsw.v v0, zero(a0)
|
||||
; VENTUS-NEXT: vsw.v v0, 0(a0)
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%0 = load i32, ptr addrspace(5) %a, align 4
|
||||
|
@ -200,9 +203,9 @@ define dso_local void @private_memmory_lh(ptr addrspace(5) nocapture noundef %a)
|
|||
; VENTUS-NEXT: vmv.x.s a0, v0
|
||||
; VENTUS-NEXT: lui a1, %hi(global_short)
|
||||
; VENTUS-NEXT: lh a1, %lo(global_short)(a1)
|
||||
; VENTUS-NEXT: vlh.v v0, zero(a0)
|
||||
; VENTUS-NEXT: vlh.v v0, 0(a0)
|
||||
; VENTUS-NEXT: vadd.vx v0, v0, a1
|
||||
; VENTUS-NEXT: vsh.v v0, zero(a0)
|
||||
; VENTUS-NEXT: vsh.v v0, 0(a0)
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%0 = load i16, ptr addrspace(5) %a, align 2
|
||||
|
@ -217,7 +220,7 @@ define dso_local zeroext i16 @private_memmory_lhu(ptr addrspace(5) nocapture nou
|
|||
; VENTUS-LABEL: private_memmory_lhu:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: vmv.x.s a0, v0
|
||||
; VENTUS-NEXT: vlhu.v v0, zero(a0)
|
||||
; VENTUS-NEXT: vlhu.v v0, 0(a0)
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%0 = load i16, ptr addrspace(5) %a, align 2
|
||||
|
@ -229,7 +232,7 @@ define dso_local zeroext i8 @private_memmory_lbu(ptr addrspace(5) nocapture noun
|
|||
; VENTUS-LABEL: private_memmory_lbu:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: vmv.x.s a0, v0
|
||||
; VENTUS-NEXT: vlbu.v v0, zero(a0)
|
||||
; VENTUS-NEXT: vlbu.v v0, 0(a0)
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%0 = load i8, ptr addrspace(5) %a, align 1
|
||||
|
|
|
@ -5,16 +5,15 @@
|
|||
define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrspace(1) nocapture noundef readonly %B) {
|
||||
; VENTUS-LABEL: foo_fun:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi tp, tp, 16
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 16
|
||||
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vsw.v v1, -12(tp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vsw.v v2, -16(tp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
; VENTUS-NEXT: addi tp, tp, 8
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 8
|
||||
; VENTUS-NEXT: sw ra, -8(sp)
|
||||
; VENTUS-NEXT: .cfi_offset ra, 8
|
||||
; VENTUS-NEXT: .cfi_offset v32.l, 4
|
||||
; VENTUS-NEXT: .cfi_offset v33.l, 0
|
||||
; VENTUS-NEXT: addi s0, tp, -8
|
||||
; VENTUS-NEXT: .cfi_def_cfa s0, 0
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vadd.vx v1, v1, zero
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
|
@ -30,12 +29,9 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
|
|||
; VENTUS-NEXT: vlw12.v v2, 0(v0)
|
||||
; VENTUS-NEXT: vadd.vv v1, v2, v1
|
||||
; VENTUS-NEXT: vsw12.v v1, 0(v0)
|
||||
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vlw.v v1, -12(tp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vlw.v v2, -16(tp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi tp, tp, -16
|
||||
; VENTUS-NEXT: lw ra, -8(sp)
|
||||
; VENTUS-NEXT: addi sp, sp, -16
|
||||
; VENTUS-NEXT: addi tp, tp, -8
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
|
||||
|
|
|
@ -7,9 +7,9 @@ define ventus_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 16
|
||||
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw s0, -12(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw s1, -16(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, -8(sp)
|
||||
; VENTUS-NEXT: sw s0, -12(sp)
|
||||
; VENTUS-NEXT: sw s1, -16(sp)
|
||||
; VENTUS-NEXT: .cfi_offset ra, 8
|
||||
; VENTUS-NEXT: .cfi_offset s0, 4
|
||||
; VENTUS-NEXT: .cfi_offset s1, 0
|
||||
|
@ -24,9 +24,9 @@ define ventus_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A
|
|||
; VENTUS-NEXT: vlw12.v v2, 0(v0)
|
||||
; VENTUS-NEXT: vadd.vv v1, v2, v1
|
||||
; VENTUS-NEXT: vsw12.v v1, 0(v0)
|
||||
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw s0, -12(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw s1, -16(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, -8(sp)
|
||||
; VENTUS-NEXT: lw s0, -12(sp)
|
||||
; VENTUS-NEXT: lw s1, -16(sp)
|
||||
; VENTUS-NEXT: addi sp, sp, -16
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
|
@ -43,14 +43,15 @@ entry:
|
|||
define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrspace(1) nocapture noundef readonly %B) {
|
||||
; VENTUS-LABEL: foo_fun:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi tp, tp, 16
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 16
|
||||
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: vsw.v v32, -12(tp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: vsw.v v33, -16(tp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
; VENTUS-NEXT: addi tp, tp, 8
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 8
|
||||
; VENTUS-NEXT: sw ra, -8(sp)
|
||||
; VENTUS-NEXT: .cfi_offset ra, 8
|
||||
; VENTUS-NEXT: .cfi_offset v32.l, 4
|
||||
; VENTUS-NEXT: .cfi_offset v33.l, 0
|
||||
; VENTUS-NEXT: addi s0, tp, -8
|
||||
; VENTUS-NEXT: .cfi_def_cfa s0, 0
|
||||
; VENTUS-NEXT: vadd.vx v32, v1, zero
|
||||
; VENTUS-NEXT: vadd.vx v33, v0, zero
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
|
@ -62,10 +63,9 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
|
|||
; VENTUS-NEXT: vlw12.v v2, 0(v0)
|
||||
; VENTUS-NEXT: vadd.vv v1, v2, v1
|
||||
; VENTUS-NEXT: vsw12.v v1, 0(v0)
|
||||
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: vlw.v v32, -12(tp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: vlw.v v33, -16(tp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi tp, tp, -16
|
||||
; VENTUS-NEXT: lw ra, -8(sp)
|
||||
; VENTUS-NEXT: addi sp, sp, -16
|
||||
; VENTUS-NEXT: addi tp, tp, -8
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
|
||||
|
|
|
@ -1,14 +1,39 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs -O1 < %s \
|
||||
; RUN: | FileCheck -check-prefix=VENTUS %s
|
||||
|
||||
define dso_local ventus_kernel void @_kernel(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B){
|
||||
; VENTUS-LABEL: _kernel:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 16
|
||||
; VENTUS-NEXT: sw ra, -8(sp)
|
||||
; VENTUS-NEXT: sw s0, -12(sp)
|
||||
; VENTUS-NEXT: sw s1, -16(sp)
|
||||
; VENTUS-NEXT: .cfi_offset ra, 8
|
||||
; VENTUS-NEXT: .cfi_offset s0, 4
|
||||
; VENTUS-NEXT: .cfi_offset s1, 0
|
||||
; VENTUS-NEXT: lw s0, 0(a0)
|
||||
; VENTUS-NEXT: lw s1, 4(a0)
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
; VENTUS-NEXT: call _Z13get_global_idj
|
||||
; VENTUS-NEXT: vsll.vi v0, v0, 2
|
||||
; VENTUS-NEXT: vadd.vx v1, v0, s1
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v1)
|
||||
; VENTUS-NEXT: vadd.vx v0, v0, s0
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v0)
|
||||
; VENTUS-NEXT: vfadd.vv v1, v1, v2
|
||||
; VENTUS-NEXT: vsw12.v v1, 0(v0)
|
||||
; VENTUS-NEXT: lw ra, -8(sp)
|
||||
; VENTUS-NEXT: lw s0, -12(sp)
|
||||
; VENTUS-NEXT: lw s1, -16(sp)
|
||||
; VENTUS-NEXT: addi sp, sp, -16
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
|
||||
%arrayidx = getelementptr inbounds float, ptr addrspace(1) %B, i32 %call
|
||||
; VENTUS: lw s0, 0(a0)
|
||||
%0 = load float, ptr addrspace(1) %arrayidx, align 4
|
||||
%arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %A, i32 %call
|
||||
; VENTUS: lw s1, 4(a0)
|
||||
%1 = load float, ptr addrspace(1) %arrayidx1, align 4
|
||||
%add = fadd float %0, %1
|
||||
store float %add, ptr addrspace(1) %arrayidx1, align 4
|
||||
|
@ -21,13 +46,18 @@ entry:
|
|||
declare dso_local i32 @_Z13get_global_idj(i32 noundef) local_unnamed_addr
|
||||
|
||||
define dso_local i32 @non_kernel_1(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, ptr nocapture noundef readonly %c){
|
||||
; VENTUS-LABEL: non_kernel_1:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: vlw12.v v0, 0(v0)
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v1)
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v2)
|
||||
; VENTUS-NEXT: vadd.vv v0, v1, v0
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
; VENTUS: vlw12.v v0, 0(v0)
|
||||
%0 = load i32, ptr %a, align 4
|
||||
; VENTUS: vlw12.v v1, 0(v1)
|
||||
%1 = load i32, ptr %b, align 4
|
||||
%add = add nsw i32 %1, %0
|
||||
; VENTUS: vlw12.v v2, 0(v2)
|
||||
%2 = load i32, ptr %c, align 4
|
||||
%add1 = add nsw i32 %add, %2
|
||||
ret i32 %add1
|
||||
|
@ -37,28 +67,99 @@ entry:
|
|||
; so the left two arguments need to be passed by tp stack
|
||||
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read, inaccessiblemem: none)
|
||||
define dso_local i32 @non_kernel(ptr nocapture noundef readonly %a1, ptr nocapture noundef readonly %a2, ptr nocapture noundef readonly %a3, ptr nocapture noundef readonly %a4, ptr nocapture noundef readonly %a5, ptr nocapture noundef readonly %a6, ptr nocapture noundef readonly %a7, ptr nocapture noundef readonly %a8, ptr nocapture noundef readonly %a9, ptr nocapture noundef readonly %a10, ptr nocapture noundef readonly %a11, ptr nocapture noundef readonly %a12, ptr nocapture noundef readonly %a13, ptr nocapture noundef readonly %a14, ptr nocapture noundef readonly %a15, ptr nocapture noundef readonly %a16, ptr nocapture noundef readonly %a17, ptr nocapture noundef readonly %a18, ptr nocapture noundef readonly %a19, ptr nocapture noundef readonly %a20, ptr nocapture noundef readonly %a21, ptr nocapture noundef readonly %a22, ptr nocapture noundef readonly %a23, ptr nocapture noundef readonly %a24, ptr nocapture noundef readonly %a25, ptr nocapture noundef readonly %a26, ptr nocapture noundef readonly %a27, ptr nocapture noundef readonly %a28, ptr nocapture noundef readonly %a29, ptr nocapture noundef readonly %a30, ptr nocapture noundef readonly %a31, ptr nocapture noundef readonly %a32, ptr addrspace(5) nocapture noundef readonly %0, ptr addrspace(5) nocapture noundef readonly %1) {
|
||||
; VENTUS-LABEL: non_kernel:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi tp, tp, 8
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 8
|
||||
; VENTUS-NEXT: vlw.v v48, 0(tp)
|
||||
; VENTUS-NEXT: vlw.v v49, 4(tp)
|
||||
; VENTUS-NEXT: vmv.x.s a0, v48
|
||||
; VENTUS-NEXT: vlw.v v48, 0(a0)
|
||||
; VENTUS-NEXT: vmv.x.s a0, v49
|
||||
; VENTUS-NEXT: vlw.v v49, 0(a0)
|
||||
; VENTUS-NEXT: vlw12.v v0, 0(v0)
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v1)
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v2)
|
||||
; VENTUS-NEXT: vlw12.v v3, 0(v3)
|
||||
; VENTUS-NEXT: vadd.vv v0, v1, v0
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v3
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v4)
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v5)
|
||||
; VENTUS-NEXT: vlw12.v v3, 0(v6)
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v3
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v7)
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v8)
|
||||
; VENTUS-NEXT: vlw12.v v3, 0(v9)
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v3
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v10)
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v11)
|
||||
; VENTUS-NEXT: vlw12.v v3, 0(v12)
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v3
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v13)
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v14)
|
||||
; VENTUS-NEXT: vlw12.v v3, 0(v15)
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v3
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v16)
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v17)
|
||||
; VENTUS-NEXT: vlw12.v v3, 0(v18)
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v3
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v19)
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v20)
|
||||
; VENTUS-NEXT: vlw12.v v3, 0(v21)
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v3
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v22)
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v23)
|
||||
; VENTUS-NEXT: vlw12.v v3, 0(v24)
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v3
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v25)
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v26)
|
||||
; VENTUS-NEXT: vlw12.v v3, 0(v27)
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v3
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v28)
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v29)
|
||||
; VENTUS-NEXT: vlw12.v v3, 0(v30)
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v3
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v31)
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v48)
|
||||
; VENTUS-NEXT: vlw12.v v3, 0(v49)
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v3
|
||||
; VENTUS-NEXT: addi tp, tp, -8
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
; VENTUS: vlw.v v{{[1-9]+}}, -16(tp)
|
||||
%a33 = load ptr, ptr addrspace(5) %0, align 4
|
||||
; VENTUS: vlw.v v{{[1-9]+}}, -12(tp)
|
||||
%a34 = load ptr, ptr addrspace(5) %1, align 4
|
||||
; VENTUS: vlw12.v v0, 0(v0)
|
||||
%2 = load i32, ptr %a1, align 4
|
||||
; VENTUS: vlw12.v v1, 0(v1)
|
||||
%3 = load i32, ptr %a2, align 4
|
||||
%add = add nsw i32 %3, %2
|
||||
; VENTUS: vlw12.v v2, 0(v2)
|
||||
%4 = load i32, ptr %a3, align 4
|
||||
%add1 = add nsw i32 %add, %4
|
||||
; VENTUS: vlw12.v v3, 0(v3)
|
||||
%5 = load i32, ptr %a4, align 4
|
||||
%add2 = add nsw i32 %add1, %5
|
||||
%6 = load i32, ptr %a5, align 4
|
||||
; VENTUS: vlw12.v v{{[1-9]+}}, 0(v4)
|
||||
%add3 = add nsw i32 %add2, %6
|
||||
%7 = load i32, ptr %a6, align 4
|
||||
%add4 = add nsw i32 %add3, %7
|
||||
; VENTUS: vlw12.v v{{[1-9]+}}, 0(v5)
|
||||
%8 = load i32, ptr %a7, align 4
|
||||
%add5 = add nsw i32 %add4, %8
|
||||
%9 = load i32, ptr %a8, align 4
|
||||
|
@ -111,10 +212,8 @@ entry:
|
|||
%add29 = add nsw i32 %add28, %32
|
||||
%33 = load i32, ptr %a32, align 4
|
||||
%add30 = add nsw i32 %add29, %33
|
||||
; VENTUS: vlw12.v v{{[1-9]+}}, 0(v30)
|
||||
%34 = load i32, ptr %a33, align 4
|
||||
%add31 = add nsw i32 %add30, %34
|
||||
; VENTUS: vlw12.v v{{[1-9]+}}, 0(v31)
|
||||
%35 = load i32, ptr %a34, align 4
|
||||
%add32 = add nsw i32 %add31, %35
|
||||
ret i32 %add32
|
||||
|
|
|
@ -1,42 +1,79 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs -O1 < %s \
|
||||
; RUN: | FileCheck -check-prefix=VENTUS %s
|
||||
|
||||
|
||||
define dso_local ventus_kernel void @fadd(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
; VENTUS-LABEL: fadd:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: lw a1, 4(a0)
|
||||
; VENTUS-NEXT: lw a2, 0(a0)
|
||||
; VENTUS-NEXT: fadd.s a1, a2, a1
|
||||
; VENTUS-NEXT: lw a0, 8(a0)
|
||||
; VENTUS-NEXT: sw a1, 0(a0)
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
; VENTUS: fadd.s a{{[1-9]}}, a2, a1
|
||||
%add1 = fadd float %c, %d
|
||||
store float %add1, ptr addrspace(1) %result, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define dso_local ventus_kernel void @fsub(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
; VENTUS-LABEL: fsub:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: lw a1, 4(a0)
|
||||
; VENTUS-NEXT: lw a2, 0(a0)
|
||||
; VENTUS-NEXT: fsub.s a1, a2, a1
|
||||
; VENTUS-NEXT: lw a0, 8(a0)
|
||||
; VENTUS-NEXT: sw a1, 0(a0)
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
; VENTUS: fsub.s a{{[1-9]}}, a2, a1
|
||||
%sub = fsub float %c, %d
|
||||
store float %sub, ptr addrspace(1) %result, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define dso_local ventus_kernel void @fmul(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
; VENTUS-LABEL: fmul:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: lw a1, 4(a0)
|
||||
; VENTUS-NEXT: lw a2, 0(a0)
|
||||
; VENTUS-NEXT: fmul.s a1, a2, a1
|
||||
; VENTUS-NEXT: lw a0, 8(a0)
|
||||
; VENTUS-NEXT: sw a1, 0(a0)
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
; VENTUS: fmul.s a{{[1-9]}}, a2, a1
|
||||
%mul = fmul float %c, %d
|
||||
store float %mul, ptr addrspace(1) %result, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define dso_local ventus_kernel void @fdiv(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
; VENTUS-LABEL: fdiv:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: lw a1, 4(a0)
|
||||
; VENTUS-NEXT: lw a2, 0(a0)
|
||||
; VENTUS-NEXT: fdiv.s a1, a2, a1
|
||||
; VENTUS-NEXT: lw a0, 8(a0)
|
||||
; VENTUS-NEXT: sw a1, 0(a0)
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
; VENTUS: fdiv.s a{{[1-9]}}, a2, a1
|
||||
%div = fdiv float %c, %d
|
||||
store float %div, ptr addrspace(1) %result, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define dso_local ventus_kernel void @fmadd(float noundef %a, float noundef %b, float noundef %c, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
; VENTUS-LABEL: fmadd:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: lw a1, 8(a0)
|
||||
; VENTUS-NEXT: lw a2, 4(a0)
|
||||
; VENTUS-NEXT: lw a3, 0(a0)
|
||||
; VENTUS-NEXT: fmadd.s a1, a3, a2, a1
|
||||
; VENTUS-NEXT: lw a0, 12(a0)
|
||||
; VENTUS-NEXT: sw a1, 0(a0)
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
; VENTUS: fmadd.s a{{[1-9]}}, a3, a2, a1
|
||||
%div = call float @llvm.fma.f32(float %a, float %b, float %c)
|
||||
store float %div, ptr addrspace(1) %result, align 4
|
||||
ret void
|
||||
|
|
|
@ -144,10 +144,10 @@ define dso_local float @fneq(float noundef %a, float noundef %b) local_unnamed_a
|
|||
; VENTUS-LABEL: fneq:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: vmv.v.x v2, zero
|
||||
; VENTUS-NEXT: vmfne.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vmfeq.vv v0, v0, v1
|
||||
; VENTUS-NEXT: lui a0, %hi(.LCPI11_0)
|
||||
; VENTUS-NEXT: addi a0, a0, %lo(.LCPI11_0)
|
||||
; VENTUS-NEXT: vbne v0, v2, .LBB11_2
|
||||
; VENTUS-NEXT: vbeq v0, v2, .LBB11_2
|
||||
; VENTUS-NEXT: # %bb.1: # %entry
|
||||
; VENTUS-NEXT: vmv.v.x v0, a0
|
||||
; VENTUS-NEXT: join v0, v0, .LBB11_3
|
||||
|
@ -221,7 +221,8 @@ define dso_local float @fgt(float noundef %a) {
|
|||
; VENTUS-NEXT: lui a0, %hi(.LCPI14_0)
|
||||
; VENTUS-NEXT: lw a0, %lo(.LCPI14_0)(a0)
|
||||
; VENTUS-NEXT: vmv.v.x v1, zero
|
||||
; VENTUS-NEXT: vmfgt.vf v0, v0, a0
|
||||
; VENTUS-NEXT: vmv.v.x v2, a0
|
||||
; VENTUS-NEXT: vmflt.vv v0, v2, v0
|
||||
; VENTUS-NEXT: lui a0, %hi(.LCPI14_1)
|
||||
; VENTUS-NEXT: addi a0, a0, %lo(.LCPI14_1)
|
||||
; VENTUS-NEXT: vbne v0, v1, .LBB14_2
|
||||
|
@ -252,7 +253,8 @@ define dso_local float @fge(float noundef %a) {
|
|||
; VENTUS-NEXT: lui a0, %hi(.LCPI15_0)
|
||||
; VENTUS-NEXT: lw a0, %lo(.LCPI15_0)(a0)
|
||||
; VENTUS-NEXT: vmv.v.x v1, zero
|
||||
; VENTUS-NEXT: vmfge.vf v0, v0, a0
|
||||
; VENTUS-NEXT: vmv.v.x v2, a0
|
||||
; VENTUS-NEXT: vmfle.vv v0, v2, v0
|
||||
; VENTUS-NEXT: lui a0, %hi(.LCPI15_1)
|
||||
; VENTUS-NEXT: addi a0, a0, %lo(.LCPI15_1)
|
||||
; VENTUS-NEXT: vbne v0, v1, .LBB15_2
|
||||
|
@ -476,7 +478,10 @@ define float @fsgnjn_v(float %a, float %b) nounwind {
|
|||
; VENTUS-LABEL: fsgnjn_v:
|
||||
; VENTUS: # %bb.0:
|
||||
; VENTUS-NEXT: vfadd.vv v1, v0, v1
|
||||
; VENTUS-NEXT: vfsgnjn.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vmv.x.s a0, v0
|
||||
; VENTUS-NEXT: vmv.x.s a1, v1
|
||||
; VENTUS-NEXT: fsgnjn.s a0, a0, a1
|
||||
; VENTUS-NEXT: vmv.v.x v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
%1 = fadd float %a, %b
|
||||
%2 = fneg float %1
|
||||
|
@ -487,7 +492,9 @@ define float @fsgnjn_v(float %a, float %b) nounwind {
|
|||
define float @fsgnjn_v_1(float %a) nounwind {
|
||||
; VENTUS-LABEL: fsgnjn_v_1:
|
||||
; VENTUS: # %bb.0:
|
||||
; VENTUS-NEXT: vfsgnjn.vv v0, v0, v0
|
||||
; VENTUS-NEXT: vmv.x.s a0, v0
|
||||
; VENTUS-NEXT: fneg.s a0, a0
|
||||
; VENTUS-NEXT: vmv.v.x v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
%1 = fneg float %a
|
||||
ret float %1
|
||||
|
@ -496,7 +503,9 @@ define float @fsgnjn_v_1(float %a) nounwind {
|
|||
define float @fsgnjnx_v(float %a) nounwind {
|
||||
; VENTUS-LABEL: fsgnjnx_v:
|
||||
; VENTUS: # %bb.0:
|
||||
; VENTUS-NEXT: vfsgnjx.vv v0, v0, v0
|
||||
; VENTUS-NEXT: vmv.x.s a0, v0
|
||||
; VENTUS-NEXT: fabs.s a0, a0
|
||||
; VENTUS-NEXT: vmv.v.x v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
%1 = call float @llvm.fabs.f32(float %a)
|
||||
ret float %1
|
||||
|
|
|
@ -25,8 +25,8 @@ define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr add
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 16
|
||||
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw s0, -16(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, -12(sp)
|
||||
; VENTUS-NEXT: sw s0, -16(sp)
|
||||
; VENTUS-NEXT: .cfi_offset ra, 4
|
||||
; VENTUS-NEXT: .cfi_offset s0, 0
|
||||
; VENTUS-NEXT: lw s0, 8(a0)
|
||||
|
@ -37,8 +37,8 @@ define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr add
|
|||
; VENTUS-NEXT: call bar
|
||||
; VENTUS-NEXT: vmv.v.x v1, s0
|
||||
; VENTUS-NEXT: vsw12.v v0, 0(v1)
|
||||
; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw s0, -16(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, -12(sp)
|
||||
; VENTUS-NEXT: lw s0, -16(sp)
|
||||
; VENTUS-NEXT: addi sp, sp, -16
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
|
|
|
@ -12,19 +12,20 @@ target triple = "riscv32"
|
|||
define dso_local i32 @printf(ptr noundef %fmt, ...) {
|
||||
; VENTUS-LABEL: printf:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi tp, tp, 48
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 48
|
||||
; VENTUS-NEXT: addi sp, sp, 32
|
||||
; VENTUS-NEXT: addi tp, tp, 28
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 28
|
||||
; VENTUS-NEXT: li a0, 0
|
||||
; VENTUS-NEXT: vsw.v v7, -44(tp)
|
||||
; VENTUS-NEXT: vsw.v v6, -40(tp)
|
||||
; VENTUS-NEXT: vsw.v v5, -36(tp)
|
||||
; VENTUS-NEXT: vsw.v v4, -32(tp)
|
||||
; VENTUS-NEXT: vsw.v v3, -28(tp)
|
||||
; VENTUS-NEXT: vsw.v v2, -24(tp)
|
||||
; VENTUS-NEXT: vsw.v v1, -20(tp)
|
||||
; VENTUS-NEXT: addi a1, tp, -16
|
||||
; VENTUS-NEXT: sw a1, -16(tp)
|
||||
; VENTUS-NEXT: vmv.x.s a1, v1
|
||||
; VENTUS-NEXT: vsw.v v7, -28(tp)
|
||||
; VENTUS-NEXT: vsw.v v6, -24(tp)
|
||||
; VENTUS-NEXT: vsw.v v5, -20(tp)
|
||||
; VENTUS-NEXT: vsw.v v4, -16(tp)
|
||||
; VENTUS-NEXT: vsw.v v3, -12(tp)
|
||||
; VENTUS-NEXT: vsw.v v2, -8(tp)
|
||||
; VENTUS-NEXT: vsw.v v1, -4(tp)
|
||||
; VENTUS-NEXT: addi a1, tp, -28
|
||||
; VENTUS-NEXT: sw a1, -4(tp)
|
||||
; VENTUS-NEXT: lw a1, -32(tp)
|
||||
; VENTUS-NEXT: lui a2, %hi(MAX_FORMAT_STR_SIZE)
|
||||
; VENTUS-NEXT: lui a3, %hi(PRINT_BUFFER_ADDR)
|
||||
; VENTUS-NEXT: lw a4, %lo(MAX_FORMAT_STR_SIZE)(a2)
|
||||
|
@ -44,7 +45,8 @@ define dso_local i32 @printf(ptr noundef %fmt, ...) {
|
|||
; VENTUS-NEXT: blt a0, a4, .LBB0_1
|
||||
; VENTUS-NEXT: .LBB0_2: # %for.end
|
||||
; VENTUS-NEXT: vmv.v.x v0, a1
|
||||
; VENTUS-NEXT: addi tp, tp, -48
|
||||
; VENTUS-NEXT: addi sp, sp, -32
|
||||
; VENTUS-NEXT: addi tp, tp, -28
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%fmt.addr = alloca ptr, align 4
|
||||
|
|
|
@ -6,10 +6,12 @@
|
|||
define dso_local i32 @branch(i32 noundef %dim) local_unnamed_addr {
|
||||
; VENTUS-LABEL: branch:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi tp, tp, 16
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 16
|
||||
; VENTUS-NEXT: sw ra, -16(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 0
|
||||
; VENTUS-NEXT: sw ra, -16(sp)
|
||||
; VENTUS-NEXT: .cfi_offset ra, 0
|
||||
; VENTUS-NEXT: mv s0, tp
|
||||
; VENTUS-NEXT: .cfi_def_cfa s0, 0
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
; VENTUS-NEXT: call _Z13get_global_idj
|
||||
; VENTUS-NEXT: li a1, 14
|
||||
|
@ -25,14 +27,14 @@ define dso_local i32 @branch(i32 noundef %dim) local_unnamed_addr {
|
|||
; VENTUS-NEXT: join v0, v0, .LBB0_2
|
||||
; VENTUS-NEXT: .LBB0_2: # %cleanup
|
||||
; VENTUS-NEXT: vmv.v.x v0, a1
|
||||
; VENTUS-NEXT: lw ra, -16(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi tp, tp, -16
|
||||
; VENTUS-NEXT: lw ra, -16(sp)
|
||||
; VENTUS-NEXT: addi sp, sp, -16
|
||||
; VENTUS-NEXT: join v0, v0, .LBB0_4
|
||||
; VENTUS-NEXT: .LBB0_3: # %if.end3
|
||||
; VENTUS-NEXT: li a0, 4
|
||||
; VENTUS-NEXT: vmv.v.x v0, a0
|
||||
; VENTUS-NEXT: lw ra, -16(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi tp, tp, -16
|
||||
; VENTUS-NEXT: lw ra, -16(sp)
|
||||
; VENTUS-NEXT: addi sp, sp, -16
|
||||
; VENTUS-NEXT: call _Z13get_global_idj
|
||||
; VENTUS-NEXT: join v0, v0, .LBB0_4
|
||||
; VENTUS-NEXT: .LBB0_4:
|
||||
|
@ -60,11 +62,11 @@ define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noun
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 16
|
||||
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw s0, -16(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, -12(sp)
|
||||
; VENTUS-NEXT: sw s1, -16(sp)
|
||||
; VENTUS-NEXT: .cfi_offset ra, 4
|
||||
; VENTUS-NEXT: .cfi_offset s0, 0
|
||||
; VENTUS-NEXT: mv s0, a0
|
||||
; VENTUS-NEXT: .cfi_offset s1, 0
|
||||
; VENTUS-NEXT: mv s1, a0
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
; VENTUS-NEXT: call _Z13get_global_idj
|
||||
; VENTUS-NEXT: vmv.x.s a0, v0
|
||||
|
@ -72,8 +74,8 @@ define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noun
|
|||
; VENTUS-NEXT: vmv.v.x v1, a0
|
||||
; VENTUS-NEXT: vbeq v1, v0, .LBB1_4
|
||||
; VENTUS-NEXT: # %bb.1: # %for.body.lr.ph
|
||||
; VENTUS-NEXT: lw a3, 4(s0)
|
||||
; VENTUS-NEXT: lw a1, 0(s0)
|
||||
; VENTUS-NEXT: lw a3, 4(s1)
|
||||
; VENTUS-NEXT: lw a1, 0(s1)
|
||||
; VENTUS-NEXT: slli a4, a0, 2
|
||||
; VENTUS-NEXT: add a1, a1, a4
|
||||
; VENTUS-NEXT: lw a2, 0(a1)
|
||||
|
@ -89,8 +91,8 @@ define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noun
|
|||
; VENTUS-NEXT: .LBB1_4:
|
||||
; VENTUS-NEXT: join v0, v0, .LBB1_3
|
||||
; VENTUS-NEXT: .LBB1_3: # %for.cond.cleanup
|
||||
; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw s0, -16(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, -12(sp)
|
||||
; VENTUS-NEXT: lw s1, -16(sp)
|
||||
; VENTUS-NEXT: addi sp, sp, -16
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
|
@ -202,11 +204,11 @@ define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noun
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 16
|
||||
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw s0, -16(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, -12(sp)
|
||||
; VENTUS-NEXT: sw s1, -16(sp)
|
||||
; VENTUS-NEXT: .cfi_offset ra, 4
|
||||
; VENTUS-NEXT: .cfi_offset s0, 0
|
||||
; VENTUS-NEXT: mv s0, a0
|
||||
; VENTUS-NEXT: .cfi_offset s1, 0
|
||||
; VENTUS-NEXT: mv s1, a0
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
; VENTUS-NEXT: call _Z13get_global_idj
|
||||
; VENTUS-NEXT: vmv.x.s a0, v0
|
||||
|
@ -215,8 +217,8 @@ define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noun
|
|||
; VENTUS-NEXT: vbeq v1, v0, .LBB2_6
|
||||
; VENTUS-NEXT: # %bb.1: # %for.cond1.preheader.lr.ph
|
||||
; VENTUS-NEXT: li a1, 0
|
||||
; VENTUS-NEXT: lw a4, 4(s0)
|
||||
; VENTUS-NEXT: lw a2, 0(s0)
|
||||
; VENTUS-NEXT: lw a4, 4(s1)
|
||||
; VENTUS-NEXT: lw a2, 0(s1)
|
||||
; VENTUS-NEXT: slli a5, a0, 2
|
||||
; VENTUS-NEXT: add a2, a2, a5
|
||||
; VENTUS-NEXT: lw a3, 0(a2)
|
||||
|
@ -241,8 +243,8 @@ define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noun
|
|||
; VENTUS-NEXT: .LBB2_6:
|
||||
; VENTUS-NEXT: join v0, v0, .LBB2_5
|
||||
; VENTUS-NEXT: .LBB2_5: # %for.cond.cleanup
|
||||
; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw s0, -16(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, -12(sp)
|
||||
; VENTUS-NEXT: lw s1, -16(sp)
|
||||
; VENTUS-NEXT: addi sp, sp, -16
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
|
@ -286,11 +288,11 @@ define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noun
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 16
|
||||
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw s0, -16(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, -12(sp)
|
||||
; VENTUS-NEXT: sw s1, -16(sp)
|
||||
; VENTUS-NEXT: .cfi_offset ra, 4
|
||||
; VENTUS-NEXT: .cfi_offset s0, 0
|
||||
; VENTUS-NEXT: mv s0, a0
|
||||
; VENTUS-NEXT: .cfi_offset s1, 0
|
||||
; VENTUS-NEXT: mv s1, a0
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
; VENTUS-NEXT: call _Z13get_global_idj
|
||||
; VENTUS-NEXT: vmv.x.s a0, v0
|
||||
|
@ -299,8 +301,8 @@ define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noun
|
|||
; VENTUS-NEXT: vbeq v1, v0, .LBB3_10
|
||||
; VENTUS-NEXT: # %bb.1: # %for.body.lr.ph
|
||||
; VENTUS-NEXT: li a1, 0
|
||||
; VENTUS-NEXT: lw a2, 4(s0)
|
||||
; VENTUS-NEXT: lw a5, 0(s0)
|
||||
; VENTUS-NEXT: lw a2, 4(s1)
|
||||
; VENTUS-NEXT: lw a5, 0(s1)
|
||||
; VENTUS-NEXT: slli a3, a0, 2
|
||||
; VENTUS-NEXT: add a2, a2, a3
|
||||
; VENTUS-NEXT: add a3, a5, a3
|
||||
|
@ -342,8 +344,8 @@ define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noun
|
|||
; VENTUS-NEXT: .LBB3_10:
|
||||
; VENTUS-NEXT: join v0, v0, .LBB3_9
|
||||
; VENTUS-NEXT: .LBB3_9: # %for.cond.cleanup
|
||||
; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw s0, -16(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, -12(sp)
|
||||
; VENTUS-NEXT: lw s1, -16(sp)
|
||||
; VENTUS-NEXT: addi sp, sp, -16
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
|
@ -393,10 +395,12 @@ for.inc: ; preds = %for.inc.sink.split,
|
|||
define dso_local i32 @_Z13get_global_idj(i32 noundef %dim) local_unnamed_addr {
|
||||
; VENTUS-LABEL: _Z13get_global_idj:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi tp, tp, 16
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 16
|
||||
; VENTUS-NEXT: sw ra, -16(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 0
|
||||
; VENTUS-NEXT: sw ra, -16(sp)
|
||||
; VENTUS-NEXT: .cfi_offset ra, 0
|
||||
; VENTUS-NEXT: mv s0, tp
|
||||
; VENTUS-NEXT: .cfi_def_cfa s0, 0
|
||||
; VENTUS-NEXT: li a0, 2
|
||||
; VENTUS-NEXT: vmv.v.x v1, a0
|
||||
; VENTUS-NEXT: vbeq v0, v1, .LBB4_4
|
||||
|
@ -424,8 +428,8 @@ define dso_local i32 @_Z13get_global_idj(i32 noundef %dim) local_unnamed_addr {
|
|||
; VENTUS-NEXT: join v0, v0, .LBB4_8
|
||||
; VENTUS-NEXT: .LBB4_8: # %return
|
||||
; VENTUS-NEXT: vmv.v.x v0, a0
|
||||
; VENTUS-NEXT: lw ra, -16(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi tp, tp, -16
|
||||
; VENTUS-NEXT: lw ra, -16(sp)
|
||||
; VENTUS-NEXT: addi sp, sp, -16
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
switch i32 %dim, label %return [
|
||||
|
|
Loading…
Reference in New Issue