Merge pull request #89 from THU-DSP-LAB/eliminate_call_frame

[VENTUS][fix] Fix framelowering and calculation method of stack offset

commit 49c039a902
@@ -81,7 +81,7 @@ __builtin_riscv_workitem_linear_id:
     .type __builtin_riscv_global_linear_id, @function
 __builtin_riscv_global_linear_id:
     addi sp, sp, 4
-    sw ra, 0(sp)
+    sw ra, -4(sp)
     csrr a3, CSR_KNL # Get kernel metadata buffer
     lw t0, KNL_WORK_DIM(a3) # Get work_dims
     call __builtin_riscv_global_id_x
@@ -109,7 +109,7 @@ __builtin_riscv_global_linear_id:
     vadd.vv v5, v5, v6 # global_linear_id3 = tmp + global_linear_id2
 .GLR:
     vadd.vx v0, v5, zero # Return global_linear_id for 1/2/3 dims
-    lw ra, 0(sp)
+    lw ra, -4(sp)
     addi sp, sp, -4
     ret
 
@@ -202,7 +202,7 @@ __builtin_riscv_workitem_id_z:
     .type __builtin_riscv_global_id_x, @function
 __builtin_riscv_global_id_x:
     addi sp, sp, 4
-    sw ra, 0(sp)
+    sw ra, -4(sp)
     call __builtin_riscv_workitem_id_x
     csrr a0, CSR_KNL # Get kernel metadata buffer
     csrr t1, CSR_GID_X # Get group_id_x
@@ -211,7 +211,7 @@ __builtin_riscv_global_id_x:
     mul t6, t1, t3 # CSR_GID_X * local_size_x
     add t6, t6, t4 # Get global_offset_x + CSR_GID_X * local_size_x
     vadd.vx v0,v0, t6
-    lw ra, 0(sp)
+    lw ra, -4(sp)
     addi sp, sp, -4
     ret
 
@@ -221,7 +221,7 @@ __builtin_riscv_global_id_x:
     .type __builtin_riscv_global_id_y, @function
 __builtin_riscv_global_id_y:
     addi sp, sp, 4
-    sw ra, 0(sp)
+    sw ra, -4(sp)
     call __builtin_riscv_workitem_id_y
     csrr t1, CSR_GID_Y # Get group_id_y
     lw t2, KNL_LC_SIZE_Y(a0) # Get local_size_y
@@ -229,7 +229,7 @@ __builtin_riscv_global_id_y:
     mul t3, t1, t2 # CSR_GID_Y * local_size_y
     add t3, t3, t4 # global_offset_y + (CSR_GID_Y * local_size_y)
     vadd.vx v0, v0, t3 # global_id_y
-    lw ra, 0(sp)
+    lw ra, -4(sp)
     addi sp, sp, -4
     ret
 
@@ -239,7 +239,7 @@ __builtin_riscv_global_id_y:
     .type __builtin_riscv_global_id_z, @function
 __builtin_riscv_global_id_z:
     addi sp, sp, 4
-    sw ra, 0(sp)
+    sw ra, -4(sp)
     call __builtin_riscv_workitem_id_z
     csrr a0, CSR_KNL # Get kernel metadata buffer
     csrr t1, CSR_GID_Z # Get group_id_z
@@ -248,7 +248,7 @@ __builtin_riscv_global_id_z:
     mul t2, t2, t1 # CSR_GID_Z * local_size_z
     add t2, t2, t3 # global_offset_z + (CSR_GID_Z * local_size_z)
     vadd.vx v0, v0, t2 # global_id_z
-    lw ra, 0(sp)
+    lw ra, -4(sp)
     addi sp, sp, -4
     ret
 
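Note on the builtin prologue/epilogue change above: assuming the Ventus per-thread stack grows upward (sp is bumped by +4 on entry and by -4 on exit, as the hunks show), the 4-byte slot reserved by "addi sp, sp, 4" is the range [sp-4, sp), so saving ra at -4(sp) keeps it inside the reserved slot instead of one word past it. A minimal standalone C++ model of that pattern (illustrative only, not part of the commit):

// Toy model of an upward-growing per-thread stack: the prologue reserves one
// 4-byte word and stores ra just below the new sp; the epilogue reloads it
// from the same slot before releasing the word.
#include <cassert>
#include <cstdint>

struct ThreadStack {
  uint32_t Mem[64] = {};
  uint32_t SP = 0; // word index, grows upward

  void prologue(uint32_t RA) {
    SP += 1;          // addi sp, sp, 4
    Mem[SP - 1] = RA; // sw   ra, -4(sp)
  }
  uint32_t epilogue() {
    uint32_t RA = Mem[SP - 1]; // lw   ra, -4(sp)
    SP -= 1;                   // addi sp, sp, -4
    return RA;
  }
};

int main() {
  ThreadStack S;
  S.prologue(0x1234);
  assert(S.epilogue() == 0x1234);
  return 0;
}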
@@ -466,6 +466,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
   const RISCVRegisterInfo *RI = STI.getRegisterInfo();
   MachineFrameInfo &MFI = MF.getFrameInfo();
+  const RISCVInstrInfo *TII = STI.getInstrInfo();
   Register SPReg = getSPReg(STI);
   Register TPReg = getTPReg(STI);
 
@@ -505,26 +506,46 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
     RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
                   StackOffset::getFixed(-SPStackSize),
                   MachineInstr::FrameDestroy, getStackAlign());
-  if(TPStackSize)
+  if(TPStackSize) {
     RI->adjustReg(MBB, MBBI, DL, TPReg, TPReg,
                   StackOffset::getFixed(-TPStackSize),
                   MachineInstr::FrameDestroy, getStackAlign());
+
+    // Restore V32
+    BuildMI(MBB, MBBI, DL, TII->get(RISCV::VMV_V_X),
+            RI->getPrivateMemoryBaseRegister(MF))
+        .addReg(TPReg);
+  }
 
   // Emit epilogue for shadow call stack.
   emitSCSEpilogue(MF, MBB, MBBI, DL);
 }
 
-uint64_t RISCVFrameLowering::getExtractedStackOffset(const MachineFunction &MF,
-                      unsigned FI, RISCVStackID::Value Stack) const {
+uint64_t RISCVFrameLowering::getStackOffset(const MachineFunction &MF,
+                                            int FI,
+                                            RISCVStackID::Value Stack) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   uint64_t StackSize = 0;
-  for(int I = FI + 1; I != MFI.getObjectIndexEnd(); I++) {
-    if(static_cast<unsigned>(MFI.getStackID(I)) != Stack) {
+  // because the parameters spilling to the stack are not in the current TP
+  // stack, the offset in the current stack should not be calculated from a
+  // negative FI.
+  for (int I = FI < 0 ? MFI.getObjectIndexBegin() : 0; I != FI + 1; I++) {
+    if (static_cast<unsigned>(MFI.getStackID(I)) == Stack) {
       // Need to consider the alignment for different frame index
-      uint64_t Size = MFI.getObjectSize(I);
-      StackSize += Size;
+      Align Alignment =
+          MFI.getObjectAlign(I).value() <= 4 ? Align(4) : MFI.getObjectAlign(I);
+      StackSize += MFI.getObjectSize(I);
+      StackSize = alignTo(StackSize, Alignment);
     }
   }
 
+  // In the case of parameters spilling to the stack, needing to add the size of
+  // the current TP stack because the parameters are on the caller's TP stack
+  // instead of current stack.
+  if (FI < 0 && !MF.getFunction().isVarArg())
+    StackSize += getStackSize(MF, RISCVStackID::VGPRSpill);
+
   return StackSize;
 }
 
@@ -545,33 +566,16 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
           StackID == RISCVStackID::SGPRSpill ||
           StackID == RISCVStackID::VGPRSpill) &&
          "Unexpected stack ID for the frame object.");
-  uint8_t Stack = MFI.getStackID(FI);
-  StackOffset Offset =
-      StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea()
-                            -getExtractedStackOffset(MF, FI, RISCVStackID::Value(Stack))
-                            + MFI.getOffsetAdjustment());
 
   // Different stacks for sALU and vALU threads.
-  FrameReg = StackID == RISCVStackID::SGPRSpill ? RISCV::X2 : RISCV::X4;
-
-  if (CSI.size()) {
-    // For callee saved registers
-    MinCSFI = CSI[0].getFrameIdx();
-    MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
-    if (FI >= MinCSFI && FI <= MaxCSFI) {
-      Offset -= StackOffset::getFixed(RVFI->getVarArgsSaveSize());
-      return Offset;
-    }
-  }
-  // TODO: This only saves sGPR CSRs, as we haven't define vGPR CSRs
-  // within getNonLibcallCSI.
-  // if (FI >= MinCSFI && FI <= MaxCSFI) {
-  Offset -= StackOffset::getFixed(
-      getStackSize(const_cast<MachineFunction&>(MF),
-                   (RISCVStackID::Value)StackID));
-  return Offset;
+  if (StackID == RISCVStackID::VGPRSpill)
+    FrameReg = RISCV::X4;
+  else if (StackID == RISCVStackID::SGPRSpill)
+    FrameReg = RISCV::X2;
+  else
+    FrameReg = RISCV::X8;
+  return -StackOffset::getFixed(
+      getStackOffset(MF, FI, (RISCVStackID::Value)StackID));
 }
 
 void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
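To illustrate the new offset scheme, here is a small standalone sketch (plain C++, not the backend code; the sizes, alignments and stack IDs are made up): getStackOffset walks the frame objects that share the requested stack ID up to and including FI, placing each object and rounding the running total up to that object's alignment (clamped to at least 4 bytes), and getFrameIndexReference then returns the negative of that total as the offset from the chosen frame register.

// Standalone sketch of the accumulation behind getStackOffset (illustrative,
// non-negative FI case only; the real code also handles negative FIs for
// arguments spilled on the caller's TP stack).
#include <cstdint>
#include <cstdio>
#include <vector>

struct FrameObject {
  uint64_t Size;
  uint64_t Align;   // object alignment in bytes
  unsigned StackID; // which per-thread stack the object lives on
};

int64_t frameIndexOffset(const std::vector<FrameObject> &Objects, int FI,
                         unsigned Stack) {
  uint64_t StackSize = 0;
  for (int I = 0; I != FI + 1; ++I) {
    if (Objects[I].StackID != Stack)
      continue;
    uint64_t A = Objects[I].Align <= 4 ? 4 : Objects[I].Align; // clamp to 4
    StackSize += Objects[I].Size;            // place the object
    StackSize = (StackSize + A - 1) / A * A; // align the running total
  }
  return -static_cast<int64_t>(StackSize); // offset below the frame register
}

int main() {
  // fi#0: 6 bytes, align 4; fi#1: 16 bytes, align 8; both on the same stack.
  std::vector<FrameObject> Objects = {{6, 4, 2}, {16, 8, 2}};
  std::printf("%lld\n", (long long)frameIndexOffset(Objects, 1, 2)); // -24
}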
@@ -632,7 +636,7 @@ static unsigned estimateFunctionSizeInBytes(const MachineFunction &MF,
 // by the frame pointer.
 // Let eliminateCallFramePseudoInstr preserve stack space for it.
 bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
-  return !MF.getFrameInfo().hasVarSizedObjects();
+  return false;
 }
 
 // Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions.
@@ -644,6 +648,9 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
   // Kernel and normal function has different stack pointer for Ventus GPGPU.
   Register SPReg = RISCV::X4; // MFI->isEntryFunction() ? RISCV::X2 : RISCV::X4;
   DebugLoc DL = MI->getDebugLoc();
+  Register TPReg = getTPReg(STI);
+  const RISCVInstrInfo *TII = STI.getInstrInfo();
+  const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
 
   if (!hasReservedCallFrame(MF)) {
     // If space has not been reserved for a call frame, ADJCALLSTACKDOWN and
@@ -660,9 +667,19 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
       if (MI->getOpcode() == RISCV::ADJCALLSTACKDOWN)
         Amount = -Amount;
 
-      const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
       RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
                    MachineInstr::NoFlags, getStackAlign());
+
+      // The value of TP will be re-assigned to V32 at the end of the callee
+      // function, which is actually the TP value after ADJCALLSTACKUP, so the
+      // tp value after ADJCALLSTACKDOWN should be reassigned to V32 to ensure
+      // that it is consistent with the TP value that has not been internally
+      // adjusted (that is, excluding the initial TP adjustment) within the
+      // current function.
+      if (MI->getOpcode() == RISCV::ADJCALLSTACKDOWN)
+        BuildMI(MBB, MI, DL, TII->get(RISCV::VMV_V_X),
+                RI.getPrivateMemoryBaseRegister(MF))
+            .addReg(TPReg);
     }
   }
 
@@ -707,19 +724,20 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
   return 0;
 }
 
-uint64_t RISCVFrameLowering::getStackSize(MachineFunction &MF,
+uint64_t RISCVFrameLowering::getStackSize(const MachineFunction &MF,
                                           RISCVStackID::Value ID) const {
-  MachineFrameInfo &MFI = MF.getFrameInfo();
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
   uint64_t StackSize = 0;
 
-  for(int I = MFI.getObjectIndexBegin(); I != MFI.getObjectIndexEnd(); I++) {
+  for(int I = 0; I != MFI.getObjectIndexEnd(); I++) {
     if(static_cast<unsigned>(MFI.getStackID(I)) == ID) {
-      // Need to consider the alignment for different frame index
-      uint64_t Size = ((MFI.getObjectSize(I) + 3) >> 2) * 4;
-      StackSize += Size;
+      Align Alignment = MFI.getObjectAlign(I).value() <= 4 ?
+                        Align(4) : MFI.getObjectAlign(I);
+      StackSize += MFI.getObjectSize(I);
+      StackSize = alignTo(StackSize, Alignment);
     }
   }
 
   return StackSize;
 }
 
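The counting change in getStackSize matters once an object needs more than 4-byte alignment; a small self-contained check (made-up sizes, not from the commit) contrasts the old per-object round-to-4 with the new running alignTo:

// Worked check (standalone, illustrative sizes) of the counting change in
// getStackSize: fi#0 is 4 bytes with 4-byte alignment, fi#1 is 8 bytes with
// 8-byte alignment, so 4 bytes of padding are needed before fi#1.
#include <cstdint>

constexpr uint64_t alignTo(uint64_t V, uint64_t A) { return (V + A - 1) / A * A; }

// Old scheme: each object rounded up to a multiple of 4, then summed.
constexpr uint64_t OldSize = ((4 + 3) >> 2) * 4 + ((8 + 3) >> 2) * 4; // 12
// New scheme: running total aligned to each object's (>= 4 byte) alignment.
constexpr uint64_t NewSize = alignTo(alignTo(0 + 4, 4) + 8, 8);       // 16

static_assert(OldSize == 12, "old count misses the padding before fi#1");
static_assert(NewSize == 16, "new count reserves room for 8-byte alignment");

int main() { return 0; }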
@@ -68,7 +68,7 @@ public:
   bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
 
   /// Get stack size for different stack ID
-  uint64_t getStackSize(MachineFunction &MF, RISCVStackID::Value ID) const;
+  uint64_t getStackSize(const MachineFunction &MF, RISCVStackID::Value ID) const;
 
   /// Frame Objects:
   /// fi#0: id=4 size=48, align=4, at location [SP+8]
@@ -77,7 +77,7 @@ public:
   /// As we can see, if we split the stack, different frame offset calculation
   /// need to be modified too, when calculate the TP stack offset, we need to
   /// extract the stack offset of 'SP' in machine function frame
-  uint64_t getExtractedStackOffset(const MachineFunction &MF, unsigned FI,
+  uint64_t getStackOffset(const MachineFunction &MF, int FI,
                           RISCVStackID::Value Stack) const;
 
   /// Before insert prolog/epilog information, set stack ID for each frame index
@@ -11787,8 +11787,12 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
     // type, instead of the scalable vector type.
     ValVT = LocVT;
   }
-  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
+
+  // Just align to 4 bytes, because parameters more than 4 bytes will be split
+  // into 4-byte parameters
+  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), 0,
                                  /*IsImmutable=*/true);
+  MFI.setObjectAlignment(FI, Align(4));
   // This is essential for calculating stack size for VGPRSpill
   MFI.setStackID(FI, RISCVStackID::VGPRSpill);
   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -11982,6 +11986,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
       RegInfo.addLiveIn(ArgRegs[I], Reg);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
+      MFI.setObjectAlignment(FI, Align(4));
       MFI.setStackID(FI, RISCVStackID::VGPRSpill);
       // MFI.setStackID(FI, RISCVStackID::VGPRSpill);
       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
@@ -12151,6 +12156,9 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
   SmallVector<SDValue, 8> MemOpChains;
   SDValue StackPtr;
+
+  // Get the value of adjusting the stack frame before the Call.
+  uint64_t CurrentFrameSize = Chain->getConstantOperandVal(1);
   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
     SDValue ArgValue = OutVals[i];
@@ -12256,11 +12264,13 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X4, PtrVT);
       SDValue Address =
           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
-                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
+                      DAG.getIntPtrConstant(-((int)VA.getLocMemOffset()
+                                            + CurrentFrameSize), DL));
 
       // Emit the store.
       MemOpChains.push_back(
-          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
+          DAG.getStore(Chain, DL, ArgValue, Address,
+                       MachinePointerInfo(RISCVAS::PRIVATE_ADDRESS)));
     }
   }
 
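A minimal sketch (plain C++, illustrative numbers only, not the backend code) of the outgoing-argument address arithmetic introduced above: a stack-passed argument is now stored below the per-thread stack pointer, at an offset of -(LocMemOffset + CurrentFrameSize), rather than at a positive LocMemOffset.

// Mirrors DAG.getIntPtrConstant(-((int)VA.getLocMemOffset() + CurrentFrameSize))
// added to the stack pointer value held in tp.
#include <cstdint>
#include <cstdio>

uint64_t outgoingArgAddress(uint64_t StackPtr, uint64_t LocMemOffset,
                            uint64_t CurrentFrameSize) {
  return StackPtr - (LocMemOffset + CurrentFrameSize);
}

int main() {
  // Illustrative only: with a 16-byte call-frame adjustment, the slot at
  // LocMemOffset 8 ends up 24 bytes below the stack pointer value.
  std::printf("0x%llx\n",
              (unsigned long long)outgoingArgAddress(0x1000, 8, 16)); // 0xfe8
}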
@@ -1463,6 +1463,12 @@ std::string RISCVInstrInfo::createMIROperandComment(
   return Comment;
 }
 
+int RISCVInstrInfo::getSPAdjust(const MachineInstr &MI) const {
+  // FIXME: Don't need this value now, but we can add relevant modifications
+  // here when we optimize the PrologueInsert stage in the future.
+  return 0;
+}
+
 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
 bool RISCV::isSEXT_W(const MachineInstr &MI) {
   return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
@@ -202,6 +202,8 @@ public:
 
   bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
 
+  int getSPAdjust(const MachineInstr &MI) const;
+
 protected:
   const RISCVSubtarget &STI;
 };
@@ -0,0 +1,347 @@
+; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=VENTUS %s
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) vscale_range(1,2048)
+define dso_local <16 x double> @func(<16 x double> noundef %x, <16 x double> noundef %y) local_unnamed_addr {
+; VENTUS: vsw.v v33, -4(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v34, -8(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v35, -12(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v36, -16(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v37, -20(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v38, -24(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v39, -28(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v40, -32(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v41, -36(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v42, -40(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v43, -44(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v44, -48(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v45, -52(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v46, -56(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v47, -60(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v48, -64(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v49, -68(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v50, -72(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v51, -76(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v52, -80(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v53, -84(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v54, -88(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v55, -92(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v56, -96(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v57, -100(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v58, -104(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v59, -108(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v60, -112(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v61, -116(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v62, -120(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v63, -124(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v64, -128(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v65, -132(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v66, -136(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v67, -140(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v68, -144(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v69, -148(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v70, -152(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v71, -156(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v72, -160(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v73, -164(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v74, -168(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v75, -172(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v76, -176(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v77, -180(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v78, -184(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v79, -188(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v80, -192(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v81, -196(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v82, -200(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v83, -204(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v84, -208(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v85, -212(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v86, -216(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v87, -220(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v88, -224(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v89, -228(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v90, -232(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v91, -236(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v92, -240(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v93, -244(v32) # 4-byte Folded Spill
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v94, -248(v32) # 4-byte Folded Spill
+entry:
+  %add = fadd <16 x double> %x, %y
+  ret <16 x double> %add
+}
+
+; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) vscale_range(1,2048)
+define dso_local ventus_kernel void @test_fn(ptr addrspace(1) nocapture noundef readonly align 128 %x, ptr addrspace(1) nocapture noundef readonly align 128 %y, ptr addrspace(1) nocapture noundef writeonly align 128 %dst) {
+; VENTUS: addi tp, tp, 128
+; VENTUS-NEXT: li t0, 4
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 2
+; VENTUS-NEXT: vmv.v.x v66, t0
+; VENTUS-NEXT: regext zero, zero, 80
+; VENTUS-NEXT: vsw.v v34, 0(v66)
+; VENTUS-NEXT: li t0, 8
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v65, 0(v34)
+; VENTUS-NEXT: li t0, 12
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 136
+; VENTUS-NEXT: vsw.v v64, 0(v34)
+; VENTUS-NEXT: li t0, 16
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v63, 0(v34)
+; VENTUS-NEXT: li t0, 20
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v62, 0(v34)
+; VENTUS-NEXT: li t0, 24
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v61, 0(v34)
+; VENTUS-NEXT: li t0, 28
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v60, 0(v34)
+; VENTUS-NEXT: li t0, 32
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v59, 0(v34)
+; VENTUS-NEXT: li t0, 36
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v58, 0(v34)
+; VENTUS-NEXT: li t0, 40
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v57, 0(v34)
+; VENTUS-NEXT: li t0, 44
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v56, 0(v34)
+; VENTUS-NEXT: li t0, 48
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v55, 0(v34)
+; VENTUS-NEXT: li t0, 52
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v54, 0(v34)
+; VENTUS-NEXT: li t0, 56
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v53, 0(v34)
+; VENTUS-NEXT: li t0, 60
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v52, 0(v34)
+; VENTUS-NEXT: li t0, 64
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v51, 0(v34)
+; VENTUS-NEXT: li t0, 68
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v50, 0(v34)
+; VENTUS-NEXT: li t0, 72
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v49, 0(v34)
+; VENTUS-NEXT: li t0, 76
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v48, 0(v34)
+; VENTUS-NEXT: li t0, 80
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v47, 0(v34)
+; VENTUS-NEXT: li t0, 84
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v46, 0(v34)
+; VENTUS-NEXT: li t0, 88
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v45, 0(v34)
+; VENTUS-NEXT: li t0, 92
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v44, 0(v34)
+; VENTUS-NEXT: li t0, 96
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v43, 0(v34)
+; VENTUS-NEXT: li t0, 100
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v42, 0(v34)
+; VENTUS-NEXT: li t0, 104
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v41, 0(v34)
+; VENTUS-NEXT: li t0, 108
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v40, 0(v34)
+; VENTUS-NEXT: li t0, 112
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v39, 0(v34)
+; VENTUS-NEXT: li t0, 116
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v38, 0(v34)
+; VENTUS-NEXT: li t0, 120
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v37, 0(v34)
+; VENTUS-NEXT: li t0, 124
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v36, 0(v34)
+; VENTUS-NEXT: li t0, 128
+; VENTUS-NEXT: sub t0, tp, t0
+; VENTUS-NEXT: regext zero, zero, 1
+; VENTUS-NEXT: vmv.v.x v34, t0
+; VENTUS-NEXT: regext zero, zero, 72
+; VENTUS-NEXT: vsw.v v35, 0(v34)
+; VENTUS-NEXT: call _Z3minDv16_dS_
+; VENTUS-NEXT: addi tp, tp, -128
+entry:
+  %call = call i32 @_Z13get_global_idj(i32 noundef 0)
+  %arrayidx = getelementptr inbounds <16 x double>, ptr addrspace(1) %x, i32 %call
+  %0 = load <16 x double>, ptr addrspace(1) %arrayidx, align 128
+  %arrayidx1 = getelementptr inbounds <16 x double>, ptr addrspace(1) %y, i32 %call
+  %1 = load <16 x double>, ptr addrspace(1) %arrayidx1, align 128
+  %call2 = call <16 x double> @_Z3minDv16_dS_(<16 x double> noundef %0, <16 x double> noundef %1)
+  %arrayidx3 = getelementptr inbounds <16 x double>, ptr addrspace(1) %dst, i32 %call
+  store <16 x double> %call2, ptr addrspace(1) %arrayidx3, align 128
+  ret void
+}
+
+; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none)
+declare dso_local i32 @_Z13get_global_idj(i32 noundef) local_unnamed_addr #2
+
+; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none)
+declare dso_local <16 x double> @_Z3minDv16_dS_(<16 x double> noundef, <16 x double> noundef) local_unnamed_addr #2