[VENTUS][fix] Fix the offset of private variables on the stack

Fix the offset of private variables on the stack.
This commit is contained in:
qinfan 2023-12-22 14:13:04 +08:00
parent 755797e27c
commit d809d3a2bd
3 changed files with 18 additions and 30 deletions

View File

@ -81,7 +81,7 @@ __builtin_riscv_workitem_linear_id:
.type __builtin_riscv_global_linear_id, @function .type __builtin_riscv_global_linear_id, @function
__builtin_riscv_global_linear_id: __builtin_riscv_global_linear_id:
addi sp, sp, 4 addi sp, sp, 4
sw ra, 0(sp) sw ra, -4(sp)
csrr a3, CSR_KNL # Get kernel metadata buffer csrr a3, CSR_KNL # Get kernel metadata buffer
lw t0, KNL_WORK_DIM(a3) # Get work_dims lw t0, KNL_WORK_DIM(a3) # Get work_dims
call __builtin_riscv_global_id_x call __builtin_riscv_global_id_x
@ -109,7 +109,7 @@ __builtin_riscv_global_linear_id:
vadd.vv v5, v5, v6 # global_linear_id3 = tmp + global_linear_id2 vadd.vv v5, v5, v6 # global_linear_id3 = tmp + global_linear_id2
.GLR: .GLR:
vadd.vx v0, v5, zero # Return global_linear_id for 1/2/3 dims vadd.vx v0, v5, zero # Return global_linear_id for 1/2/3 dims
lw ra, 0(sp) lw ra, -4(sp)
addi sp, sp, -4 addi sp, sp, -4
ret ret
@ -202,7 +202,7 @@ __builtin_riscv_workitem_id_z:
.type __builtin_riscv_global_id_x, @function .type __builtin_riscv_global_id_x, @function
__builtin_riscv_global_id_x: __builtin_riscv_global_id_x:
addi sp, sp, 4 addi sp, sp, 4
sw ra, 0(sp) sw ra, -4(sp)
call __builtin_riscv_workitem_id_x call __builtin_riscv_workitem_id_x
csrr a0, CSR_KNL # Get kernel metadata buffer csrr a0, CSR_KNL # Get kernel metadata buffer
csrr t1, CSR_GID_X # Get group_id_x csrr t1, CSR_GID_X # Get group_id_x
@ -211,7 +211,7 @@ __builtin_riscv_global_id_x:
mul t6, t1, t3 # CSR_GID_X * local_size_x mul t6, t1, t3 # CSR_GID_X * local_size_x
add t6, t6, t4 # Get global_offset_x + CSR_GID_X * local_size_x add t6, t6, t4 # Get global_offset_x + CSR_GID_X * local_size_x
vadd.vx v0,v0, t6 vadd.vx v0,v0, t6
lw ra, 0(sp) lw ra, -4(sp)
addi sp, sp, -4 addi sp, sp, -4
ret ret
@ -221,7 +221,7 @@ __builtin_riscv_global_id_x:
.type __builtin_riscv_global_id_y, @function .type __builtin_riscv_global_id_y, @function
__builtin_riscv_global_id_y: __builtin_riscv_global_id_y:
addi sp, sp, 4 addi sp, sp, 4
sw ra, 0(sp) sw ra, -4(sp)
call __builtin_riscv_workitem_id_y call __builtin_riscv_workitem_id_y
csrr t1, CSR_GID_Y # Get group_id_y csrr t1, CSR_GID_Y # Get group_id_y
lw t2, KNL_LC_SIZE_Y(a0) # Get local_size_y lw t2, KNL_LC_SIZE_Y(a0) # Get local_size_y
@ -229,7 +229,7 @@ __builtin_riscv_global_id_y:
mul t3, t1, t2 # CSR_GID_Y * local_size_y mul t3, t1, t2 # CSR_GID_Y * local_size_y
add t3, t3, t4 # global_offset_y + (CSR_GID_Y * local_size_y) add t3, t3, t4 # global_offset_y + (CSR_GID_Y * local_size_y)
vadd.vx v0, v0, t3 # global_id_y vadd.vx v0, v0, t3 # global_id_y
lw ra, 0(sp) lw ra, -4(sp)
addi sp, sp, -4 addi sp, sp, -4
ret ret
@ -239,7 +239,7 @@ __builtin_riscv_global_id_y:
.type __builtin_riscv_global_id_z, @function .type __builtin_riscv_global_id_z, @function
__builtin_riscv_global_id_z: __builtin_riscv_global_id_z:
addi sp, sp, 4 addi sp, sp, 4
sw ra, 0(sp) sw ra, -4(sp)
call __builtin_riscv_workitem_id_z call __builtin_riscv_workitem_id_z
csrr a0, CSR_KNL # Get kernel metadata buffer csrr a0, CSR_KNL # Get kernel metadata buffer
csrr t1, CSR_GID_Z # Get group_id_z csrr t1, CSR_GID_Z # Get group_id_z
@ -248,7 +248,7 @@ __builtin_riscv_global_id_z:
mul t2, t2, t1 # CSR_GID_Z * local_size_z mul t2, t2, t1 # CSR_GID_Z * local_size_z
add t2, t2, t3 # global_offset_z + (CSR_GID_Z * local_size_z) add t2, t2, t3 # global_offset_z + (CSR_GID_Z * local_size_z)
vadd.vx v0, v0, t2 # global_id_z vadd.vx v0, v0, t2 # global_id_z
lw ra, 0(sp) lw ra, -4(sp)
addi sp, sp, -4 addi sp, sp, -4
ret ret
@ -387,4 +387,3 @@ __builtin_riscv_work_dim:
lw t0, KNL_WORK_DIM(a0) # Get work_dim lw t0, KNL_WORK_DIM(a0) # Get work_dim
vmv.v.x v0, t0 vmv.v.x v0, t0
ret ret

View File

@ -521,13 +521,13 @@ uint64_t RISCVFrameLowering::getStackOffset(const MachineFunction &MF,
// because the parameters spilling to the stack are not in the current TP // because the parameters spilling to the stack are not in the current TP
// stack, the offset in the current stack should not be calculated from a // stack, the offset in the current stack should not be calculated from a
// negative FI. // negative FI.
for (int I = FI < 0 ? MFI.getObjectIndexBegin() : 0; I != FI; I++) { for (int I = FI < 0 ? MFI.getObjectIndexBegin() : 0; I != FI + 1; I++) {
if (static_cast<unsigned>(MFI.getStackID(I)) == Stack) { if (static_cast<unsigned>(MFI.getStackID(I)) == Stack) {
// Need to consider the alignment for different frame index // Need to consider the alignment for different frame index
Align Alignment = Align Alignment =
MFI.getObjectAlign(I).value() <= 4 ? Align(4) : MFI.getObjectAlign(I); MFI.getObjectAlign(I).value() <= 4 ? Align(4) : MFI.getObjectAlign(I);
uint64_t AlignedSize = alignTo(MFI.getObjectSize(I), Alignment); StackSize += MFI.getObjectSize(I);
StackSize += AlignedSize; StackSize = alignTo(StackSize, Alignment);
} }
} }
@ -537,7 +537,7 @@ uint64_t RISCVFrameLowering::getStackOffset(const MachineFunction &MF,
if (FI < 0 && !MF.getFunction().isVarArg()) if (FI < 0 && !MF.getFunction().isVarArg())
StackSize += getStackSize(MF, RISCVStackID::VGPRSpill); StackSize += getStackSize(MF, RISCVStackID::VGPRSpill);
return alignTo(StackSize, Align(4)); return StackSize;
} }
StackOffset StackOffset

View File

@ -11832,7 +11832,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
else else
analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_Ventus); analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_Ventus);
SmallVector<SDValue> MemVec;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i]; CCValAssign &VA = ArgLocs[i];
SDValue ArgValue; SDValue ArgValue;
@ -11859,15 +11858,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL); ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
else if (VA.isRegLoc()) else if (VA.isRegLoc())
ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this, Ins[i]); ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this, Ins[i]);
else { else
// Temporarily put the created parameter node to MemVec instead of to
// InVals directly because it will be reversed later and then put to
// InVals.
ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
MemVec.push_back(ArgValue);
continue;
}
if (VA.getLocInfo() == CCValAssign::Indirect) { if (VA.getLocInfo() == CCValAssign::Indirect) {
// If the original argument was split and passed by reference (e.g. i128 // If the original argument was split and passed by reference (e.g. i128
@ -11897,12 +11889,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
} }
} }
// Reverse MemVec and fill in InVals to ensure that the order in which the
// callee functions are fetched is the same as the order in which it was
// processed here.
while (MemVec.size())
InVals.push_back(MemVec.pop_back_val());
if (IsVarArg) { if (IsVarArg) {
// When it comes to variadic arguments, the variadic function also needs to follow // When it comes to variadic arguments, the variadic function also needs to follow
// no-kernel function calling convention, we need to use VGPRs to pass // no-kernel function calling convention, we need to use VGPRs to pass
@ -12124,6 +12110,9 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains; SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr; SDValue StackPtr;
// Get the value of adjusting the stack frame before the Call.
uint64_t CurrentFrameSize = Chain->getConstantOperandVal(1);
for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i]; CCValAssign &VA = ArgLocs[i];
SDValue ArgValue = OutVals[i]; SDValue ArgValue = OutVals[i];
@ -12229,8 +12218,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X4, PtrVT); StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X4, PtrVT);
SDValue Address = SDValue Address =
DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
DAG.getIntPtrConstant((int)VA.getLocMemOffset() > 0 ? DAG.getIntPtrConstant(-((int)VA.getLocMemOffset()
(-VA.getLocMemOffset()) : VA.getLocMemOffset(), DL)); + CurrentFrameSize), DL));
// Emit the store. // Emit the store.
MemOpChains.push_back( MemOpChains.push_back(