[VENTUS][fix] Fix the offset of private variables on the stack

Fix the offset of private variables spilled to the stack.

parent 755797e27c
commit d809d3a2bd
@@ -81,7 +81,7 @@ __builtin_riscv_workitem_linear_id:
 .type __builtin_riscv_global_linear_id, @function
 __builtin_riscv_global_linear_id:
   addi sp, sp, 4
-  sw ra, 0(sp)
+  sw ra, -4(sp)
   csrr a3, CSR_KNL # Get kernel metadata buffer
   lw t0, KNL_WORK_DIM(a3) # Get work_dims
   call __builtin_riscv_global_id_x
@@ -109,7 +109,7 @@ __builtin_riscv_global_linear_id:
   vadd.vv v5, v5, v6 # global_linear_id3 = tmp + global_linear_id2
 .GLR:
   vadd.vx v0, v5, zero # Return global_linear_id for 1/2/3 dims
-  lw ra, 0(sp)
+  lw ra, -4(sp)
   addi sp, sp, -4
   ret

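Every builtin below repeats the prologue/epilogue pattern fixed here. The `addi sp, sp, 4` / `addi sp, sp, -4` pair shows that the Ventus per-thread private stack grows upward: the prologue bumps `sp` past the reservation, so the reserved word sits at `-4(sp)`, and the old `sw ra, 0(sp)` / `lw ra, 0(sp)` touched one word beyond it, where the next allocation (for example a spilled private variable) lands. A minimal standalone sketch of that arithmetic, with an invented starting address:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t sp = 0x1000;      // hypothetical private-stack pointer (grows up)
  sp += 4;                   // prologue: addi sp, sp, 4 reserves one word
  uint32_t raSlot = sp - 4;  // the reserved word lies below the new sp
  assert(raSlot == 0x1000);  // hence sw/lw ra must use -4(sp), not 0(sp):
  // 0(sp) == 0x1004 is one word past the reservation and would clobber
  // whatever the upward-growing stack allocates next.
  sp -= 4;                   // epilogue: addi sp, sp, -4 releases the word
  assert(sp == 0x1000);
  return 0;
}

The same one-word correction is applied to every save/restore pair in the rest of this file.
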
@@ -202,7 +202,7 @@ __builtin_riscv_workitem_id_z:
 .type __builtin_riscv_global_id_x, @function
 __builtin_riscv_global_id_x:
   addi sp, sp, 4
-  sw ra, 0(sp)
+  sw ra, -4(sp)
   call __builtin_riscv_workitem_id_x
   csrr a0, CSR_KNL # Get kernel metadata buffer
   csrr t1, CSR_GID_X # Get group_id_x
@@ -211,7 +211,7 @@ __builtin_riscv_global_id_x:
   mul t6, t1, t3 # CSR_GID_X * local_size_x
   add t6, t6, t4 # Get global_offset_x + CSR_GID_X * local_size_x
   vadd.vx v0, v0, t6
-  lw ra, 0(sp)
+  lw ra, -4(sp)
   addi sp, sp, -4
   ret

@@ -221,7 +221,7 @@ __builtin_riscv_global_id_x:
 .type __builtin_riscv_global_id_y, @function
 __builtin_riscv_global_id_y:
   addi sp, sp, 4
-  sw ra, 0(sp)
+  sw ra, -4(sp)
   call __builtin_riscv_workitem_id_y
   csrr t1, CSR_GID_Y # Get group_id_y
   lw t2, KNL_LC_SIZE_Y(a0) # Get local_size_y
@@ -229,7 +229,7 @@ __builtin_riscv_global_id_y:
   mul t3, t1, t2 # CSR_GID_Y * local_size_y
   add t3, t3, t4 # global_offset_y + (CSR_GID_Y * local_size_y)
   vadd.vx v0, v0, t3 # global_id_y
-  lw ra, 0(sp)
+  lw ra, -4(sp)
   addi sp, sp, -4
   ret

@@ -239,7 +239,7 @@ __builtin_riscv_global_id_y:
 .type __builtin_riscv_global_id_z, @function
 __builtin_riscv_global_id_z:
   addi sp, sp, 4
-  sw ra, 0(sp)
+  sw ra, -4(sp)
   call __builtin_riscv_workitem_id_z
   csrr a0, CSR_KNL # Get kernel metadata buffer
   csrr t1, CSR_GID_Z # Get group_id_z
@@ -248,7 +248,7 @@ __builtin_riscv_global_id_z:
   mul t2, t2, t1 # CSR_GID_Z * local_size_z
   add t2, t2, t3 # global_offset_z + (CSR_GID_Z * local_size_z)
   vadd.vx v0, v0, t2 # global_id_z
-  lw ra, 0(sp)
+  lw ra, -4(sp)
   addi sp, sp, -4
   ret

@@ -387,4 +387,3 @@ __builtin_riscv_work_dim:
   lw t0, KNL_WORK_DIM(a0) # Get work_dim
   vmv.v.x v0, t0
   ret
-

@@ -521,13 +521,13 @@ uint64_t RISCVFrameLowering::getStackOffset(const MachineFunction &MF,
   // because the parameters spilling to the stack are not in the current TP
   // stack, the offset in the current stack should not be calculated from a
   // negative FI.
-  for (int I = FI < 0 ? MFI.getObjectIndexBegin() : 0; I != FI; I++) {
+  for (int I = FI < 0 ? MFI.getObjectIndexBegin() : 0; I != FI + 1; I++) {
     if (static_cast<unsigned>(MFI.getStackID(I)) == Stack) {
       // Need to consider the alignment for each frame index.
       Align Alignment =
           MFI.getObjectAlign(I).value() <= 4 ? Align(4) : MFI.getObjectAlign(I);
-      uint64_t AlignedSize = alignTo(MFI.getObjectSize(I), Alignment);
-      StackSize += AlignedSize;
+      StackSize += MFI.getObjectSize(I);
+      StackSize = alignTo(StackSize, Alignment);
     }
   }

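Two things change in this loop: it now runs through `FI` itself (`I != FI + 1`), and it aligns the running total instead of each object's size in isolation. Padding depends on where the running offset currently sits, not on how big an object is, so the two accumulation schemes can disagree. A self-contained sketch with invented sizes and alignments:

#include <cstdint>
#include <cstdio>

static uint64_t alignTo(uint64_t V, uint64_t A) { return (V + A - 1) / A * A; }

int main() {
  // Two hypothetical stack objects: 1 byte with align 4, then 8 bytes
  // with align 8.
  uint64_t Sizes[] = {1, 8}, Aligns[] = {4, 8};

  uint64_t OldSum = 0; // old scheme: align each object's size, then add
  for (int I = 0; I < 2; ++I)
    OldSum += alignTo(Sizes[I], Aligns[I]);

  uint64_t NewSum = 0; // new scheme: add the raw size, align the total
  for (int I = 0; I < 2; ++I)
    NewSum = alignTo(NewSum + Sizes[I], Aligns[I]);

  printf("old: %llu, new: %llu\n", (unsigned long long)OldSum,
         (unsigned long long)NewSum); // old: 12, new: 16
  return 0;
}

With offsets measured downward from the top of the TP stack (see the LowerCall change below), aligning the running total keeps each object's base address aligned, which aligning sizes one by one does not guarantee.
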
@@ -537,7 +537,7 @@ uint64_t RISCVFrameLowering::getStackOffset(const MachineFunction &MF,
   if (FI < 0 && !MF.getFunction().isVarArg())
     StackSize += getStackSize(MF, RISCVStackID::VGPRSpill);

-  return alignTo(StackSize, Align(4));
+  return StackSize;
 }

 StackOffset
@@ -11832,7 +11832,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
   else
     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_Ventus);

-  SmallVector<SDValue> MemVec;
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
     SDValue ArgValue;
@@ -11859,15 +11858,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
     else if (VA.isRegLoc())
       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this, Ins[i]);
-    else {
-      // Temporarily put the created parameter node to MemVec instead of to
-      // InVals directly because it will be reversed later and then put to
-      // InVals.
+    else
       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
-      MemVec.push_back(ArgValue);
-      continue;
-    }

     if (VA.getLocInfo() == CCValAssign::Indirect) {
       // If the original argument was split and passed by reference (e.g. i128
@@ -11897,12 +11889,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
     }
   }

-  // Reverse MemVec and fill in InVals to ensure that the order in which the
-  // callee functions are fetched is the same as the order in which it was
-  // processed here.
-  while (MemVec.size())
-    InVals.push_back(MemVec.pop_back_val());
-
   if (IsVarArg) {
     // When it comes to variadic arguments, a variadic function also needs to
     // follow the non-kernel calling convention; we need to use VGPRs to pass
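For context on the deleted loop: `SmallVector::pop_back_val()` removes and returns the last element, so draining `MemVec` this way filled `InVals` with the memory-located arguments in reverse. Now that `unpackFromMemLoc` results are pushed straight into `InVals` inside the main loop, arguments stay in `ArgLocs` order, correctly interleaved with register arguments, and the collect-then-reverse step is unnecessary. A miniature of the removed behavior, with std::vector standing in for SmallVector:

#include <cassert>
#include <vector>

int main() {
  std::vector<int> MemVec = {1, 2, 3}; // stand-ins for the mem-loc SDValues
  std::vector<int> InVals;
  while (!MemVec.empty()) {            // the removed reverse-fill loop
    InVals.push_back(MemVec.back());   // ~ MemVec.pop_back_val()
    MemVec.pop_back();
  }
  assert((InVals == std::vector<int>{3, 2, 1})); // order came out reversed
  return 0;
}
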
@@ -12124,6 +12110,9 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
   SmallVector<SDValue, 8> MemOpChains;
   SDValue StackPtr;

+  // Get the size of the stack-frame adjustment made before the call.
+  uint64_t CurrentFrameSize = Chain->getConstantOperandVal(1);
   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
     SDValue ArgValue = OutVals[i];
@@ -12229,8 +12218,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
       StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X4, PtrVT);
       SDValue Address =
           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
-                      DAG.getIntPtrConstant((int)VA.getLocMemOffset() > 0 ?
-                                            (-VA.getLocMemOffset()) : VA.getLocMemOffset(), DL));
+                      DAG.getIntPtrConstant(-((int)VA.getLocMemOffset()
+                                            + CurrentFrameSize), DL));

       // Emit the store.
       MemOpChains.push_back(
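Putting the two LowerCall changes together: assuming, as the surrounding code suggests, that `Chain` at the top of the argument loop is the CALLSEQ_START node whose constant operand 1 holds the just-made stack adjustment, each outgoing stack argument is now stored at `TP - (LocMemOffset + CurrentFrameSize)` instead of roughly `TP - LocMemOffset`. On the upward-growing TP stack the argument area lies below the adjusted top, so the slot address must account for the whole frame adjustment, not just the slot's own offset. A worked version of the address computation with invented numbers:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t TP = 0x2000;             // X4, top of the per-thread private stack
  uint32_t CurrentFrameSize = 16;   // hypothetical CALLSEQ_START adjustment
  uint32_t LocMemOffset = 4;        // hypothetical VA.getLocMemOffset()
  // New scheme: the slot sits below TP by the frame adjustment plus the
  // offset the calling convention assigned to this argument.
  uint32_t Addr = TP - (LocMemOffset + CurrentFrameSize);
  assert(Addr == 0x1fec);           // 0x2000 - 20
  // The old expression negated LocMemOffset alone, ignoring the frame
  // adjustment made just before the call.
  return 0;
}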