[VENTUS][fix] Fix the stack offset of private variables

Fix the offset computed for private variables on the stack.
2023-12-22 14:13:04 +08:00 · 2023-12-22 14:13:04 +08:00 · d809d3a2bd
parent 755797e27c
commit d809d3a2bd
3 changed files with 18 additions and 30 deletions
--- a/libclc/riscv32/lib/workitem/workitem.S
+++ b/libclc/riscv32/lib/workitem/workitem.S
@ -81,7 +81,7 @@ __builtin_riscv_workitem_linear_id:
  .type __builtin_riscv_global_linear_id, @function
 __builtin_riscv_global_linear_id:
  addi sp, sp, 4
-  sw ra, 0(sp)
+  sw ra, -4(sp)
  csrr a3, CSR_KNL           # Get kernel metadata buffer
  lw t0, KNL_WORK_DIM(a3)    # Get work_dims
  call __builtin_riscv_global_id_x
@ -109,7 +109,7 @@ __builtin_riscv_global_linear_id:
  vadd.vv v5, v5, v6      # global_linear_id3 = tmp + global_linear_id2
 .GLR:
  vadd.vx v0, v5, zero    # Return global_linear_id for 1/2/3 dims
-  lw ra, 0(sp)
+  lw ra, -4(sp)
  addi sp, sp, -4
  ret
@ -202,7 +202,7 @@ __builtin_riscv_workitem_id_z:
  .type __builtin_riscv_global_id_x, @function
 __builtin_riscv_global_id_x:
  addi sp, sp, 4
-  sw ra, 0(sp)
+  sw ra, -4(sp)
  call __builtin_riscv_workitem_id_x
  csrr a0, CSR_KNL        # Get kernel metadata buffer
  csrr t1, CSR_GID_X      # Get group_id_x
@ -211,7 +211,7 @@ __builtin_riscv_global_id_x:
  mul t6, t1, t3          # CSR_GID_X * local_size_x
  add t6, t6, t4          # Get global_offset_x + CSR_GID_X * local_size_x
  vadd.vx v0,v0, t6
-  lw ra, 0(sp)
+  lw ra, -4(sp)
  addi sp, sp, -4
  ret
@ -221,7 +221,7 @@ __builtin_riscv_global_id_x:
  .type __builtin_riscv_global_id_y, @function
 __builtin_riscv_global_id_y:
  addi sp, sp, 4
-  sw ra, 0(sp)
+  sw ra, -4(sp)
  call __builtin_riscv_workitem_id_y
  csrr t1, CSR_GID_Y      # Get group_id_y
  lw t2, KNL_LC_SIZE_Y(a0) # Get local_size_y
@ -229,7 +229,7 @@ __builtin_riscv_global_id_y:
  mul t3, t1, t2          # CSR_GID_Y * local_size_y
  add t3, t3, t4          # global_offset_y  + (CSR_GID_Y * local_size_y)
  vadd.vx v0, v0, t3      # global_id_y
-  lw ra, 0(sp)
+  lw ra, -4(sp)
  addi sp, sp, -4
  ret
@ -239,7 +239,7 @@ __builtin_riscv_global_id_y:
  .type __builtin_riscv_global_id_z, @function
 __builtin_riscv_global_id_z:
  addi sp, sp, 4
-  sw ra, 0(sp)
+  sw ra, -4(sp)
  call __builtin_riscv_workitem_id_z
  csrr a0, CSR_KNL        # Get kernel metadata buffer
  csrr t1, CSR_GID_Z      # Get group_id_z
@ -248,7 +248,7 @@ __builtin_riscv_global_id_z:
  mul t2, t2, t1          # CSR_GID_Z * local_size_z
  add t2, t2, t3          # global_offset_z + (CSR_GID_Z * local_size_z)
  vadd.vx v0, v0, t2      # global_id_z
-  lw ra, 0(sp)
+  lw ra, -4(sp)
  addi sp, sp, -4
  ret
@ -387,4 +387,3 @@ __builtin_riscv_work_dim:
  lw t0, KNL_WORK_DIM(a0)   # Get work_dim
  vmv.v.x v0, t0
  ret
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@ -521,13 +521,13 @@ uint64_t RISCVFrameLowering::getStackOffset(const MachineFunction &MF,
  // because the parameters spilling to the stack are not in the current TP 
  // stack, the offset in the current stack should not be calculated from a 
  // negative FI.
-  for (int I = FI < 0 ? MFI.getObjectIndexBegin() : 0; I != FI; I++) {
+  for (int I = FI < 0 ? MFI.getObjectIndexBegin() : 0; I != FI + 1; I++) {
    if (static_cast<unsigned>(MFI.getStackID(I)) == Stack) {
      // Need to consider the alignment for different frame index
      Align Alignment =
          MFI.getObjectAlign(I).value() <= 4 ? Align(4) : MFI.getObjectAlign(I);
-      uint64_t AlignedSize = alignTo(MFI.getObjectSize(I), Alignment);
+      StackSize += MFI.getObjectSize(I);
-      StackSize += AlignedSize;
+      StackSize = alignTo(StackSize, Alignment);
    }
  }
@ -537,7 +537,7 @@ uint64_t RISCVFrameLowering::getStackOffset(const MachineFunction &MF,
  if (FI < 0 && !MF.getFunction().isVarArg())
    StackSize += getStackSize(MF, RISCVStackID::VGPRSpill);
-  return alignTo(StackSize, Align(4));
+  return StackSize;
 }
 StackOffset
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@ -11832,7 +11832,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_Ventus);
  SmallVector<SDValue> MemVec;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
@ -11859,15 +11858,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
        ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
      else if (VA.isRegLoc())
        ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this, Ins[i]);
-      else {
+      else 
        // Temporarily put the created parameter node to MemVec instead of to 
        // InVals directly because it will be reversed later and then put to 
        // InVals.
        ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
        MemVec.push_back(ArgValue);
        continue;
      }
      if (VA.getLocInfo() == CCValAssign::Indirect) {
        // If the original argument was split and passed by reference (e.g. i128
@ -11897,12 +11889,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
    }
  }
  // Reverse MemVec and fill in InVals to ensure that the order in which the 
  // callee functions are fetched is the same as the order in which it was 
  // processed here.
  while (MemVec.size())
    InVals.push_back(MemVec.pop_back_val());
  if (IsVarArg) {
    // When it comes to variadic arguments, the variadic function also needs to
    // follow the non-kernel function calling convention; we need to use VGPRs to pass
@ -12124,6 +12110,9 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  // Get the value of adjusting the stack frame before the Call.
  uint64_t CurrentFrameSize = Chain->getConstantOperandVal(1);
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
@ -12229,8 +12218,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X4, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
-                      DAG.getIntPtrConstant((int)VA.getLocMemOffset() > 0 ? 
+                      DAG.getIntPtrConstant(-((int)VA.getLocMemOffset() 
-                      (-VA.getLocMemOffset()) : VA.getLocMemOffset(), DL));
+                      + CurrentFrameSize), DL));
      // Emit the store.
      MemOpChains.push_back(