[VENTUS][fix] Fix load instruction selection pattern for vastart

In standard RISC-V vararg support, the vastart frame index is stored on the stack, but because of the design of Ventus, code generation can end up like this:
        vlw.v   v0, -44(v8)
        vadd.vi v1, v0, 4
        vsw.v   v1, -44(v8)
        vlw12.v v0, 0(v0)
The last vlw12 instruction is actually illegal here; it should be vlw.
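For context, a variadic function of the following shape is enough to produce the sequence above; this is an illustrative sketch, not a test from this commit:

        #include <cstdarg>

        // Each va_arg loads the current vararg pointer from its stack slot,
        // bumps it by the argument size, stores it back, and then loads the
        // value through the old pointer: exactly the four instructions above.
        int sum(int n, ...) {
          va_list ap;
          va_start(ap, n);
          int s = 0;
          for (int i = 0; i < n; ++i)
            s += va_arg(ap, int);
          va_end(ap);
          return s;
        }

Because every thread keeps its own vararg pointer in private stack memory, the pointer value is divergent, so the final load must select the per-thread vlw pattern rather than the non-private immediate-offset vlw12.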
zhoujingya 2023-09-05 11:15:53 +08:00 committed by zhoujingya
parent 3a465aa795
commit b7b8fa50ba
4 changed files with 40 additions and 18 deletions


@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -36,6 +37,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -4554,7 +4556,9 @@ SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy(MF.getDataLayout()));
auto *FrameIndex = cast<FrameIndexSDNode>(Op.getOperand(1));
assert(FrameIndex && "Not frame index node");
setVastartStoreFrameIndex(FrameIndex->getIndex());
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
@@ -13390,13 +13394,16 @@ bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
}
case ISD::LOAD: {
const LoadSDNode *L = cast<LoadSDNode>(N);
// If the load's base pointer was itself loaded from the vastart store
// frame index, the pointer is per-thread, so this load is divergent.
if (auto *Base = dyn_cast<LoadSDNode>(L->getBasePtr()))
  if (auto *BaseBase = dyn_cast<FrameIndexSDNode>(Base->getOperand(1)))
    if (BaseBase->getIndex() == getVastartStoreFrameIndex())
      return true;
return L->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS;
}
case ISD::STORE: {
const StoreSDNode *Store = cast<StoreSDNode>(N);
return Store->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS ||
Store->getAddressSpace() == RISCVAS::LOCAL_ADDRESS ||
Store->getPointerInfo().StackID == RISCVStackID::VGPRSpill;
}
case ISD::CALLSEQ_END:


@@ -334,10 +334,11 @@ enum NodeType : unsigned {
class RISCVTargetLowering : public TargetLowering {
const RISCVSubtarget &Subtarget;
int *VastartStoreFrameIndex = new int;
public:
explicit RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI);
~RISCVTargetLowering() { delete VastartStoreFrameIndex; }
const RISCVSubtarget &getSubtarget() const { return Subtarget; }
@@ -479,6 +480,12 @@ public:
return ISD::SIGN_EXTEND;
}
int getVastartStoreFrameIndex() const { return *VastartStoreFrameIndex; }
void setVastartStoreFrameIndex(int Index) const {
*VastartStoreFrameIndex = Index;
}
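A note on the storage above: the heap-allocated int appears to exist only so the index can be updated from const lowering hooks. Assuming nothing else holds the pointer, a mutable member is an equivalent sketch that avoids the new/delete pair:

        // Sketch: -1 marks "no vastart lowered yet".
        mutable int VastartStoreFrameIndex = -1;

        int getVastartStoreFrameIndex() const { return VastartStoreFrameIndex; }
        void setVastartStoreFrameIndex(int Index) const {
          VastartStoreFrameIndex = Index;
        }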
bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
if (DAG.getMachineFunction().getFunction().hasMinSize())
return false;


@@ -40,10 +40,17 @@ class DivergentPrivateLoadFrag<SDPatternOperator Op> : PatFrag<
(ops node:$src0),
(Op $src0),
[{
const LoadSDNode *L = cast<LoadSDNode>(N);
bool IsDivergent = false;
if (auto *Base = dyn_cast<LoadSDNode>(L->getBasePtr()))
  if (auto *BaseBase = dyn_cast<FrameIndexSDNode>(Base->getOperand(1)))
    if (BaseBase->getIndex() == CurDAG->getMachineFunction()
                                    .getSubtarget<RISCVSubtarget>()
                                    .getTargetLowering()
                                    ->getVastartStoreFrameIndex())
      IsDivergent = true;
return N->isDivergent() &&
       (cast<LoadSDNode>(N)->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS ||
        cast<LoadSDNode>(N)->getPointerInfo().StackID == RISCVStackID::VGPRSpill ||
        IsDivergent);
}]>;
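This predicate and the isSDNodeSourceOfDivergence change above perform the same three-level base-pointer walk; as a refactoring sketch, both call sites could share a helper along these lines (the name loadsVastartSlot is made up, not part of this commit):

        #include "llvm/CodeGen/SelectionDAGNodes.h"
        using namespace llvm;

        // Matches (load (load (FrameIndex FI))): the inner load reads the
        // per-thread pointer that vastart stored into slot FI, so a load
        // through that pointer has a divergent address.
        static bool loadsVastartSlot(const LoadSDNode *L, int VastartFI) {
          if (auto *Base = dyn_cast<LoadSDNode>(L->getBasePtr()))
            if (auto *FI = dyn_cast<FrameIndexSDNode>(Base->getBasePtr()))
              return FI->getIndex() == VastartFI;
          return false;
        }

(On a LoadSDNode, getBasePtr() and getOperand(1) name the same operand, so this matches the checks above.)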
class DivergentNonPrivateLoadFrag<SDPatternOperator Op> : PatFrag<


@@ -1230,6 +1230,19 @@ def VFTTA : RVInstIVI<0b000011, (outs VGPR:$vd_wb),
// Ventus vALU divergent execution patterns
//===----------------------------------------------------------------------===//
// ATTENTION: please don't change the pattern order: the private per-thread
// patterns must be matched before the non-private immediate forms
// (VLW before VLWI12).
// Private memory per-thread load/store
def : DivergentPriLdPat<load, VLW>;
def : DivergentPriLdPat<zextloadi16, VLHU>;
def : DivergentPriLdPat<sextloadi16, VLH>;
def : DivergentPriLdPat<extloadi16, VLH>;
def : DivergentPriLdPat<zextloadi8, VLBU>;
def : DivergentPriLdPat<extloadi8, VLB>;
def : DivergentPriLdPat<sextloadi8, VLB>;
def : DivergentPriStPat<store, VSW>;
def : DivergentPriStPat<truncstorei16, VSH>;
def : DivergentPriStPat<truncstorei8, VSB>;
// Non-private memory load/store
// TODO: add store/load test file for testing pattern match
def : DivergentNonPriLdImmPat<load, VLWI12>;
@@ -1253,18 +1266,6 @@ def : DivergentNonPriStPat<truncstorei8, VSUXEI8>;
def : DivergentNonPriStPat<truncstorei16, VSUXEI16>;
def : DivergentNonPriStPat<store, VSUXEI32>;
// FIXME: check this review: https://reviews.llvm.org/D131729#inline-1269307
// def : PatIntSetCC<[VGPR, VGPR], SETLE, VMSLE_VV>;
// def : PatIntSetCC<[VGPR, GPR], SETLE, VMSLE_VX>;