Generate vlw/vsw instructions for private memory access

This commit is contained in:
Aries 2023-01-03 11:08:24 +08:00
parent f565a5f146
commit 78434601d8
3 changed files with 66 additions and 16 deletions

View File

@ -30,6 +30,22 @@ class DivergentUnaryFrag<SDPatternOperator Op> : PatFrag<
(Op $src0),
[{ return N->isDivergent(); }]>;
// Single-operand fragment that matches Op only for divergent load nodes whose
// address space is RISCVAS::PRIVATE_ADDRESS.
class DivergentPrivateLoadFrag<SDPatternOperator Op> : PatFrag<
  (ops node:$src0),
  (Op $src0),
  [{
    // Uniform nodes never match this fragment.
    if (!N->isDivergent())
      return false;
    const auto *Load = cast<LoadSDNode>(N);
    return Load->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS;
  }]>;
// Single-operand fragment that matches Op only for divergent load nodes whose
// address space is anything other than RISCVAS::PRIVATE_ADDRESS.
class DivergentNonPrivateLoadFrag<SDPatternOperator Op> : PatFrag<
  (ops node:$src0),
  (Op $src0),
  [{
    // Uniform nodes never match this fragment.
    if (!N->isDivergent())
      return false;
    const auto *Load = cast<LoadSDNode>(N);
    return Load->getAddressSpace() != RISCVAS::PRIVATE_ADDRESS;
  }]>;
class UniformBinFrag<SDPatternOperator Op> : PatFrag<
(ops node:$src0, node:$src1),
(Op $src0, $src1),
@ -40,6 +56,22 @@ class DivergentBinFrag<SDPatternOperator Op> : PatFrag<
(Op $src0, $src1),
[{ return N->isDivergent(); }]>;
// Two-operand fragment that matches Op only for divergent store nodes whose
// address space is RISCVAS::PRIVATE_ADDRESS.
class DivergentPrivateStoreFrag<SDPatternOperator Op> : PatFrag<
  (ops node:$src0, node:$src1),
  (Op $src0, $src1),
  [{
    // Uniform nodes never match this fragment.
    if (!N->isDivergent())
      return false;
    const auto *Store = cast<StoreSDNode>(N);
    return Store->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS;
  }]>;
// Two-operand fragment that matches Op only for divergent store nodes whose
// address space is anything other than RISCVAS::PRIVATE_ADDRESS.
class DivergentNonPrivateStoreFrag<SDPatternOperator Op> : PatFrag<
  (ops node:$src0, node:$src1),
  (Op $src0, $src1),
  [{
    // Uniform nodes never match this fragment.
    if (!N->isDivergent())
      return false;
    const auto *Store = cast<StoreSDNode>(N);
    return Store->getAddressSpace() != RISCVAS::PRIVATE_ADDRESS;
  }]>;
// Widen sALU PatFrag so that instructions such as ADDW/SUBW can be selected
// from i64 add/sub when only the lower 32 bits of the result are used.
class UniformWBinFrag<SDPatternOperator Op> : PatFrag<

View File

@ -44,13 +44,23 @@ multiclass PatVFRBin<SDPatternOperator Op, list<RVInst> Insts> {
(Insts[2] GPRF32:$rs2, VGPR:$rs1)>;
}
class DivergentLdPat<PatFrag LoadOp, RVInst Inst>
: Pat<(XLenVT (DivergentUnaryFrag<LoadOp>
// Selection pattern for a divergent load from RISCVAS::PRIVATE_ADDRESS.
//   LoadOp - load fragment to match; wrapped in DivergentPrivateLoadFrag.
//   Inst   - instruction emitted, given the operands (GPR $rs1, VGPR $vs2).
// The address must match AddrRegReg with a GPR first operand and a VGPR
// second operand; the loaded value has type XLenVT.
class DivergentPriLdPat<PatFrag LoadOp, RVInst Inst>
: Pat<(XLenVT (DivergentPrivateLoadFrag<LoadOp>
(AddrRegReg GPR:$rs1, (XLenVT VGPR:$vs2)))),
(Inst GPR:$rs1, VGPR:$vs2)>;
class DivergentStPat<PatFrag StoreOp, RVInst Inst>
: Pat<(DivergentBinFrag<StoreOp>
// Selection pattern for a divergent store to RISCVAS::PRIVATE_ADDRESS.
//   StoreOp - store fragment to match; wrapped in DivergentPrivateStoreFrag.
//   Inst    - instruction emitted, given the operands
//             (VGPR $vs3, GPR $rs1, VGPR $vs2).
// $vs3 is the XLenVT value being stored; the address must match AddrRegReg
// with a GPR first operand and a VGPR second operand.
class DivergentPriStPat<PatFrag StoreOp, RVInst Inst>
: Pat<(DivergentPrivateStoreFrag<StoreOp>
(XLenVT VGPR:$vs3), (AddrRegReg GPR:$rs1, (XLenVT VGPR:$vs2))),
(Inst VGPR:$vs3, GPR:$rs1, VGPR:$vs2)>;
// Selection pattern for a divergent load from any address space other than
// RISCVAS::PRIVATE_ADDRESS.
//   LoadOp - load fragment to match; wrapped in DivergentNonPrivateLoadFrag.
//   Inst   - instruction emitted, given the operands (GPR $rs1, VGPR $vs2).
// The address must match AddrRegReg with a GPR first operand and a VGPR
// second operand; the loaded value has type XLenVT.
class DivergentNonPriLdPat<PatFrag LoadOp, RVInst Inst>
: Pat<(XLenVT (DivergentNonPrivateLoadFrag<LoadOp>
(AddrRegReg GPR:$rs1, (XLenVT VGPR:$vs2)))),
(Inst GPR:$rs1, VGPR:$vs2)>;
// Selection pattern for a divergent store to any address space other than
// RISCVAS::PRIVATE_ADDRESS.
//   StoreOp - store fragment to match; wrapped in
//             DivergentNonPrivateStoreFrag.
//   Inst    - instruction emitted, given the operands
//             (VGPR $vs3, GPR $rs1, VGPR $vs2).
// $vs3 is the XLenVT value being stored; the address must match AddrRegReg
// with a GPR first operand and a VGPR second operand.
class DivergentNonPriStPat<PatFrag StoreOp, RVInst Inst>
: Pat<(DivergentNonPrivateStoreFrag<StoreOp>
(XLenVT VGPR:$vs3), (AddrRegReg GPR:$rs1, (XLenVT VGPR:$vs2))),
(Inst VGPR:$vs3, GPR:$rs1, VGPR:$vs2)>;
@ -1147,18 +1157,26 @@ defm : AnyPatVFRTer<any_fma, [VFMADD_VV, VFMADD_VF]>;
defm : PatVFRTer<[DivergentBinFrag<fsub>, DivergentBinFrag<fmul>],
[VFMSUB_VV, VFMSUB_VF]>;
// vfsqrt.v
def : Pat<(any_fsqrt (f32 VGPR:$rs1)), (VFSQRT_V (f32 VGPR:$rs1))>;
// TODO: vfrec7.v -- find out what this instruction does.
def : DivergentLdPat<sextloadi8, VLUXEI8>;
def : DivergentLdPat<extloadi8, VLUXEI8>;
def : DivergentLdPat<sextloadi16, VLUXEI16>;
def : DivergentLdPat<extloadi16, VLUXEI16>;
def : DivergentLdPat<load, VLUXEI32>;
//def : DivergentLdPat<zextloadi8, VLUXEI8U>;
//def : DivergentLdPat<zextloadi16, VLUXEI16U>;
def : DivergentStPat<truncstorei8, VSUXEI8>;
def : DivergentStPat<truncstorei16, VSUXEI16>;
def : DivergentStPat<store, VSUXEI32>;
def : Pat<(any_fsqrt (f32 VGPR:$rs1)), (VFSQRT_V (f32 VGPR:$rs1))>;
// Non-private memory load/store: divergent accesses to any address space
// other than RISCVAS::PRIVATE_ADDRESS are mapped to VLUXEI*/VSUXEI*.
// Sign-extending and any-extending loads of the same width share one
// instruction.
def : DivergentNonPriLdPat<sextloadi8, VLUXEI8>;
def : DivergentNonPriLdPat<extloadi8, VLUXEI8>;
def : DivergentNonPriLdPat<sextloadi16, VLUXEI16>;
def : DivergentNonPriLdPat<extloadi16, VLUXEI16>;
def : DivergentNonPriLdPat<load, VLUXEI32>;
// Zero-extending load patterns are currently disabled.
//def : DivergentNonPriLdPat<zextloadi8, VLUXEI8U>;
//def : DivergentNonPriLdPat<zextloadi16, VLUXEI16U>;
def : DivergentNonPriStPat<truncstorei8, VSUXEI8>;
def : DivergentNonPriStPat<truncstorei16, VSUXEI16>;
def : DivergentNonPriStPat<store, VSUXEI32>;
// Private memory per-thread load/store: divergent accesses to
// RISCVAS::PRIVATE_ADDRESS are mapped to VLW/VSW.
// NOTE(review): only full-width `load`/`store` have private patterns here;
// extending loads and truncating stores have none in this section -- confirm
// whether they are handled elsewhere.
def : DivergentPriLdPat<load, VLW>;
def : DivergentPriStPat<store, VSW>;
def DivergentSelectCCFrag : PatFrag<(ops node:$lhs, node:$rhs, node:$cc,
node:$truev, node:$falsev),

View File

@ -39,7 +39,7 @@ define spir_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-NEXT: add x18, x18, x10
; VENTUS-NEXT: add x9, x9, x10
; VENTUS-NEXT: lw x11, 0(x9)
; VENTUS-NEXT: vluxei32.v v1, (x18), v0
; VENTUS-NEXT: vlw v1, x0(x18)
; VENTUS-NEXT: vmv.s.x v2, x11
; VENTUS-NEXT: add x10, x8, x10
; VENTUS-NEXT: lw x11, 0(x10)