Generate vlw/vsw instructions for private memory access

parent f565a5f146
commit 78434601d8
@@ -30,6 +30,22 @@ class DivergentUnaryFrag<SDPatternOperator Op> : PatFrag<
   (Op $src0),
   [{ return N->isDivergent(); }]>;
 
+class DivergentPrivateLoadFrag<SDPatternOperator Op> : PatFrag<
+  (ops node:$src0),
+  (Op $src0),
+  [{
+    return N->isDivergent() &&
+           cast<LoadSDNode>(N)->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS;
+  }]>;
+
+class DivergentNonPrivateLoadFrag<SDPatternOperator Op> : PatFrag<
+  (ops node:$src0),
+  (Op $src0),
+  [{
+    return N->isDivergent() &&
+           cast<LoadSDNode>(N)->getAddressSpace() != RISCVAS::PRIVATE_ADDRESS;
+  }]>;
+
 class UniformBinFrag<SDPatternOperator Op> : PatFrag<
   (ops node:$src0, node:$src1),
   (Op $src0, $src1),
@@ -40,6 +56,22 @@ class DivergentBinFrag<SDPatternOperator Op> : PatFrag<
   (Op $src0, $src1),
   [{ return N->isDivergent(); }]>;
 
+class DivergentPrivateStoreFrag<SDPatternOperator Op> : PatFrag<
+  (ops node:$src0, node:$src1),
+  (Op $src0, $src1),
+  [{
+    return N->isDivergent() &&
+           cast<StoreSDNode>(N)->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS;
+  }]>;
+
+class DivergentNonPrivateStoreFrag<SDPatternOperator Op> : PatFrag<
+  (ops node:$src0, node:$src1),
+  (Op $src0, $src1),
+  [{
+    return N->isDivergent() &&
+           cast<StoreSDNode>(N)->getAddressSpace() != RISCVAS::PRIVATE_ADDRESS;
+  }]>;
+
 // Widen sALU PatFrag for such as ADDW/SUBW to be selected from i64 add/sub
 // if only the lower 32 bits of their result is used.
 class UniformWBinFrag<SDPatternOperator Op> : PatFrag<
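Note (illustration, not part of the diff): the new fragments only match when the selection DAG node is both divergent and tagged with the private address space (RISCVAS::PRIVATE_ADDRESS), or explicitly not tagged with it for the NonPrivate variants. A minimal LLVM IR sketch of the kind of access the private fragments are meant to catch follows; addrspace(5) is only an assumed stand-in for whatever number RISCVAS::PRIVATE_ADDRESS resolves to, and %tid stands in for a divergent per-work-item value.

    ; Sketch only: addrspace(5) is an assumed placeholder for
    ; RISCVAS::PRIVATE_ADDRESS; %tid models a divergent per-work-item value.
    define i32 @private_rw(ptr addrspace(5) %base, i32 %tid) {
      ; Per-lane address into private memory, so the load/store below are divergent.
      %slot = getelementptr inbounds i32, ptr addrspace(5) %base, i32 %tid
      %v = load i32, ptr addrspace(5) %slot      ; DivergentPrivateLoadFrag
      %inc = add i32 %v, 1
      store i32 %inc, ptr addrspace(5) %slot     ; DivergentPrivateStoreFrag
      ret i32 %inc
    }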
@@ -44,13 +44,23 @@ multiclass PatVFRBin<SDPatternOperator Op, list<RVInst> Insts> {
     (Insts[2] GPRF32:$rs2, VGPR:$rs1)>;
 }
 
-class DivergentLdPat<PatFrag LoadOp, RVInst Inst>
-  : Pat<(XLenVT (DivergentUnaryFrag<LoadOp>
+class DivergentPriLdPat<PatFrag LoadOp, RVInst Inst>
+  : Pat<(XLenVT (DivergentPrivateLoadFrag<LoadOp>
       (AddrRegReg GPR:$rs1, (XLenVT VGPR:$vs2)))),
     (Inst GPR:$rs1, VGPR:$vs2)>;
 
-class DivergentStPat<PatFrag StoreOp, RVInst Inst>
-  : Pat<(DivergentBinFrag<StoreOp>
+class DivergentPriStPat<PatFrag StoreOp, RVInst Inst>
+  : Pat<(DivergentPrivateStoreFrag<StoreOp>
       (XLenVT VGPR:$vs3), (AddrRegReg GPR:$rs1, (XLenVT VGPR:$vs2))),
     (Inst VGPR:$vs3, GPR:$rs1, VGPR:$vs2)>;
+
+class DivergentNonPriLdPat<PatFrag LoadOp, RVInst Inst>
+  : Pat<(XLenVT (DivergentNonPrivateLoadFrag<LoadOp>
+      (AddrRegReg GPR:$rs1, (XLenVT VGPR:$vs2)))),
+    (Inst GPR:$rs1, VGPR:$vs2)>;
+
+class DivergentNonPriStPat<PatFrag StoreOp, RVInst Inst>
+  : Pat<(DivergentNonPrivateStoreFrag<StoreOp>
+      (XLenVT VGPR:$vs3), (AddrRegReg GPR:$rs1, (XLenVT VGPR:$vs2))),
+    (Inst VGPR:$vs3, GPR:$rs1, VGPR:$vs2)>;
 
@@ -1147,18 +1157,26 @@ defm : AnyPatVFRTer<any_fma, [VFMADD_VV, VFMADD_VF]>;
 defm : PatVFRTer<[DivergentBinFrag<fsub>, DivergentBinFrag<fmul>],
                  [VFMSUB_VV, VFMSUB_VF]>;
 // vfsqrt.v
-def : Pat<(any_fsqrt (f32 VGPR:$rs1)), (VFSQRT_V (f32 VGPR:$rs1))>;
 // TODO: vfrec7.v? what is this
-def : DivergentLdPat<sextloadi8, VLUXEI8>;
-def : DivergentLdPat<extloadi8, VLUXEI8>;
-def : DivergentLdPat<sextloadi16, VLUXEI16>;
-def : DivergentLdPat<extloadi16, VLUXEI16>;
-def : DivergentLdPat<load, VLUXEI32>;
-//def : DivergentLdPat<zextloadi8, VLUXEI8U>;
-//def : DivergentLdPat<zextloadi16, VLUXEI16U>;
-def : DivergentStPat<truncstorei8, VSUXEI8>;
-def : DivergentStPat<truncstorei16, VSUXEI16>;
-def : DivergentStPat<store, VSUXEI32>;
+def : Pat<(any_fsqrt (f32 VGPR:$rs1)), (VFSQRT_V (f32 VGPR:$rs1))>;
+
+
+// Non-private memory load/store
+def : DivergentNonPriLdPat<sextloadi8, VLUXEI8>;
+def : DivergentNonPriLdPat<extloadi8, VLUXEI8>;
+def : DivergentNonPriLdPat<sextloadi16, VLUXEI16>;
+def : DivergentNonPriLdPat<extloadi16, VLUXEI16>;
+def : DivergentNonPriLdPat<load, VLUXEI32>;
+//def : DivergentNonPriLdPat<zextloadi8, VLUXEI8U>;
+//def : DivergentNonPriLdPat<zextloadi16, VLUXEI16U>;
+def : DivergentNonPriStPat<truncstorei8, VSUXEI8>;
+def : DivergentNonPriStPat<truncstorei16, VSUXEI16>;
+def : DivergentNonPriStPat<store, VSUXEI32>;
+
+// Private memory per-thread load/store
+def : DivergentPriLdPat<load, VLW>;
+def : DivergentPriStPat<store, VSW>;
+
 
 def DivergentSelectCCFrag : PatFrag<(ops node:$lhs, node:$rhs, node:$cc,
                                     node:$truev, node:$falsev),
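A second illustration (not part of the diff): after this split, divergent accesses to non-private address spaces keep the indexed vector load/store path (vluxei*/vsuxei*), while divergent private accesses now select vlw/vsw. The sketch below shows the non-private case for contrast with the earlier private sketch; addrspace(1) is the global space the test below already uses, and %tid again models a divergent per-work-item value.

    ; Sketch only: a divergent global-memory access, which still selects
    ; the indexed forms via DivergentNonPriLdPat/DivergentNonPriStPat.
    define void @global_rw(ptr addrspace(1) %buf, i32 %tid) {
      %slot = getelementptr inbounds i32, ptr addrspace(1) %buf, i32 %tid
      %v = load i32, ptr addrspace(1) %slot      ; -> VLUXEI32
      %dbl = shl i32 %v, 1
      store i32 %dbl, ptr addrspace(1) %slot     ; -> VSUXEI32
      ret void
    }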
@@ -39,7 +39,7 @@ define spir_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
 ; VENTUS-NEXT: add x18, x18, x10
 ; VENTUS-NEXT: add x9, x9, x10
 ; VENTUS-NEXT: lw x11, 0(x9)
-; VENTUS-NEXT: vluxei32.v v1, (x18), v0
+; VENTUS-NEXT: vlw v1, x0(x18)
 ; VENTUS-NEXT: vmv.s.x v2, x11
 ; VENTUS-NEXT: add x10, x8, x10
 ; VENTUS-NEXT: lw x11, 0(x10)