[RISCV] Teach shouldSinkOperands that vp.add and friends are commutative.

We previously had a bug where our isel patterns weren't commutative, so
vx/vf forms only matched with the splat on the RHS, but that has been
fixed for a while.
Craig Topper 2022-11-14 21:51:08 -08:00
parent b7d7c448df
commit 25dcca60f4
2 changed files with 15 additions and 22 deletions
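
For context, here is a minimal LLVM IR sketch of the case this change enables; the function and value names are hypothetical, not part of the commit. The loop-invariant splat feeds operand 0 of llvm.vp.add, and once shouldSinkOperands reports both operands as sinkable, CodeGenPrepare can sink the splat sequence into the loop, where isel folds it into a vadd.vx:

declare <4 x i32> @llvm.vp.add.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)

define void @commuted_splat(<4 x i32>* %a, i32 %x, <4 x i1> %m, i32 zeroext %vl) {
entry:
  ; Loop-invariant splat of %x, built outside the loop.
  %head = insertelement <4 x i32> poison, i32 %x, i64 0
  %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %loop

loop:
  %v = load <4 x i32>, <4 x i32>* %a
  ; Splat on the LHS (operand 0); previously only operand 1 was reported sinkable.
  %add = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %splat, <4 x i32> %v, <4 x i1> %m, i32 %vl)
  store <4 x i32> %add, <4 x i32>* %a
  br label %loop
}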

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

@@ -1318,16 +1318,6 @@ bool RISCVTargetLowering::shouldSinkOperands(
     case Intrinsic::fma:
     case Intrinsic::vp_fma:
       return Operand == 0 || Operand == 1;
-    // FIXME: Our patterns can only match vx/vf instructions when the splat
-    // is on the RHS, because TableGen doesn't recognize our VP operations
-    // as commutative.
-    case Intrinsic::vp_add:
-    case Intrinsic::vp_mul:
-    case Intrinsic::vp_and:
-    case Intrinsic::vp_or:
-    case Intrinsic::vp_xor:
-    case Intrinsic::vp_fadd:
-    case Intrinsic::vp_fmul:
     case Intrinsic::vp_shl:
     case Intrinsic::vp_lshr:
     case Intrinsic::vp_ashr:
@@ -1336,8 +1326,15 @@ bool RISCVTargetLowering::shouldSinkOperands(
     case Intrinsic::vp_urem:
     case Intrinsic::vp_srem:
       return Operand == 1;
-    // ... with the exception of vp.sub/vp.fsub/vp.fdiv, which have
-    // explicit patterns for both LHS and RHS (as 'vr' versions).
+    // These intrinsics are commutative.
+    case Intrinsic::vp_add:
+    case Intrinsic::vp_mul:
+    case Intrinsic::vp_and:
+    case Intrinsic::vp_or:
+    case Intrinsic::vp_xor:
+    case Intrinsic::vp_fadd:
+    case Intrinsic::vp_fmul:
+    // These intrinsics have 'vr' versions.
     case Intrinsic::vp_sub:
     case Intrinsic::vp_fsub:
     case Intrinsic::vp_fdiv:
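
The 'vr' comment covers vp.sub, vp.fsub and vp.fdiv: they are not commutative, but RVV has reversed scalar-operand instructions (vrsub.vx, vfrsub.vf, vfrdiv.vf), so a splat on the LHS can still be matched to a vx/vf form, which is why sinking either operand pays off. A minimal sketch, with hypothetical names, not part of this commit:

declare <4 x i32> @llvm.vp.sub.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)

; With the splat as operand 0, isel can use vrsub.vx, which computes
; (scalar - vector) with the scalar kept in a GPR.
define <4 x i32> @splat_lhs_vp_sub(<4 x i32> %v, i32 %x, <4 x i1> %m, i32 zeroext %vl) {
  %head = insertelement <4 x i32> poison, i32 %x, i64 0
  %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
  %sub = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %splat, <4 x i32> %v, <4 x i1> %m, i32 %vl)
  ret <4 x i32> %sub
}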

llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll

@@ -3025,25 +3025,21 @@ for.cond.cleanup: ; preds = %vector.body
   ret void
 }
 
-; FIXME: This doesn't match against vadd.vx because our patterns aren't
-; commutative.
 define void @sink_splat_vp_add_commute(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: sink_splat_vp_add_commute:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    li a1, 1024
 ; CHECK-NEXT:  .LBB48_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
-; CHECK-NEXT:    vadd.vv v9, v8, v9, v0.t
+; CHECK-NEXT:    vadd.vx v8, v8, a1, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vse32.v v9, (a0)
-; CHECK-NEXT:    addi a1, a1, -4
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a3, a3, -4
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    bnez a1, .LBB48_1
+; CHECK-NEXT:    bnez a3, .LBB48_1
 ; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry: