[InstCombine] try to determine "exact" for sdiv

If the divisor is a power-of-2 or negative-power-of-2 and the dividend is known to have >= trailing zeros than the divisor, the division is exact: https://alive2.llvm.org/ce/z/UGBksM (general proof) https://alive2.llvm.org/ce/z/D4yPS- (examples based on regression tests) This isn't the most direct optimization (we could create ashr in these examples instead of relying on existing folds for exact divides), but it's possible that there's a more general constraint than just a pow2 divisor, so this might be extended in the future. This should solve issue #58348. Differential Revision: https://reviews.llvm.org/D135970
2022-10-16 10:01:10 -04:00 · 2022-10-16 10:01:10 -04:00 · e5ee0b06d6
parent 78e3aeda3c
commit e5ee0b06d6
3 changed files with 29 additions and 12 deletions
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@ -1332,7 +1332,15 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
                              ConstantInt::getAllOnesValue(Ty));
  }

-  if (isKnownNonNegative(Op0, DL, 0, &AC, &I, &DT)) {
+  KnownBits KnownDividend = computeKnownBits(Op0, 0, &I);
+  if (!I.isExact() &&
+      (match(Op1, m_Power2(Op1C)) || match(Op1, m_NegatedPower2(Op1C))) &&
+      KnownDividend.countMinTrailingZeros() >= Op1C->countTrailingZeros()) {
+    I.setIsExact();
+    return &I;
+  }
+
+  if (KnownDividend.isNonNegative()) {
    // If both operands are unsigned, turn this into a udiv.
    if (isKnownNonNegative(Op1, DL, 0, &AC, &I, &DT)) {
      auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
--- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll
+++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll
@ -64,8 +64,9 @@ define <2 x i8> @n4_vec_undef(<2 x i8> %x) {

 define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask(
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -32
-; CHECK-NEXT:    [[D:%.*]] = sdiv i8 [[A]], -4
+; CHECK-NEXT:    [[A:%.*]] = ashr i8 [[X:%.*]], 2
+; CHECK-NEXT:    [[D_NEG:%.*]] = and i8 [[A]], -8
+; CHECK-NEXT:    [[D:%.*]] = sub nsw i8 0, [[D_NEG]]
 ; CHECK-NEXT:    ret i8 [[D]]
 ;
  %a = and i8 %x, -32
@ -75,8 +76,8 @@ define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) {

 define i8 @prove_exact_with_high_mask_limit(i8 %x, i8 %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_limit(
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -32
-; CHECK-NEXT:    [[D:%.*]] = sdiv i8 [[A]], -32
+; CHECK-NEXT:    [[A:%.*]] = ashr i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[D:%.*]] = sub nsw i8 0, [[A]]
 ; CHECK-NEXT:    ret i8 [[D]]
 ;
  %a = and i8 %x, -32
@ -84,6 +85,8 @@ define i8 @prove_exact_with_high_mask_limit(i8 %x, i8 %y) {
  ret i8 %d
 }

+; negative test - not enough low zeros in dividend
+
 define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @not_prove_exact_with_high_mask(
 ; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -32
@ -98,7 +101,8 @@ define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) {
 define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_splat_vec(
 ; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
-; CHECK-NEXT:    [[D:%.*]] = sdiv <2 x i8> [[A]], <i8 -16, i8 -16>
+; CHECK-NEXT:    [[D_NEG:%.*]] = ashr exact <2 x i8> [[A]], <i8 4, i8 4>
+; CHECK-NEXT:    [[D:%.*]] = sub nsw <2 x i8> zeroinitializer, [[D_NEG]]
 ; CHECK-NEXT:    ret <2 x i8> [[D]]
 ;
  %a = shl <2 x i8> %x, <i8 5, i8 5>
@ -106,6 +110,8 @@ define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y)
  ret <2 x i8> %d
 }

+; TODO: Needs knownbits to handle arbitrary vector constants.
+
 define <2 x i8> @prove_exact_with_high_mask_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_vec(
 ; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 2>
--- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll
+++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll
@ -110,8 +110,8 @@ define i8 @shl1_nsw_not_exact(i8 %x, i8 %y) {

 define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask(
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -8
-; CHECK-NEXT:    [[D:%.*]] = sdiv i8 [[A]], 4
+; CHECK-NEXT:    [[A:%.*]] = ashr i8 [[X:%.*]], 2
+; CHECK-NEXT:    [[D:%.*]] = and i8 [[A]], -2
 ; CHECK-NEXT:    ret i8 [[D]]
 ;
  %a = and i8 %x, -8
@ -121,15 +121,16 @@ define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) {

 define i8 @prove_exact_with_high_mask_limit(i8 %x, i8 %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_limit(
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -8
-; CHECK-NEXT:    [[D:%.*]] = sdiv i8 [[A]], 8
-; CHECK-NEXT:    ret i8 [[D]]
+; CHECK-NEXT:    [[A:%.*]] = ashr i8 [[X:%.*]], 3
+; CHECK-NEXT:    ret i8 [[A]]
 ;
  %a = and i8 %x, -8
  %d = sdiv i8 %a, 8
  ret i8 %d
 }

+; negative test - not enough low zeros in dividend
+
 define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @not_prove_exact_with_high_mask(
 ; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -8
@ -144,7 +145,7 @@ define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) {
 define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_splat_vec(
 ; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 3>
-; CHECK-NEXT:    [[D:%.*]] = sdiv <2 x i8> [[A]], <i8 8, i8 8>
+; CHECK-NEXT:    [[D:%.*]] = ashr exact <2 x i8> [[A]], <i8 3, i8 3>
 ; CHECK-NEXT:    ret <2 x i8> [[D]]
 ;
  %a = shl <2 x i8> %x, <i8 3, i8 3>
@ -152,6 +153,8 @@ define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y)
  ret <2 x i8> %d
 }

+; TODO: Needs knownbits to handle arbitrary vector constants.
+
 define <2 x i8> @prove_exact_with_high_mask_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_vec(
 ; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 2>