[InstCombine] fold mul with masked low bit operand to trunc+select

https://alive2.llvm.org/ce/z/o7rQ5q

The updated tests show an extra instruction in some cases, but
that is caused by an existing canonicalization of trunc -> and+icmp.

Codegen should be better for any target where a multiply is
more costly than the simplest ALU op.

This ends up producing the requested x86 asm from issue #55618,
but it's not the same IR. We are missing a canonicalization
from the negate+mask pattern to the trunc+select created here.
This commit is contained in:
Sanjay Patel 2022-06-05 17:55:09 -04:00
parent abb21b54bc
commit 3f33d67d8a
6 changed files with 34 additions and 25 deletions

View File

@ -390,6 +390,12 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(Ty)); return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(Ty));
} }
// (and X, 1) * Y --> (trunc X) ? Y : 0
if (match(&I, m_c_BinOp(m_OneUse(m_And(m_Value(X), m_One())), m_Value(Y)))) {
Value *Tr = Builder.CreateTrunc(X, CmpInst::makeCmpResultType(Ty));
return SelectInst::Create(Tr, Y, ConstantInt::getNullValue(Ty));
}
// ((ashr X, 31) | 1) * X --> abs(X) // ((ashr X, 31) | 1) * X --> abs(X)
// X * ((ashr X, 31) | 1) --> abs(X) // X * ((ashr X, 31) | 1) --> abs(X)
if (match(&I, m_c_BinOp(m_Or(m_AShr(m_Value(X), if (match(&I, m_c_BinOp(m_Or(m_AShr(m_Value(X),

View File

@ -267,10 +267,11 @@ define i1 @pr51551_neg1(i32 %x, i32 %y) {
define i1 @pr51551_neg2(i32 %x, i32 %y) { define i1 @pr51551_neg2(i32 %x, i32 %y) {
; CHECK-LABEL: @pr51551_neg2( ; CHECK-LABEL: @pr51551_neg2(
; CHECK-NEXT: [[T0:%.*]] = and i32 [[Y:%.*]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[Y:%.*]], 1
; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[T0]], [[X:%.*]] ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: [[AND:%.*]] = and i32 [[MUL]], 7 ; CHECK-NEXT: [[X_OP:%.*]] = and i32 [[X:%.*]], 7
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X_OP]], 0
; CHECK-NEXT: [[CMP:%.*]] = select i1 [[DOTNOT]], i1 true, i1 [[CMP1]]
; CHECK-NEXT: ret i1 [[CMP]] ; CHECK-NEXT: ret i1 [[CMP]]
; ;
%t0 = and i32 %y, -7 %t0 = and i32 %y, -7

View File

@ -749,16 +749,11 @@ define i1 @not_mul_of_bool_commute(i32 %x, i32 %y) {
ret i1 %r ret i1 %r
} }
; negative test - no leading zeros for 's' ; no leading zeros for 's', but we reduce this with other transforms
; TODO: If analysis was generalized for sign bits, we could reduce this to false.
define i1 @mul_of_bool_no_lz_other_op(i32 %x, i8 %y) { define i1 @mul_of_bool_no_lz_other_op(i32 %x, i8 %y) {
; CHECK-LABEL: @mul_of_bool_no_lz_other_op( ; CHECK-LABEL: @mul_of_bool_no_lz_other_op(
; CHECK-NEXT: [[B:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: ret i1 false
; CHECK-NEXT: [[S:%.*]] = sext i8 [[Y:%.*]] to i32
; CHECK-NEXT: [[M:%.*]] = mul nuw nsw i32 [[B]], [[S]]
; CHECK-NEXT: [[R:%.*]] = icmp sgt i32 [[M]], 127
; CHECK-NEXT: ret i1 [[R]]
; ;
%b = and i32 %x, 1 %b = and i32 %x, 1
%s = sext i8 %y to i32 %s = sext i8 %y to i32

View File

@ -80,8 +80,8 @@ define <4 x i32> @combine_mul_self_demandedbits_vector(<4 x i32> %x) {
define i8 @one_demanded_bit(i8 %x) { define i8 @one_demanded_bit(i8 %x) {
; CHECK-LABEL: @one_demanded_bit( ; CHECK-LABEL: @one_demanded_bit(
; CHECK-NEXT: [[TMP1:%.*]] = shl i8 [[X:%.*]], 6 ; CHECK-NEXT: [[M:%.*]] = shl i8 [[X:%.*]], 6
; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP1]], -65 ; CHECK-NEXT: [[R:%.*]] = or i8 [[M]], -65
; CHECK-NEXT: ret i8 [[R]] ; CHECK-NEXT: ret i8 [[R]]
; ;
%m = mul i8 %x, 192 ; 0b1100_0000 %m = mul i8 %x, 192 ; 0b1100_0000
@ -91,8 +91,8 @@ define i8 @one_demanded_bit(i8 %x) {
define <2 x i8> @one_demanded_bit_splat(<2 x i8> %x) { define <2 x i8> @one_demanded_bit_splat(<2 x i8> %x) {
; CHECK-LABEL: @one_demanded_bit_splat( ; CHECK-LABEL: @one_demanded_bit_splat(
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5> ; CHECK-NEXT: [[M:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP1]], <i8 32, i8 32> ; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[M]], <i8 32, i8 32>
; CHECK-NEXT: ret <2 x i8> [[R]] ; CHECK-NEXT: ret <2 x i8> [[R]]
; ;
%m = mul <2 x i8> %x, <i8 160, i8 160> ; 0b1010_0000 %m = mul <2 x i8> %x, <i8 160, i8 160> ; 0b1010_0000
@ -201,9 +201,10 @@ define i64 @scalar_mul_bit_x0_y0_uses(i64 %x, i64 %y) {
; Negative test ; Negative test
define i64 @scalar_mul_bit_x0_y1(i64 %x, i64 %y) { define i64 @scalar_mul_bit_x0_y1(i64 %x, i64 %y) {
; CHECK-LABEL: @scalar_mul_bit_x0_y1( ; CHECK-LABEL: @scalar_mul_bit_x0_y1(
; CHECK-NEXT: [[AND1:%.*]] = and i64 [[X:%.*]], 1
; CHECK-NEXT: [[AND2:%.*]] = and i64 [[Y:%.*]], 2 ; CHECK-NEXT: [[AND2:%.*]] = and i64 [[Y:%.*]], 2
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[AND1]], [[AND2]] ; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 1
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT: [[MUL:%.*]] = select i1 [[DOTNOT]], i64 0, i64 [[AND2]]
; CHECK-NEXT: ret i64 [[MUL]] ; CHECK-NEXT: ret i64 [[MUL]]
; ;
%and1 = and i64 %x, 1 %and1 = and i64 %x, 1
@ -214,9 +215,10 @@ define i64 @scalar_mul_bit_x0_y1(i64 %x, i64 %y) {
define i64 @scalar_mul_bit_x0_yC(i64 %x, i64 %y, i64 %c) { define i64 @scalar_mul_bit_x0_yC(i64 %x, i64 %y, i64 %c) {
; CHECK-LABEL: @scalar_mul_bit_x0_yC( ; CHECK-LABEL: @scalar_mul_bit_x0_yC(
; CHECK-NEXT: [[AND1:%.*]] = and i64 [[X:%.*]], 1
; CHECK-NEXT: [[AND2:%.*]] = and i64 [[Y:%.*]], [[C:%.*]] ; CHECK-NEXT: [[AND2:%.*]] = and i64 [[Y:%.*]], [[C:%.*]]
; CHECK-NEXT: [[MUL:%.*]] = mul nuw i64 [[AND1]], [[AND2]] ; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 1
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT: [[MUL:%.*]] = select i1 [[DOTNOT]], i64 0, i64 [[AND2]]
; CHECK-NEXT: ret i64 [[MUL]] ; CHECK-NEXT: ret i64 [[MUL]]
; ;
%and1 = and i64 %x, 1 %and1 = and i64 %x, 1

View File

@ -466,8 +466,9 @@ define <2 x i32> @signbit_mul_vec_commute(<2 x i32> %a, <2 x i32> %b) {
define i32 @lowbit_mul(i32 %a, i32 %b) { define i32 @lowbit_mul(i32 %a, i32 %b) {
; CHECK-LABEL: @lowbit_mul( ; CHECK-LABEL: @lowbit_mul(
; CHECK-NEXT: [[D:%.*]] = and i32 [[A:%.*]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 1
; CHECK-NEXT: [[E:%.*]] = mul nuw i32 [[D]], [[B:%.*]] ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: [[E:%.*]] = select i1 [[DOTNOT]], i32 0, i32 [[B:%.*]]
; CHECK-NEXT: ret i32 [[E]] ; CHECK-NEXT: ret i32 [[E]]
; ;
%d = and i32 %a, 1 %d = and i32 %a, 1
@ -480,8 +481,8 @@ define i32 @lowbit_mul(i32 %a, i32 %b) {
define <2 x i17> @lowbit_mul_commute(<2 x i17> %a, <2 x i17> %p) { define <2 x i17> @lowbit_mul_commute(<2 x i17> %a, <2 x i17> %p) {
; CHECK-LABEL: @lowbit_mul_commute( ; CHECK-LABEL: @lowbit_mul_commute(
; CHECK-NEXT: [[B:%.*]] = xor <2 x i17> [[P:%.*]], <i17 42, i17 43> ; CHECK-NEXT: [[B:%.*]] = xor <2 x i17> [[P:%.*]], <i17 42, i17 43>
; CHECK-NEXT: [[D:%.*]] = and <2 x i17> [[A:%.*]], <i17 1, i17 1> ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i17> [[A:%.*]] to <2 x i1>
; CHECK-NEXT: [[E:%.*]] = mul nuw <2 x i17> [[B]], [[D]] ; CHECK-NEXT: [[E:%.*]] = select <2 x i1> [[TMP1]], <2 x i17> [[B]], <2 x i17> zeroinitializer
; CHECK-NEXT: ret <2 x i17> [[E]] ; CHECK-NEXT: ret <2 x i17> [[E]]
; ;
%b = xor <2 x i17> %p, <i17 42, i17 43> ; thwart complexity-based canonicalization %b = xor <2 x i17> %p, <i17 42, i17 43> ; thwart complexity-based canonicalization
@ -490,6 +491,8 @@ define <2 x i17> @lowbit_mul_commute(<2 x i17> %a, <2 x i17> %p) {
ret <2 x i17> %e ret <2 x i17> %e
} }
; negative test - extra use
define i32 @lowbit_mul_use(i32 %a, i32 %b) { define i32 @lowbit_mul_use(i32 %a, i32 %b) {
; CHECK-LABEL: @lowbit_mul_use( ; CHECK-LABEL: @lowbit_mul_use(
; CHECK-NEXT: [[D:%.*]] = and i32 [[A:%.*]], 1 ; CHECK-NEXT: [[D:%.*]] = and i32 [[A:%.*]], 1
@ -503,6 +506,8 @@ define i32 @lowbit_mul_use(i32 %a, i32 %b) {
ret i32 %e ret i32 %e
} }
; negative test - wrong mask
define i32 @not_lowbit_mul(i32 %a, i32 %b) { define i32 @not_lowbit_mul(i32 %a, i32 %b) {
; CHECK-LABEL: @not_lowbit_mul( ; CHECK-LABEL: @not_lowbit_mul(
; CHECK-NEXT: [[D:%.*]] = and i32 [[A:%.*]], 2 ; CHECK-NEXT: [[D:%.*]] = and i32 [[A:%.*]], 2

View File

@ -1499,8 +1499,8 @@ define i32 @mul_no_common_bits_const_op(i32 %p) {
define <2 x i12> @mul_no_common_bits_commute(<2 x i12> %p) { define <2 x i12> @mul_no_common_bits_commute(<2 x i12> %p) {
; CHECK-LABEL: @mul_no_common_bits_commute( ; CHECK-LABEL: @mul_no_common_bits_commute(
; CHECK-NEXT: [[X:%.*]] = and <2 x i12> [[P:%.*]], <i12 1, i12 1> ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i12> [[P:%.*]] to <2 x i1>
; CHECK-NEXT: [[R:%.*]] = mul nuw nsw <2 x i12> [[X]], <i12 15, i12 17> ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[TMP1]], <2 x i12> <i12 15, i12 17>, <2 x i12> zeroinitializer
; CHECK-NEXT: ret <2 x i12> [[R]] ; CHECK-NEXT: ret <2 x i12> [[R]]
; ;
%x = and <2 x i12> %p, <i12 1, i12 1> %x = and <2 x i12> %p, <i12 1, i12 1>