[InstCombine] fold mul with masked low bit operand to trunc+select
https://alive2.llvm.org/ce/z/o7rQ5q

The Alive2 proof shows an extra instruction in some cases, but that is caused by an existing canonicalization of trunc -> and+icmp. Codegen should be better for any target where a multiply is more costly than the simplest ALU op.

This ends up producing the requested x86 asm from issue #55618, but it is not the same IR: we are still missing a canonicalization from the negate+mask pattern to the trunc+select created here.
parent abb21b54bc
commit 3f33d67d8a
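For illustration, this is the shape of the fold in IR (a minimal sketch; @src/@tgt and the value names are placeholders, not taken from the patch or the Alive2 link):

; before: multiply by a masked low bit
define i32 @src(i32 %x, i32 %y) {
  %m = and i32 %x, 1
  %r = mul i32 %m, %y
  ret i32 %r
}

; after: the low bit of %x selects between %y and 0
define i32 @tgt(i32 %x, i32 %y) {
  %t = trunc i32 %x to i1
  %r = select i1 %t, i32 %y, i32 0
  ret i32 %r
}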
@@ -390,6 +390,12 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
     return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(Ty));
   }
 
+  // (and X, 1) * Y --> (trunc X) ? Y : 0
+  if (match(&I, m_c_BinOp(m_OneUse(m_And(m_Value(X), m_One())), m_Value(Y)))) {
+    Value *Tr = Builder.CreateTrunc(X, CmpInst::makeCmpResultType(Ty));
+    return SelectInst::Create(Tr, Y, ConstantInt::getNullValue(Ty));
+  }
+
   // ((ashr X, 31) | 1) * X --> abs(X)
   // X * ((ashr X, 31) | 1) --> abs(X)
   if (match(&I, m_c_BinOp(m_Or(m_AShr(m_Value(X),
@@ -267,10 +267,11 @@ define i1 @pr51551_neg1(i32 %x, i32 %y) {
 
 define i1 @pr51551_neg2(i32 %x, i32 %y) {
 ; CHECK-LABEL: @pr51551_neg2(
-; CHECK-NEXT: [[T0:%.*]] = and i32 [[Y:%.*]], 1
-; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[T0]], [[X:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[MUL]], 7
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[Y:%.*]], 1
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[X_OP:%.*]] = and i32 [[X:%.*]], 7
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X_OP]], 0
+; CHECK-NEXT: [[CMP:%.*]] = select i1 [[DOTNOT]], i1 true, i1 [[CMP1]]
 ; CHECK-NEXT: ret i1 [[CMP]]
 ;
   %t0 = and i32 %y, -7
@@ -749,16 +749,11 @@ define i1 @not_mul_of_bool_commute(i32 %x, i32 %y) {
   ret i1 %r
 }
 
-; negative test - no leading zeros for 's'
-; TODO: If analysis was generalized for sign bits, we could reduce this to false.
+; no leading zeros for 's', but we reduce this with other transforms
 
 define i1 @mul_of_bool_no_lz_other_op(i32 %x, i8 %y) {
 ; CHECK-LABEL: @mul_of_bool_no_lz_other_op(
-; CHECK-NEXT: [[B:%.*]] = and i32 [[X:%.*]], 1
-; CHECK-NEXT: [[S:%.*]] = sext i8 [[Y:%.*]] to i32
-; CHECK-NEXT: [[M:%.*]] = mul nuw nsw i32 [[B]], [[S]]
-; CHECK-NEXT: [[R:%.*]] = icmp sgt i32 [[M]], 127
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
 ;
   %b = and i32 %x, 1
   %s = sext i8 %y to i32
@@ -80,8 +80,8 @@ define <4 x i32> @combine_mul_self_demandedbits_vector(<4 x i32> %x) {
 
 define i8 @one_demanded_bit(i8 %x) {
 ; CHECK-LABEL: @one_demanded_bit(
-; CHECK-NEXT: [[TMP1:%.*]] = shl i8 [[X:%.*]], 6
-; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP1]], -65
+; CHECK-NEXT: [[M:%.*]] = shl i8 [[X:%.*]], 6
+; CHECK-NEXT: [[R:%.*]] = or i8 [[M]], -65
 ; CHECK-NEXT: ret i8 [[R]]
 ;
   %m = mul i8 %x, 192 ; 0b1100_0000
@@ -91,8 +91,8 @@ define i8 @one_demanded_bit(i8 %x) {
 
 define <2 x i8> @one_demanded_bit_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @one_demanded_bit_splat(
-; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
-; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP1]], <i8 32, i8 32>
+; CHECK-NEXT: [[M:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
+; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[M]], <i8 32, i8 32>
 ; CHECK-NEXT: ret <2 x i8> [[R]]
 ;
   %m = mul <2 x i8> %x, <i8 160, i8 160> ; 0b1010_0000
@@ -201,9 +201,10 @@ define i64 @scalar_mul_bit_x0_y0_uses(i64 %x, i64 %y) {
 ; Negative test
 define i64 @scalar_mul_bit_x0_y1(i64 %x, i64 %y) {
 ; CHECK-LABEL: @scalar_mul_bit_x0_y1(
-; CHECK-NEXT: [[AND1:%.*]] = and i64 [[X:%.*]], 1
 ; CHECK-NEXT: [[AND2:%.*]] = and i64 [[Y:%.*]], 2
-; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[AND1]], [[AND2]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 1
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], 0
+; CHECK-NEXT: [[MUL:%.*]] = select i1 [[DOTNOT]], i64 0, i64 [[AND2]]
 ; CHECK-NEXT: ret i64 [[MUL]]
 ;
   %and1 = and i64 %x, 1
@@ -214,9 +215,10 @@ define i64 @scalar_mul_bit_x0_y1(i64 %x, i64 %y) {
 
 define i64 @scalar_mul_bit_x0_yC(i64 %x, i64 %y, i64 %c) {
 ; CHECK-LABEL: @scalar_mul_bit_x0_yC(
-; CHECK-NEXT: [[AND1:%.*]] = and i64 [[X:%.*]], 1
 ; CHECK-NEXT: [[AND2:%.*]] = and i64 [[Y:%.*]], [[C:%.*]]
-; CHECK-NEXT: [[MUL:%.*]] = mul nuw i64 [[AND1]], [[AND2]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 1
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], 0
+; CHECK-NEXT: [[MUL:%.*]] = select i1 [[DOTNOT]], i64 0, i64 [[AND2]]
 ; CHECK-NEXT: ret i64 [[MUL]]
 ;
   %and1 = and i64 %x, 1
@@ -466,8 +466,9 @@ define <2 x i32> @signbit_mul_vec_commute(<2 x i32> %a, <2 x i32> %b) {
 
 define i32 @lowbit_mul(i32 %a, i32 %b) {
 ; CHECK-LABEL: @lowbit_mul(
-; CHECK-NEXT: [[D:%.*]] = and i32 [[A:%.*]], 1
-; CHECK-NEXT: [[E:%.*]] = mul nuw i32 [[D]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 1
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[E:%.*]] = select i1 [[DOTNOT]], i32 0, i32 [[B:%.*]]
 ; CHECK-NEXT: ret i32 [[E]]
 ;
   %d = and i32 %a, 1
@@ -480,8 +481,8 @@ define i32 @lowbit_mul(i32 %a, i32 %b) {
 define <2 x i17> @lowbit_mul_commute(<2 x i17> %a, <2 x i17> %p) {
 ; CHECK-LABEL: @lowbit_mul_commute(
 ; CHECK-NEXT: [[B:%.*]] = xor <2 x i17> [[P:%.*]], <i17 42, i17 43>
-; CHECK-NEXT: [[D:%.*]] = and <2 x i17> [[A:%.*]], <i17 1, i17 1>
-; CHECK-NEXT: [[E:%.*]] = mul nuw <2 x i17> [[B]], [[D]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i17> [[A:%.*]] to <2 x i1>
+; CHECK-NEXT: [[E:%.*]] = select <2 x i1> [[TMP1]], <2 x i17> [[B]], <2 x i17> zeroinitializer
 ; CHECK-NEXT: ret <2 x i17> [[E]]
 ;
   %b = xor <2 x i17> %p, <i17 42, i17 43> ; thwart complexity-based canonicalization
@@ -490,6 +491,8 @@ define <2 x i17> @lowbit_mul_commute(<2 x i17> %a, <2 x i17> %p) {
   ret <2 x i17> %e
 }
 
+; negative test - extra use
+
 define i32 @lowbit_mul_use(i32 %a, i32 %b) {
 ; CHECK-LABEL: @lowbit_mul_use(
 ; CHECK-NEXT: [[D:%.*]] = and i32 [[A:%.*]], 1
@@ -503,6 +506,8 @@ define i32 @lowbit_mul_use(i32 %a, i32 %b) {
   ret i32 %e
 }
 
+; negative test - wrong mask
+
 define i32 @not_lowbit_mul(i32 %a, i32 %b) {
 ; CHECK-LABEL: @not_lowbit_mul(
 ; CHECK-NEXT: [[D:%.*]] = and i32 [[A:%.*]], 2
@@ -1499,8 +1499,8 @@ define i32 @mul_no_common_bits_const_op(i32 %p) {
 
 define <2 x i12> @mul_no_common_bits_commute(<2 x i12> %p) {
 ; CHECK-LABEL: @mul_no_common_bits_commute(
-; CHECK-NEXT: [[X:%.*]] = and <2 x i12> [[P:%.*]], <i12 1, i12 1>
-; CHECK-NEXT: [[R:%.*]] = mul nuw nsw <2 x i12> [[X]], <i12 15, i12 17>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i12> [[P:%.*]] to <2 x i1>
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[TMP1]], <2 x i12> <i12 15, i12 17>, <2 x i12> zeroinitializer
 ; CHECK-NEXT: ret <2 x i12> [[R]]
 ;
   %x = and <2 x i12> %p, <i12 1, i12 1>