[GlobalISel] Add computeNumSignBits() support for compares.

Doing so allows G_SEXT_INREG to be combined away for many vector cases.

Differential Revision: https://reviews.llvm.org/D135168
This commit is contained in:
Amara Emerson 2022-10-04 16:50:57 +01:00
parent 8055aa8e8a
commit c5cebf78bd
6 changed files with 109 additions and 773 deletions

View File

@ -711,6 +711,18 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
break;
}
case TargetOpcode::G_FCMP:
case TargetOpcode::G_ICMP: {
  // A 1-bit result gets no compare-specific answer here.
  if (TyBits == 1)
    break;

  // The number of sign bits of a wider compare result follows from how the
  // target encodes booleans for this kind of compare (scalar vs. vector,
  // integer vs. floating point).
  const bool IsFP = Opcode == TargetOpcode::G_FCMP;
  switch (TL.getBooleanContents(DstTy.isVector(), IsFP)) {
  case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
    // 0 or -1: every bit is a copy of the sign bit.
    return TyBits;
  case TargetLoweringBase::ZeroOrOneBooleanContent:
    // 0 or 1: every bit above bit 0 is known zero, hence a sign bit.
    return TyBits - 1;
  default:
    // Any other encoding: no compare-specific information.
    break;
  }
  break;
}
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
default: {

View File

@ -57,10 +57,9 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
; GISEL-LABEL: combine_vec_udiv_nonuniform:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI1_4
; GISEL-NEXT: adrp x9, .LCPI1_0
; GISEL-NEXT: adrp x9, .LCPI1_5
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_4]
; GISEL-NEXT: adrp x8, .LCPI1_3
; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI1_0]
; GISEL-NEXT: neg v1.8h, v1.8h
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_3]
; GISEL-NEXT: adrp x8, .LCPI1_2
@ -68,21 +67,20 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_2]
; GISEL-NEXT: adrp x8, .LCPI1_5
; GISEL-NEXT: adrp x8, .LCPI1_1
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h
; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_5]
; GISEL-NEXT: adrp x8, .LCPI1_1
; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_1]
; GISEL-NEXT: adrp x8, .LCPI1_0
; GISEL-NEXT: neg v3.8h, v3.8h
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
; GISEL-NEXT: shl v3.8h, v3.8h, #15
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI1_5]
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI1_0]
; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
; GISEL-NEXT: neg v2.8h, v4.8h
; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
; GISEL-NEXT: sshr v2.8h, v3.8h, #15
; GISEL-NEXT: cmeq v2.8h, v4.8h, v5.8h
; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; GISEL-NEXT: ret
%1 = udiv <8 x i16> %x, <i16 23, i16 34, i16 -23, i16 56, i16 128, i16 -1, i16 -256, i16 -32768>
@ -108,25 +106,23 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
; GISEL-LABEL: combine_vec_udiv_nonuniform2:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI2_3
; GISEL-NEXT: adrp x9, .LCPI2_4
; GISEL-NEXT: adrp x10, .LCPI2_0
; GISEL-NEXT: adrp x9, .LCPI2_1
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_3]
; GISEL-NEXT: adrp x8, .LCPI2_2
; GISEL-NEXT: ldr q4, [x10, :lo12:.LCPI2_0]
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI2_1]
; GISEL-NEXT: neg v1.8h, v1.8h
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_2]
; GISEL-NEXT: adrp x8, .LCPI2_1
; GISEL-NEXT: adrp x8, .LCPI2_4
; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h
; GISEL-NEXT: neg v4.8h, v4.8h
; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI2_1]
; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI2_4]
; GISEL-NEXT: cmeq v2.8h, v2.8h, v4.8h
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_4]
; GISEL-NEXT: adrp x8, .LCPI2_0
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
; GISEL-NEXT: neg v3.8h, v5.8h
; GISEL-NEXT: shl v2.8h, v2.8h, #15
; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
; GISEL-NEXT: sshr v2.8h, v2.8h, #15
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI2_0]
; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h
; GISEL-NEXT: ushl v1.8h, v1.8h, v4.8h
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; GISEL-NEXT: ret
%1 = udiv <8 x i16> %x, <i16 -34, i16 35, i16 36, i16 -37, i16 38, i16 -39, i16 40, i16 -41>
@ -151,23 +147,21 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
; GISEL-LABEL: combine_vec_udiv_nonuniform3:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI3_2
; GISEL-NEXT: adrp x9, .LCPI3_0
; GISEL-NEXT: adrp x9, .LCPI3_3
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_2]
; GISEL-NEXT: adrp x8, .LCPI3_3
; GISEL-NEXT: ldr q3, [x9, :lo12:.LCPI3_0]
; GISEL-NEXT: adrp x8, .LCPI3_1
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI3_3]
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3]
; GISEL-NEXT: adrp x8, .LCPI3_1
; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h
; GISEL-NEXT: sub v4.8h, v0.8h, v1.8h
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_1]
; GISEL-NEXT: shl v2.8h, v2.8h, #15
; GISEL-NEXT: usra v1.8h, v4.8h, #1
; GISEL-NEXT: neg v3.8h, v3.8h
; GISEL-NEXT: sshr v2.8h, v2.8h, #15
; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
; GISEL-NEXT: adrp x8, .LCPI3_0
; GISEL-NEXT: neg v2.8h, v2.8h
; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
; GISEL-NEXT: usra v1.8h, v3.8h, #1
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_0]
; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
; GISEL-NEXT: cmeq v2.8h, v4.8h, v3.8h
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; GISEL-NEXT: ret
%1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>
@ -197,21 +191,19 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
; GISEL-LABEL: combine_vec_udiv_nonuniform4:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI4_2
; GISEL-NEXT: adrp x9, .LCPI4_0
; GISEL-NEXT: adrp x9, .LCPI4_1
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2]
; GISEL-NEXT: adrp x8, .LCPI4_3
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0]
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_1]
; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_3]
; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b
; GISEL-NEXT: adrp x8, .LCPI4_1
; GISEL-NEXT: cmeq v3.16b, v3.16b, v4.16b
; GISEL-NEXT: adrp x8, .LCPI4_0
; GISEL-NEXT: neg v4.16b, v4.16b
; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_1]
; GISEL-NEXT: shl v3.16b, v3.16b, #7
; GISEL-NEXT: neg v2.16b, v2.16b
; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b
; GISEL-NEXT: sshr v2.16b, v3.16b, #7
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
; GISEL-NEXT: cmeq v2.16b, v3.16b, v2.16b
; GISEL-NEXT: ushl v1.16b, v1.16b, v4.16b
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; GISEL-NEXT: ret
%div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@ -248,28 +240,26 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
; GISEL-LABEL: pr38477:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI5_3
; GISEL-NEXT: adrp x9, .LCPI5_0
; GISEL-NEXT: adrp x9, .LCPI5_4
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3]
; GISEL-NEXT: adrp x8, .LCPI5_2
; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI5_0]
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI5_2]
; GISEL-NEXT: adrp x8, .LCPI5_4
; GISEL-NEXT: adrp x8, .LCPI5_1
; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h
; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_4]
; GISEL-NEXT: adrp x8, .LCPI5_1
; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_1]
; GISEL-NEXT: adrp x8, .LCPI5_0
; GISEL-NEXT: neg v3.8h, v3.8h
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI5_1]
; GISEL-NEXT: shl v3.8h, v3.8h, #15
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI5_4]
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI5_0]
; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
; GISEL-NEXT: neg v2.8h, v4.8h
; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
; GISEL-NEXT: sshr v2.8h, v3.8h, #15
; GISEL-NEXT: cmeq v2.8h, v4.8h, v5.8h
; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; GISEL-NEXT: ret
%1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>

View File

@ -305,8 +305,6 @@ define i32 @uabdl4s_rdx_i32(<4 x i16> %a, <4 x i16> %b) {
; GISEL-NEXT: usubl.4s v0, v0, v1
; GISEL-NEXT: cmgt.4s v1, v2, v0
; GISEL-NEXT: neg.4s v2, v0
; GISEL-NEXT: shl.4s v1, v1, #31
; GISEL-NEXT: sshr.4s v1, v1, #31
; GISEL-NEXT: bit.16b v0, v2, v1
; GISEL-NEXT: addv.4s s0, v0
; GISEL-NEXT: fmov w0, s0
@ -378,8 +376,6 @@ define i64 @uabdl2d_rdx_i64(<2 x i32> %a, <2 x i32> %b) {
; GISEL-NEXT: usubl.2d v0, v0, v1
; GISEL-NEXT: cmgt.2d v1, v2, v0
; GISEL-NEXT: neg.2d v2, v0
; GISEL-NEXT: shl.2d v1, v1, #63
; GISEL-NEXT: sshr.2d v1, v1, #63
; GISEL-NEXT: bit.16b v0, v2, v1
; GISEL-NEXT: addp.2d d0, v0
; GISEL-NEXT: fmov x0, d0
@ -1575,8 +1571,6 @@ define <2 x i32> @abspattern1(<2 x i32> %a) nounwind {
; GISEL-NEXT: movi.2d v1, #0000000000000000
; GISEL-NEXT: neg.2s v2, v0
; GISEL-NEXT: cmge.2s v1, v0, v1
; GISEL-NEXT: shl.2s v1, v1, #31
; GISEL-NEXT: sshr.2s v1, v1, #31
; GISEL-NEXT: bif.8b v0, v2, v1
; GISEL-NEXT: ret
@ -1597,8 +1591,6 @@ define <4 x i16> @abspattern2(<4 x i16> %a) nounwind {
; GISEL-NEXT: movi.2d v1, #0000000000000000
; GISEL-NEXT: neg.4h v2, v0
; GISEL-NEXT: cmgt.4h v1, v0, v1
; GISEL-NEXT: shl.4h v1, v1, #15
; GISEL-NEXT: sshr.4h v1, v1, #15
; GISEL-NEXT: bif.8b v0, v2, v1
; GISEL-NEXT: ret
; For GlobalISel, this generates terrible code until we can pattern match this to abs.
@ -1620,8 +1612,6 @@ define <8 x i8> @abspattern3(<8 x i8> %a) nounwind {
; GISEL-NEXT: movi.2d v1, #0000000000000000
; GISEL-NEXT: neg.8b v2, v0
; GISEL-NEXT: cmgt.8b v1, v1, v0
; GISEL-NEXT: shl.8b v1, v1, #7
; GISEL-NEXT: sshr.8b v1, v1, #7
; GISEL-NEXT: bit.8b v0, v2, v1
; GISEL-NEXT: ret
@ -1642,8 +1632,6 @@ define <4 x i32> @abspattern4(<4 x i32> %a) nounwind {
; GISEL-NEXT: movi.2d v1, #0000000000000000
; GISEL-NEXT: neg.4s v2, v0
; GISEL-NEXT: cmge.4s v1, v0, v1
; GISEL-NEXT: shl.4s v1, v1, #31
; GISEL-NEXT: sshr.4s v1, v1, #31
; GISEL-NEXT: bif.16b v0, v2, v1
; GISEL-NEXT: ret
@ -1664,8 +1652,6 @@ define <8 x i16> @abspattern5(<8 x i16> %a) nounwind {
; GISEL-NEXT: movi.2d v1, #0000000000000000
; GISEL-NEXT: neg.8h v2, v0
; GISEL-NEXT: cmgt.8h v1, v0, v1
; GISEL-NEXT: shl.8h v1, v1, #15
; GISEL-NEXT: sshr.8h v1, v1, #15
; GISEL-NEXT: bif.16b v0, v2, v1
; GISEL-NEXT: ret
@ -1686,8 +1672,6 @@ define <16 x i8> @abspattern6(<16 x i8> %a) nounwind {
; GISEL-NEXT: movi.2d v1, #0000000000000000
; GISEL-NEXT: neg.16b v2, v0
; GISEL-NEXT: cmgt.16b v1, v1, v0
; GISEL-NEXT: shl.16b v1, v1, #7
; GISEL-NEXT: sshr.16b v1, v1, #7
; GISEL-NEXT: bit.16b v0, v2, v1
; GISEL-NEXT: ret
@ -1708,8 +1692,6 @@ define <2 x i64> @abspattern7(<2 x i64> %a) nounwind {
; GISEL-NEXT: movi.2d v1, #0000000000000000
; GISEL-NEXT: neg.2d v2, v0
; GISEL-NEXT: cmge.2d v1, v1, v0
; GISEL-NEXT: shl.2d v1, v1, #63
; GISEL-NEXT: sshr.2d v1, v1, #63
; GISEL-NEXT: bit.16b v0, v2, v1
; GISEL-NEXT: ret
@ -1731,8 +1713,6 @@ define <2 x i64> @uabd_i32(<2 x i32> %a, <2 x i32> %b) {
; GISEL-NEXT: ssubl.2d v0, v0, v1
; GISEL-NEXT: cmgt.2d v1, v2, v0
; GISEL-NEXT: neg.2d v2, v0
; GISEL-NEXT: shl.2d v1, v1, #63
; GISEL-NEXT: sshr.2d v1, v1, #63
; GISEL-NEXT: bit.16b v0, v2, v1
; GISEL-NEXT: ret
%aext = sext <2 x i32> %a to <2 x i64>
@ -1782,3 +1762,5 @@ define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) {
%absel = select <2 x i1> %abcmp, <2 x i128> %ababs, <2 x i128> %abdiff
ret <2 x i128> %absel
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; FALLBACK: {{.*}}

View File

@ -224,19 +224,11 @@ define <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) {
declare <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
define <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: smax2i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: cmgt v2.2d, v0.2d, v1.2d
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: smax2i64:
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmgt v2.2d, v0.2d, v1.2d
; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GLOBAL-NEXT: ret
; CHECK-LABEL: smax2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%c = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %c
}
@ -257,10 +249,6 @@ define void @smax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmgt v4.2d, v0.2d, v2.2d
; CHECK-GLOBAL-NEXT: cmgt v5.2d, v1.2d, v3.2d
; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: shl v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: sshr v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: sshr v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-GLOBAL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-GLOBAL-NEXT: stp q0, q1, [x0]
@ -488,19 +476,11 @@ define <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) {
declare <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
define <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: umax2i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: cmhi v2.2d, v0.2d, v1.2d
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: umax2i64:
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmhi v2.2d, v0.2d, v1.2d
; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GLOBAL-NEXT: ret
; CHECK-LABEL: umax2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmhi v2.2d, v0.2d, v1.2d
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%c = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %c
}
@ -521,10 +501,6 @@ define void @umax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmhi v4.2d, v0.2d, v2.2d
; CHECK-GLOBAL-NEXT: cmhi v5.2d, v1.2d, v3.2d
; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: shl v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: sshr v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: sshr v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-GLOBAL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-GLOBAL-NEXT: stp q0, q1, [x0]
@ -752,19 +728,11 @@ define <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) {
declare <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
define <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: smin2i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: cmgt v2.2d, v1.2d, v0.2d
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: smin2i64:
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmgt v2.2d, v1.2d, v0.2d
; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GLOBAL-NEXT: ret
; CHECK-LABEL: smin2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%c = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %c
}
@ -785,10 +753,6 @@ define void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmgt v4.2d, v2.2d, v0.2d
; CHECK-GLOBAL-NEXT: cmgt v5.2d, v3.2d, v1.2d
; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: shl v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: sshr v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: sshr v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-GLOBAL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-GLOBAL-NEXT: stp q0, q1, [x0]
@ -1016,19 +980,11 @@ define <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) {
declare <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
define <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: umin2i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: cmhi v2.2d, v1.2d, v0.2d
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: umin2i64:
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmhi v2.2d, v1.2d, v0.2d
; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GLOBAL-NEXT: ret
; CHECK-LABEL: umin2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmhi v2.2d, v1.2d, v0.2d
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%c = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %c
}
@ -1049,10 +1005,6 @@ define void @umin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmhi v4.2d, v2.2d, v0.2d
; CHECK-GLOBAL-NEXT: cmhi v5.2d, v3.2d, v1.2d
; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: shl v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: sshr v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: sshr v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-GLOBAL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-GLOBAL-NEXT: stp q0, q1, [x0]

File diff suppressed because it is too large Load Diff

View File

@ -924,6 +924,36 @@ TEST_F(AArch64GISelMITest, TestNumSignBitsTrunc) {
EXPECT_EQ(5u, Info.computeNumSignBits(CopyTrunc7));
}
// Exercises computeNumSignBits() on the results of G_FCMP and G_ICMP, for both
// <4 x s32> vector results and s32 scalar results.
TEST_F(AArch64GISelMITest, TestNumSignBitsCmp) {
  StringRef MIRString =
      " %v1:_(<4 x s32>) = G_IMPLICIT_DEF\n"
      " %v2:_(<4 x s32>) = G_IMPLICIT_DEF\n"
      " %s1:_(s64) = G_IMPLICIT_DEF\n"
      " %s2:_(s64) = G_IMPLICIT_DEF\n"
      " %cmp:_(<4 x s32>) = G_FCMP floatpred(ogt), %v1, %v2\n"
      " %cpy1:_(<4 x s32>) = COPY %cmp\n"
      " %cmp2:_(<4 x s32>) = G_ICMP intpred(eq), %v1, %v2\n"
      " %cpy2:_(<4 x s32>) = COPY %cmp2\n"
      " %cmp3:_(s32) = G_FCMP floatpred(ogt), %s1, %s2\n"
      " %cpy3:_(s32) = COPY %cmp3\n"
      " %cmp4:_(s32) = G_ICMP intpred(eq), %s1, %s2\n"
      " %cpy4:_(s32) = COPY %cmp4\n";

  setUp(MIRString);
  // Nothing to check when no target machine is available.
  if (!TM)
    return;

  GISelKnownBits Info(*MF);

  // The last four entries of Copies are taken to be %cpy1..%cpy4, in that
  // order.
  const auto NumCopies = Copies.size();
  Register CopyVecFCMP = Copies[NumCopies - 4];
  Register CopyVecICMP = Copies[NumCopies - 3];
  Register CopyScalarFCMP = Copies[NumCopies - 2];
  Register CopyScalarICMP = Copies[NumCopies - 1];

  // Vector compares: all 32 bits are expected to be sign bits.
  EXPECT_EQ(32u, Info.computeNumSignBits(CopyVecFCMP));
  EXPECT_EQ(32u, Info.computeNumSignBits(CopyVecICMP));
  // Scalar compares: all but the lowest bit are expected to be sign bits.
  EXPECT_EQ(31u, Info.computeNumSignBits(CopyScalarFCMP));
  EXPECT_EQ(31u, Info.computeNumSignBits(CopyScalarICMP));
}
TEST_F(AMDGPUGISelMITest, TestNumSignBitsTrunc) {
StringRef MIRString =
" %3:_(<4 x s32>) = G_IMPLICIT_DEF\n"