[GlobalISel] Add computeNumSignBits() support for compares.
Doing so allows G_SEXT_INREG to be combined away for many vector cases.

Differential Revision: https://reviews.llvm.org/D135168
parent 8055aa8e8a
commit c5cebf78bd
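The practical payoff is in combines that drop redundant sign extensions. Below is a hedged sketch (not the in-tree combiner code) of how a combine could consume the new information; the helper name and its parameters are illustrative, and only GISelKnownBits::computeNumSignBits comes from the patch itself:

#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
using namespace llvm;

// Illustrative helper, not the in-tree combine: a G_SEXT_INREG that
// sign-extends from Width bits is a no-op when the source already has at
// least DstBits - Width + 1 sign bits. With this patch, a G_ICMP/G_FCMP on a
// ZeroOrNegativeOneBooleanContent target reports TyBits sign bits, so the
// condition below holds for any Width and the extension can be dropped.
static bool isRedundantSExtInReg(GISelKnownBits &KB, Register Src,
                                 unsigned DstBits, unsigned Width) {
  return KB.computeNumSignBits(Src) >= DstBits - Width + 1;
}

AArch64 reports ZeroOrNegativeOneBooleanContent for vector compares and ZeroOrOneBooleanContent for scalars, which matches the 32/31 sign-bit expectations in the unit test at the end of this diff.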
@@ -711,6 +711,18 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    bool IsFP = Opcode == TargetOpcode::G_FCMP;
    if (TyBits == 1)
      break;
    auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
    if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
      return TyBits; // All bits are sign bits.
    if (BC == TargetLowering::ZeroOrOneBooleanContent)
      return TyBits - 1; // Every always-zero bit is a sign bit.
    break;
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  default: {
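To make the two returns above concrete: with ZeroOrNegativeOneBooleanContent a compare lane is all-zeros or all-ones, so every bit matches the sign bit; with ZeroOrOneBooleanContent only the lowest bit can differ from the (zero) sign bit. A small self-contained check of that arithmetic for 16-bit lanes (plain C++, assuming nothing beyond the standard library):

#include <cassert>
#include <cstdint>

// Count how many leading bits of a 16-bit value are copies of its sign bit,
// i.e. the per-element quantity computeNumSignBits reports.
static unsigned numSignBits16(uint16_t V) {
  unsigned SignBit = (V >> 15) & 1;
  unsigned N = 1; // The sign bit itself always counts.
  while (N < 16 && (((V >> (15 - N)) & 1) == SignBit))
    ++N;
  return N;
}

int main() {
  // ZeroOrNegativeOne booleans: lanes are 0x0000 or 0xFFFF -> 16 sign bits.
  assert(numSignBits16(0x0000) == 16 && numSignBits16(0xFFFF) == 16);
  // ZeroOrOne booleans: lanes are 0x0000 or 0x0001 -> at least 15 sign bits.
  assert(numSignBits16(0x0001) == 15);
  return 0;
}

This is also why the shl/sshr pairs, which lower the now-redundant G_SEXT_INREG of a compare result, drop out of the AArch64 check lines in the test diffs below.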
@@ -57,10 +57,9 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
; GISEL-LABEL: combine_vec_udiv_nonuniform:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI1_4
; GISEL-NEXT: adrp x9, .LCPI1_0
; GISEL-NEXT: adrp x9, .LCPI1_5
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_4]
; GISEL-NEXT: adrp x8, .LCPI1_3
; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI1_0]
; GISEL-NEXT: neg v1.8h, v1.8h
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_3]
; GISEL-NEXT: adrp x8, .LCPI1_2

@@ -68,21 +67,20 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_2]
; GISEL-NEXT: adrp x8, .LCPI1_5
; GISEL-NEXT: adrp x8, .LCPI1_1
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h
; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_5]
; GISEL-NEXT: adrp x8, .LCPI1_1
; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_1]
; GISEL-NEXT: adrp x8, .LCPI1_0
; GISEL-NEXT: neg v3.8h, v3.8h
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
; GISEL-NEXT: shl v3.8h, v3.8h, #15
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI1_5]
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI1_0]
; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
; GISEL-NEXT: neg v2.8h, v4.8h
; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
; GISEL-NEXT: sshr v2.8h, v3.8h, #15
; GISEL-NEXT: cmeq v2.8h, v4.8h, v5.8h
; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; GISEL-NEXT: ret
  %1 = udiv <8 x i16> %x, <i16 23, i16 34, i16 -23, i16 56, i16 128, i16 -1, i16 -256, i16 -32768>

@@ -108,25 +106,23 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
; GISEL-LABEL: combine_vec_udiv_nonuniform2:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI2_3
; GISEL-NEXT: adrp x9, .LCPI2_4
; GISEL-NEXT: adrp x10, .LCPI2_0
; GISEL-NEXT: adrp x9, .LCPI2_1
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_3]
; GISEL-NEXT: adrp x8, .LCPI2_2
; GISEL-NEXT: ldr q4, [x10, :lo12:.LCPI2_0]
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI2_1]
; GISEL-NEXT: neg v1.8h, v1.8h
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_2]
; GISEL-NEXT: adrp x8, .LCPI2_1
; GISEL-NEXT: adrp x8, .LCPI2_4
; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h
; GISEL-NEXT: neg v4.8h, v4.8h
; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI2_1]
; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI2_4]
; GISEL-NEXT: cmeq v2.8h, v2.8h, v4.8h
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_4]
; GISEL-NEXT: adrp x8, .LCPI2_0
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
; GISEL-NEXT: neg v3.8h, v5.8h
; GISEL-NEXT: shl v2.8h, v2.8h, #15
; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
; GISEL-NEXT: sshr v2.8h, v2.8h, #15
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI2_0]
; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h
; GISEL-NEXT: ushl v1.8h, v1.8h, v4.8h
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; GISEL-NEXT: ret
  %1 = udiv <8 x i16> %x, <i16 -34, i16 35, i16 36, i16 -37, i16 38, i16 -39, i16 40, i16 -41>

@@ -151,23 +147,21 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
; GISEL-LABEL: combine_vec_udiv_nonuniform3:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI3_2
; GISEL-NEXT: adrp x9, .LCPI3_0
; GISEL-NEXT: adrp x9, .LCPI3_3
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_2]
; GISEL-NEXT: adrp x8, .LCPI3_3
; GISEL-NEXT: ldr q3, [x9, :lo12:.LCPI3_0]
; GISEL-NEXT: adrp x8, .LCPI3_1
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI3_3]
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3]
; GISEL-NEXT: adrp x8, .LCPI3_1
; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h
; GISEL-NEXT: sub v4.8h, v0.8h, v1.8h
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_1]
; GISEL-NEXT: shl v2.8h, v2.8h, #15
; GISEL-NEXT: usra v1.8h, v4.8h, #1
; GISEL-NEXT: neg v3.8h, v3.8h
; GISEL-NEXT: sshr v2.8h, v2.8h, #15
; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
; GISEL-NEXT: adrp x8, .LCPI3_0
; GISEL-NEXT: neg v2.8h, v2.8h
; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
; GISEL-NEXT: usra v1.8h, v3.8h, #1
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_0]
; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
; GISEL-NEXT: cmeq v2.8h, v4.8h, v3.8h
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; GISEL-NEXT: ret
  %1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>

@@ -197,21 +191,19 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
; GISEL-LABEL: combine_vec_udiv_nonuniform4:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI4_2
; GISEL-NEXT: adrp x9, .LCPI4_0
; GISEL-NEXT: adrp x9, .LCPI4_1
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2]
; GISEL-NEXT: adrp x8, .LCPI4_3
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0]
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_1]
; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_3]
; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b
; GISEL-NEXT: adrp x8, .LCPI4_1
; GISEL-NEXT: cmeq v3.16b, v3.16b, v4.16b
; GISEL-NEXT: adrp x8, .LCPI4_0
; GISEL-NEXT: neg v4.16b, v4.16b
; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_1]
; GISEL-NEXT: shl v3.16b, v3.16b, #7
; GISEL-NEXT: neg v2.16b, v2.16b
; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b
; GISEL-NEXT: sshr v2.16b, v3.16b, #7
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
; GISEL-NEXT: cmeq v2.16b, v3.16b, v2.16b
; GISEL-NEXT: ushl v1.16b, v1.16b, v4.16b
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; GISEL-NEXT: ret
  %div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>

@@ -248,28 +240,26 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
; GISEL-LABEL: pr38477:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI5_3
; GISEL-NEXT: adrp x9, .LCPI5_0
; GISEL-NEXT: adrp x9, .LCPI5_4
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3]
; GISEL-NEXT: adrp x8, .LCPI5_2
; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI5_0]
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI5_2]
; GISEL-NEXT: adrp x8, .LCPI5_4
; GISEL-NEXT: adrp x8, .LCPI5_1
; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h
; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_4]
; GISEL-NEXT: adrp x8, .LCPI5_1
; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_1]
; GISEL-NEXT: adrp x8, .LCPI5_0
; GISEL-NEXT: neg v3.8h, v3.8h
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI5_1]
; GISEL-NEXT: shl v3.8h, v3.8h, #15
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI5_4]
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI5_0]
; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
; GISEL-NEXT: neg v2.8h, v4.8h
; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
; GISEL-NEXT: sshr v2.8h, v3.8h, #15
; GISEL-NEXT: cmeq v2.8h, v4.8h, v5.8h
; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; GISEL-NEXT: ret
  %1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>
@@ -305,8 +305,6 @@ define i32 @uabdl4s_rdx_i32(<4 x i16> %a, <4 x i16> %b) {
; GISEL-NEXT: usubl.4s v0, v0, v1
; GISEL-NEXT: cmgt.4s v1, v2, v0
; GISEL-NEXT: neg.4s v2, v0
; GISEL-NEXT: shl.4s v1, v1, #31
; GISEL-NEXT: sshr.4s v1, v1, #31
; GISEL-NEXT: bit.16b v0, v2, v1
; GISEL-NEXT: addv.4s s0, v0
; GISEL-NEXT: fmov w0, s0

@@ -378,8 +376,6 @@ define i64 @uabdl2d_rdx_i64(<2 x i32> %a, <2 x i32> %b) {
; GISEL-NEXT: usubl.2d v0, v0, v1
; GISEL-NEXT: cmgt.2d v1, v2, v0
; GISEL-NEXT: neg.2d v2, v0
; GISEL-NEXT: shl.2d v1, v1, #63
; GISEL-NEXT: sshr.2d v1, v1, #63
; GISEL-NEXT: bit.16b v0, v2, v1
; GISEL-NEXT: addp.2d d0, v0
; GISEL-NEXT: fmov x0, d0

@@ -1575,8 +1571,6 @@ define <2 x i32> @abspattern1(<2 x i32> %a) nounwind {
; GISEL-NEXT: movi.2d v1, #0000000000000000
; GISEL-NEXT: neg.2s v2, v0
; GISEL-NEXT: cmge.2s v1, v0, v1
; GISEL-NEXT: shl.2s v1, v1, #31
; GISEL-NEXT: sshr.2s v1, v1, #31
; GISEL-NEXT: bif.8b v0, v2, v1
; GISEL-NEXT: ret

@@ -1597,8 +1591,6 @@ define <4 x i16> @abspattern2(<4 x i16> %a) nounwind {
; GISEL-NEXT: movi.2d v1, #0000000000000000
; GISEL-NEXT: neg.4h v2, v0
; GISEL-NEXT: cmgt.4h v1, v0, v1
; GISEL-NEXT: shl.4h v1, v1, #15
; GISEL-NEXT: sshr.4h v1, v1, #15
; GISEL-NEXT: bif.8b v0, v2, v1
; GISEL-NEXT: ret
; For GlobalISel, this generates terrible code until we can pattern match this to abs.

@@ -1620,8 +1612,6 @@ define <8 x i8> @abspattern3(<8 x i8> %a) nounwind {
; GISEL-NEXT: movi.2d v1, #0000000000000000
; GISEL-NEXT: neg.8b v2, v0
; GISEL-NEXT: cmgt.8b v1, v1, v0
; GISEL-NEXT: shl.8b v1, v1, #7
; GISEL-NEXT: sshr.8b v1, v1, #7
; GISEL-NEXT: bit.8b v0, v2, v1
; GISEL-NEXT: ret

@@ -1642,8 +1632,6 @@ define <4 x i32> @abspattern4(<4 x i32> %a) nounwind {
; GISEL-NEXT: movi.2d v1, #0000000000000000
; GISEL-NEXT: neg.4s v2, v0
; GISEL-NEXT: cmge.4s v1, v0, v1
; GISEL-NEXT: shl.4s v1, v1, #31
; GISEL-NEXT: sshr.4s v1, v1, #31
; GISEL-NEXT: bif.16b v0, v2, v1
; GISEL-NEXT: ret

@@ -1664,8 +1652,6 @@ define <8 x i16> @abspattern5(<8 x i16> %a) nounwind {
; GISEL-NEXT: movi.2d v1, #0000000000000000
; GISEL-NEXT: neg.8h v2, v0
; GISEL-NEXT: cmgt.8h v1, v0, v1
; GISEL-NEXT: shl.8h v1, v1, #15
; GISEL-NEXT: sshr.8h v1, v1, #15
; GISEL-NEXT: bif.16b v0, v2, v1
; GISEL-NEXT: ret

@@ -1686,8 +1672,6 @@ define <16 x i8> @abspattern6(<16 x i8> %a) nounwind {
; GISEL-NEXT: movi.2d v1, #0000000000000000
; GISEL-NEXT: neg.16b v2, v0
; GISEL-NEXT: cmgt.16b v1, v1, v0
; GISEL-NEXT: shl.16b v1, v1, #7
; GISEL-NEXT: sshr.16b v1, v1, #7
; GISEL-NEXT: bit.16b v0, v2, v1
; GISEL-NEXT: ret

@@ -1708,8 +1692,6 @@ define <2 x i64> @abspattern7(<2 x i64> %a) nounwind {
; GISEL-NEXT: movi.2d v1, #0000000000000000
; GISEL-NEXT: neg.2d v2, v0
; GISEL-NEXT: cmge.2d v1, v1, v0
; GISEL-NEXT: shl.2d v1, v1, #63
; GISEL-NEXT: sshr.2d v1, v1, #63
; GISEL-NEXT: bit.16b v0, v2, v1
; GISEL-NEXT: ret

@@ -1731,8 +1713,6 @@ define <2 x i64> @uabd_i32(<2 x i32> %a, <2 x i32> %b) {
; GISEL-NEXT: ssubl.2d v0, v0, v1
; GISEL-NEXT: cmgt.2d v1, v2, v0
; GISEL-NEXT: neg.2d v2, v0
; GISEL-NEXT: shl.2d v1, v1, #63
; GISEL-NEXT: sshr.2d v1, v1, #63
; GISEL-NEXT: bit.16b v0, v2, v1
; GISEL-NEXT: ret
  %aext = sext <2 x i32> %a to <2 x i64>

@@ -1782,3 +1762,5 @@ define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) {
  %absel = select <2 x i1> %abcmp, <2 x i128> %ababs, <2 x i128> %abdiff
  ret <2 x i128> %absel
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; FALLBACK: {{.*}}
@@ -224,19 +224,11 @@ define <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) {
declare <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone

define <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: smax2i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: cmgt v2.2d, v0.2d, v1.2d
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: smax2i64:
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmgt v2.2d, v0.2d, v1.2d
; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GLOBAL-NEXT: ret
; CHECK-LABEL: smax2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
  %c = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %c
}

@@ -257,10 +249,6 @@ define void @smax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmgt v4.2d, v0.2d, v2.2d
; CHECK-GLOBAL-NEXT: cmgt v5.2d, v1.2d, v3.2d
; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: shl v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: sshr v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: sshr v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-GLOBAL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-GLOBAL-NEXT: stp q0, q1, [x0]

@@ -488,19 +476,11 @@ define <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) {
declare <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone

define <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: umax2i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: cmhi v2.2d, v0.2d, v1.2d
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: umax2i64:
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmhi v2.2d, v0.2d, v1.2d
; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GLOBAL-NEXT: ret
; CHECK-LABEL: umax2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmhi v2.2d, v0.2d, v1.2d
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
  %c = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %c
}

@@ -521,10 +501,6 @@ define void @umax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmhi v4.2d, v0.2d, v2.2d
; CHECK-GLOBAL-NEXT: cmhi v5.2d, v1.2d, v3.2d
; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: shl v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: sshr v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: sshr v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-GLOBAL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-GLOBAL-NEXT: stp q0, q1, [x0]

@@ -752,19 +728,11 @@ define <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) {
declare <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone

define <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: smin2i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: cmgt v2.2d, v1.2d, v0.2d
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: smin2i64:
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmgt v2.2d, v1.2d, v0.2d
; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GLOBAL-NEXT: ret
; CHECK-LABEL: smin2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
  %c = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %c
}

@@ -785,10 +753,6 @@ define void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmgt v4.2d, v2.2d, v0.2d
; CHECK-GLOBAL-NEXT: cmgt v5.2d, v3.2d, v1.2d
; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: shl v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: sshr v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: sshr v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-GLOBAL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-GLOBAL-NEXT: stp q0, q1, [x0]

@@ -1016,19 +980,11 @@ define <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) {
declare <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone

define <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: umin2i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: cmhi v2.2d, v1.2d, v0.2d
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: umin2i64:
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmhi v2.2d, v1.2d, v0.2d
; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GLOBAL-NEXT: ret
; CHECK-LABEL: umin2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmhi v2.2d, v1.2d, v0.2d
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
  %c = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %c
}

@@ -1049,10 +1005,6 @@ define void @umin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmhi v4.2d, v2.2d, v0.2d
; CHECK-GLOBAL-NEXT: cmhi v5.2d, v3.2d, v1.2d
; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: shl v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: sshr v4.2d, v4.2d, #63
; CHECK-GLOBAL-NEXT: sshr v5.2d, v5.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-GLOBAL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-GLOBAL-NEXT: stp q0, q1, [x0]
(One file's diff is suppressed because it is too large.)
@@ -924,6 +924,36 @@ TEST_F(AArch64GISelMITest, TestNumSignBitsTrunc) {
  EXPECT_EQ(5u, Info.computeNumSignBits(CopyTrunc7));
}

TEST_F(AArch64GISelMITest, TestNumSignBitsCmp) {
  StringRef MIRString =
      " %v1:_(<4 x s32>) = G_IMPLICIT_DEF\n"
      " %v2:_(<4 x s32>) = G_IMPLICIT_DEF\n"
      " %s1:_(s64) = G_IMPLICIT_DEF\n"
      " %s2:_(s64) = G_IMPLICIT_DEF\n"
      " %cmp:_(<4 x s32>) = G_FCMP floatpred(ogt), %v1, %v2\n"
      " %cpy1:_(<4 x s32>) = COPY %cmp\n"
      " %cmp2:_(<4 x s32>) = G_ICMP intpred(eq), %v1, %v2\n"
      " %cpy2:_(<4 x s32>) = COPY %cmp2\n"
      " %cmp3:_(s32) = G_FCMP floatpred(ogt), %s1, %s2\n"
      " %cpy3:_(s32) = COPY %cmp3\n"
      " %cmp4:_(s32) = G_ICMP intpred(eq), %s1, %s2\n"
      " %cpy4:_(s32) = COPY %cmp4\n";

  setUp(MIRString);
  if (!TM)
    return;
  Register CopyVecFCMP = Copies[Copies.size() - 4];
  Register CopyVecICMP = Copies[Copies.size() - 3];
  Register CopyScalarFCMP = Copies[Copies.size() - 2];
  Register CopyScalarICMP = Copies[Copies.size() - 1];

  GISelKnownBits Info(*MF);
  EXPECT_EQ(32u, Info.computeNumSignBits(CopyVecFCMP));
  EXPECT_EQ(32u, Info.computeNumSignBits(CopyVecICMP));
  EXPECT_EQ(31u, Info.computeNumSignBits(CopyScalarFCMP));
  EXPECT_EQ(31u, Info.computeNumSignBits(CopyScalarICMP));
}

TEST_F(AMDGPUGISelMITest, TestNumSignBitsTrunc) {
  StringRef MIRString =
      " %3:_(<4 x s32>) = G_IMPLICIT_DEF\n"