[SVE] Fix incorrect predicate for fixed length int/fp conversion.

When performing shrinking int/fp conversions the predicate should
be created to match the original fixed length vector type so the
unused lanes don't trigger side effects.

This patch also includes related refactoring to better detect such
issues and streamline the code a little.

Differential Revision: https://reviews.llvm.org/D138351
This commit is contained in:
Paul Walker 2022-11-11 21:01:59 +00:00
parent c08d3b08f6
commit 44a18a8fce
3 changed files with 26 additions and 88 deletions

View File

@ -22981,9 +22981,8 @@ AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
ContainerDstVT.getVectorElementType().getSizeInBits()) {
SDValue Pg = getPredicateForVector(DAG, DL, VT);
if (VT.bitsGE(SrcVT)) {
SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
VT.changeTypeToInteger(), Val);
@ -22998,7 +22997,7 @@ AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
} else {
EVT CvtVT = ContainerSrcVT.changeVectorElementType(
ContainerDstVT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, SrcVT);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
@ -23026,11 +23025,10 @@ AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
ContainerDstVT.getVectorElementType().getSizeInBits()) {
if (VT.bitsGT(SrcVT)) {
EVT CvtVT = ContainerDstVT.changeVectorElementType(
ContainerSrcVT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, VT);
SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val);
@ -23042,7 +23040,7 @@ AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
return convertFromScalableVector(DAG, VT, Val);
} else {
EVT CvtVT = ContainerSrcVT.changeTypeToInteger();
SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, SrcVT);
// Safe to use a larger than specified result since an fp_to_int where the
// result doesn't fit into the destination is undefined.

View File

@ -343,7 +343,6 @@ define <8 x i16> @fcvtzu_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@ -360,7 +359,6 @@ define void @fcvtzu_v16f32_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.s
; VBITS_GE_256-NEXT: fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT: fcvtzu z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
@ -374,9 +372,8 @@ define void @fcvtzu_v16f32_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_512-LABEL: fcvtzu_v16f32_v16i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.s, p1/m, z0.s
; VBITS_GE_512-NEXT: fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
%op1 = load <16 x float>, ptr %a
@ -389,9 +386,8 @@ define void @fcvtzu_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p1/m, z0.s
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%op1 = load <32 x float>, ptr %a
@ -404,9 +400,8 @@ define void @fcvtzu_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f32_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p1/m, z0.s
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%op1 = load <64 x float>, ptr %a
@ -616,7 +611,7 @@ define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f64_v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@ -642,7 +637,6 @@ define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@ -660,7 +654,6 @@ define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) #0 {
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.d
; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
@ -675,7 +668,6 @@ define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) #0 {
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ptrue p0.d
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
@ -690,9 +682,8 @@ define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p1/m, z0.d
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%op1 = load <16 x double>, ptr %a
@ -705,9 +696,8 @@ define void @fcvtzu_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p1/m, z0.d
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%op1 = load <32 x double>, ptr %a
@ -748,7 +738,6 @@ define <4 x i32> @fcvtzu_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@ -765,7 +754,6 @@ define void @fcvtzu_v8f64_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.d
; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
@ -779,9 +767,8 @@ define void @fcvtzu_v8f64_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.d, p1/m, z0.d
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
%op1 = load <8 x double>, ptr %a
@ -794,9 +781,8 @@ define void @fcvtzu_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p1/m, z0.d
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%op1 = load <16 x double>, ptr %a
@ -809,9 +795,8 @@ define void @fcvtzu_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p1/m, z0.d
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%op1 = load <32 x double>, ptr %a
@ -1251,7 +1236,6 @@ define <8 x i16> @fcvtzs_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@ -1268,7 +1252,6 @@ define void @fcvtzs_v16f32_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.s
; VBITS_GE_256-NEXT: fcvtzs z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT: fcvtzs z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
@ -1282,9 +1265,8 @@ define void @fcvtzs_v16f32_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_512-LABEL: fcvtzs_v16f32_v16i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzs z0.s, p1/m, z0.s
; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
%op1 = load <16 x float>, ptr %a
@ -1297,9 +1279,8 @@ define void @fcvtzs_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzs_v32f32_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzs z0.s, p1/m, z0.s
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%op1 = load <32 x float>, ptr %a
@ -1312,9 +1293,8 @@ define void @fcvtzs_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzs_v64f32_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzs z0.s, p1/m, z0.s
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%op1 = load <64 x float>, ptr %a
@ -1524,7 +1504,7 @@ define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v1f64_v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@ -1550,7 +1530,6 @@ define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@ -1568,7 +1547,6 @@ define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) #0 {
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.d
; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
@ -1583,7 +1561,6 @@ define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) #0 {
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ptrue p0.d
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
@ -1598,9 +1575,8 @@ define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzs_v16f64_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%op1 = load <16 x double>, ptr %a
@ -1613,9 +1589,8 @@ define void @fcvtzs_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzs_v32f64_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%op1 = load <32 x double>, ptr %a
@ -1656,7 +1631,6 @@ define <4 x i32> @fcvtzs_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@ -1673,7 +1647,6 @@ define void @fcvtzs_v8f64_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.d
; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
@ -1687,9 +1660,8 @@ define void @fcvtzs_v8f64_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzs z0.d, p1/m, z0.d
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
%op1 = load <8 x double>, ptr %a
@ -1702,9 +1674,8 @@ define void @fcvtzs_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzs_v16f64_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%op1 = load <16 x double>, ptr %a
@ -1717,9 +1688,8 @@ define void @fcvtzs_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzs_v32f64_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%op1 = load <32 x double>, ptr %a

View File

@ -340,7 +340,6 @@ define <8 x half> @ucvtf_v8i32_v8f16(ptr %a) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@ -357,7 +356,6 @@ define void @ucvtf_v16i32_v16f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.s
; VBITS_GE_256-NEXT: ucvtf z0.h, p0/m, z0.s
; VBITS_GE_256-NEXT: ucvtf z1.h, p0/m, z1.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
@ -372,7 +370,6 @@ define void @ucvtf_v16i32_v16f16(ptr %a, ptr %b) #0 {
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: ptrue p0.s
; VBITS_GE_512-NEXT: ucvtf z0.h, p0/m, z0.s
; VBITS_GE_512-NEXT: ptrue p0.h, vl16
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
@ -389,7 +386,6 @@ define void @ucvtf_v32i32_v32f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@ -406,7 +402,6 @@ define void @ucvtf_v64i32_v64f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@ -630,7 +625,7 @@ define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i64_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@ -645,7 +640,6 @@ define <4 x half> @ucvtf_v4i64_v4f16(ptr %a) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@ -663,7 +657,6 @@ define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) #0 {
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.d
; VBITS_GE_256-NEXT: ucvtf z0.h, p0/m, z0.d
; VBITS_GE_256-NEXT: ucvtf z1.h, p0/m, z1.d
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
@ -678,7 +671,6 @@ define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) #0 {
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ptrue p0.d
; VBITS_GE_512-NEXT: ucvtf z0.h, p0/m, z0.d
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
@ -694,7 +686,6 @@ define void @ucvtf_v16i64_v16f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
@ -711,7 +702,6 @@ define void @ucvtf_v32i64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
@ -755,7 +745,6 @@ define <4 x float> @ucvtf_v4i64_v4f32(ptr %a) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@ -772,7 +761,6 @@ define void @ucvtf_v8i64_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.d
; VBITS_GE_256-NEXT: ucvtf z0.s, p0/m, z0.d
; VBITS_GE_256-NEXT: ucvtf z1.s, p0/m, z1.d
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
@ -787,7 +775,6 @@ define void @ucvtf_v8i64_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ptrue p0.d
; VBITS_GE_512-NEXT: ucvtf z0.s, p0/m, z0.d
; VBITS_GE_512-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
@ -804,7 +791,6 @@ define void @ucvtf_v16i64_v16f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
@ -821,7 +807,6 @@ define void @ucvtf_v32i64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
@ -1274,7 +1259,6 @@ define <8 x half> @scvtf_v8i32_v8f16(ptr %a) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@ -1291,7 +1275,6 @@ define void @scvtf_v16i32_v16f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.s
; VBITS_GE_256-NEXT: scvtf z0.h, p0/m, z0.s
; VBITS_GE_256-NEXT: scvtf z1.h, p0/m, z1.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
@ -1306,7 +1289,6 @@ define void @scvtf_v16i32_v16f16(ptr %a, ptr %b) #0 {
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: ptrue p0.s
; VBITS_GE_512-NEXT: scvtf z0.h, p0/m, z0.s
; VBITS_GE_512-NEXT: ptrue p0.h, vl16
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
@ -1323,7 +1305,6 @@ define void @scvtf_v32i32_v32f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@ -1340,7 +1321,6 @@ define void @scvtf_v64i32_v64f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@ -1570,7 +1550,7 @@ define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i64_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@ -1585,7 +1565,6 @@ define <4 x half> @scvtf_v4i64_v4f16(ptr %a) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@ -1603,7 +1582,6 @@ define <8 x half> @scvtf_v8i64_v8f16(ptr %a) #0 {
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.d
; VBITS_GE_256-NEXT: scvtf z0.h, p0/m, z0.d
; VBITS_GE_256-NEXT: scvtf z1.h, p0/m, z1.d
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
@ -1618,7 +1596,6 @@ define <8 x half> @scvtf_v8i64_v8f16(ptr %a) #0 {
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ptrue p0.d
; VBITS_GE_512-NEXT: scvtf z0.h, p0/m, z0.d
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
@ -1634,7 +1611,6 @@ define void @scvtf_v16i64_v16f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
@ -1651,7 +1627,6 @@ define void @scvtf_v32i64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
@ -1695,7 +1670,6 @@ define <4 x float> @scvtf_v4i64_v4f32(ptr %a) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: scvtf z0.s, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@ -1712,7 +1686,6 @@ define void @scvtf_v8i64_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.d
; VBITS_GE_256-NEXT: scvtf z0.s, p0/m, z0.d
; VBITS_GE_256-NEXT: scvtf z1.s, p0/m, z1.d
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
@ -1727,7 +1700,6 @@ define void @scvtf_v8i64_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ptrue p0.d
; VBITS_GE_512-NEXT: scvtf z0.s, p0/m, z0.d
; VBITS_GE_512-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
@ -1744,7 +1716,6 @@ define void @scvtf_v16i64_v16f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: scvtf z0.s, p0/m, z0.d
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
@ -1761,7 +1732,6 @@ define void @scvtf_v32i64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: scvtf z0.s, p0/m, z0.d
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s