[AArch64][GlobalISel] When lowering signext i1 parameters, don't zero-extend to s8 first.

Fixes https://github.com/llvm/llvm-project/issues/57181
This commit is contained in:
Amara Emerson 2022-09-05 21:53:41 +01:00
parent c2e7c9cb33
commit 13792ba417
4 changed files with 164 additions and 8 deletions

View File

@ -383,7 +383,9 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
// i1 is a special case because SDAG i1 true is naturally zero extended
// when widened using ANYEXT. We need to do it explicitly here.
if (MRI.getType(CurVReg).getSizeInBits() == 1) {
auto &Flags = CurArgInfo.Flags[0];
if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() &&
!Flags.isZExt()) {
CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
} else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
1) {
@ -569,7 +571,8 @@ bool AArch64CallLowering::lowerFormalArguments(
MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
"Unexpected registers used for i1 arg");
if (!OrigArg.Flags[0].isZExt()) {
auto &Flags = OrigArg.Flags[0];
if (!Flags.isZExt() && !Flags.isSExt()) {
// Lower i1 argument as i8, and insert AssertZExt + Trunc later.
Register OrigReg = OrigArg.Regs[0];
Register WideReg = MRI.createGenericVirtualRegister(LLT::scalar(8));
@ -1110,7 +1113,8 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
for (auto &OrigArg : Info.OrigArgs) {
splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
// AAPCS requires the caller to zero-extend i1 to 8 bits.
if (OrigArg.Ty->isIntegerTy(1)) {
auto &Flags = OrigArg.Flags[0];
if (OrigArg.Ty->isIntegerTy(1) && !Flags.isSExt() && !Flags.isZExt()) {
ArgInfo &OutArg = OutArgs.back();
assert(OutArg.Regs.size() == 1 &&
MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&

View File

@ -42,7 +42,7 @@ define i32 @signext_param_i32(i32 signext %x) {
ret i32 %x
}
; Zeroext param is passed on the stack. We should still get a G_ASSERT_SEXT.
; signext param is passed on the stack. We should still get a G_ASSERT_SEXT.
define i32 @signext_param_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f,
; CHECK-LABEL: name: signext_param_stack
; CHECK: bb.1 (%ir-block.0):
@ -61,10 +61,8 @@ define i32 @signext_param_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f,
; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.0, align 8)
; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[SEXTLOAD]], 1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC1]](s1)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
i64 %g, i64 %h, i64 %i, i1 signext %j) {
@ -124,3 +122,45 @@ define i8 @s8_assert_zext_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e,
i8 signext %j) {
ret i8 %j
}
; Callee side of a `signext i1` parameter passed in $w0.
; The checks expect the incoming s32 copy to get a 1-bit G_ASSERT_SEXT and then
; be truncated straight to s1; no intermediate s8 G_ASSERT_ZEXT/G_TRUNC pair
; may appear between them (CHECK-NEXT enforces adjacency).
define i32 @callee_signext_i1(i1 signext %0) {
; CHECK-LABEL: name: callee_signext_i1
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)
; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%r = sext i1 %0 to i32
ret i32 %r
}
; Caller side: passes the constant `i1 true` as a `signext` argument.
; The checks expect the value to be widened with G_SEXT only (s1 -> s8 -> s32)
; before it is copied into $w0 for the call; no G_ZEXT is expected.
define i32 @caller_signext_i1() {
; CHECK-LABEL: name: caller_signext_i1
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s8) = G_SEXT [[C]](s1)
; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[SEXT]](s8)
; CHECK-NEXT: $w0 = COPY [[SEXT1]](s32)
; CHECK-NEXT: BL @callee_signext_i1, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $w0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%r = call i32 @callee_signext_i1(i1 signext true)
ret i32 %r
}
; Return side: a `signext i1` return value.
; The checks expect a single G_SEXT from s1 directly to s32 feeding $w0,
; i.e. the value is not zero-extended to s8 first.
define signext i1 @ret_signext_i1() {
; CHECK-LABEL: name: ret_signext_i1
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1)
; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
ret i1 true
}

View File

@ -116,3 +116,45 @@ define i8 @s8_assert_zext_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e,
i8 zeroext %j) {
ret i8 %j
}
; Callee side of a `zeroext i1` parameter passed in $w0.
; The checks expect a 1-bit G_ASSERT_ZEXT on the incoming s32 copy followed
; immediately by a G_TRUNC to s1 (no detour through s8).
define i32 @callee_zeroext_i1(i1 zeroext %0) {
; CHECK-LABEL: name: callee_zeroext_i1
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32)
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%r = zext i1 %0 to i32
ret i32 %r
}
; Caller side: passes the constant `i1 true` as a `zeroext` argument.
; The checks expect the value to be widened with G_ZEXT (s1 -> s8 -> s32)
; before it is copied into $w0 for the call.
define i32 @caller_zeroext_i1() {
; CHECK-LABEL: name: caller_zeroext_i1
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[C]](s1)
; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ZEXT]](s8)
; CHECK-NEXT: $w0 = COPY [[ZEXT1]](s32)
; CHECK-NEXT: BL @callee_zeroext_i1, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $w0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%r = call i32 @callee_zeroext_i1(i1 zeroext true)
ret i32 %r
}
; Return side: a `zeroext i1` return value.
; The checks expect a single G_ZEXT from s1 directly to s32 feeding $w0.
define zeroext i1 @ret_zeroext_i1() {
; CHECK-LABEL: name: ret_zeroext_i1
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s1)
; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
ret i1 true
}

View File

@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=GISEL
define <4 x i32> @sextbool_add_vector(<4 x i32> %c1, <4 x i32> %c2, <4 x i32> %x) {
; CHECK-LABEL: sextbool_add_vector:
@ -7,6 +8,12 @@ define <4 x i32> @sextbool_add_vector(<4 x i32> %c1, <4 x i32> %c2, <4 x i32> %x
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: add v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
;
; GISEL-LABEL: sextbool_add_vector:
; GISEL: // %bb.0:
; GISEL-NEXT: cmeq v0.4s, v0.4s, v1.4s
; GISEL-NEXT: add v0.4s, v2.4s, v0.4s
; GISEL-NEXT: ret
%c = icmp eq <4 x i32> %c1, %c2
%b = sext <4 x i1> %c to <4 x i32>
%s = add <4 x i32> %x, %b
@ -19,6 +26,15 @@ define <4 x i32> @zextbool_sub_vector(<4 x i32> %c1, <4 x i32> %c2, <4 x i32> %x
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: add v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ret
;
; GISEL-LABEL: zextbool_sub_vector:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI1_0
; GISEL-NEXT: cmeq v0.4s, v0.4s, v1.4s
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_0]
; GISEL-NEXT: and v0.16b, v0.16b, v3.16b
; GISEL-NEXT: sub v0.4s, v2.4s, v0.4s
; GISEL-NEXT: ret
%c = icmp eq <4 x i32> %c1, %c2
%b = zext <4 x i1> %c to <4 x i32>
%s = sub <4 x i32> %x, %b
@ -30,6 +46,12 @@ define i32 @assertsext_sub_1(i1 signext %cond, i32 %y) {
; CHECK: // %bb.0:
; CHECK-NEXT: add w0, w1, w0
; CHECK-NEXT: ret
;
; GISEL-LABEL: assertsext_sub_1:
; GISEL: // %bb.0:
; GISEL-NEXT: and w8, w0, #0x1
; GISEL-NEXT: sub w0, w1, w8
; GISEL-NEXT: ret
%e = zext i1 %cond to i32
%r = sub i32 %y, %e
ret i32 %r
@ -40,6 +62,12 @@ define i32 @assertsext_add_1(i1 signext %cond, i32 %y) {
; CHECK: // %bb.0:
; CHECK-NEXT: sub w0, w1, w0
; CHECK-NEXT: ret
;
; GISEL-LABEL: assertsext_add_1:
; GISEL: // %bb.0:
; GISEL-NEXT: and w8, w0, #0x1
; GISEL-NEXT: add w0, w8, w1
; GISEL-NEXT: ret
%e = zext i1 %cond to i32
%r = add i32 %e, %y
ret i32 %r
@ -50,8 +78,50 @@ define i32 @assertsext_add_1_commute(i1 signext %cond, i32 %y) {
; CHECK: // %bb.0:
; CHECK-NEXT: sub w0, w1, w0
; CHECK-NEXT: ret
;
; GISEL-LABEL: assertsext_add_1_commute:
; GISEL: // %bb.0:
; GISEL-NEXT: and w8, w0, #0x1
; GISEL-NEXT: add w0, w1, w8
; GISEL-NEXT: ret
%e = zext i1 %cond to i32
%r = add i32 %y, %e
ret i32 %r
}
; End-to-end (assembly) check for a `signext i1` parameter that is sign-extended
; to i32 and returned. Both the default (CHECK) and GlobalISel (GISEL) runs are
; expected to emit only `ret`: the sext needs no instruction of its own.
; NOTE(review): presumably this relies on the signext attribute guaranteeing the
; caller already sign-extended w0 - confirm against the LangRef.
define i32 @callee_signext_i1(i1 signext %0) {
; CHECK-LABEL: callee_signext_i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
;
; GISEL-LABEL: callee_signext_i1:
; GISEL: // %bb.0:
; GISEL-NEXT: ret
%r = sext i1 %0 to i32
ret i32 %r
}
; End-to-end (assembly) check for a call passing `i1 signext true`.
; The CHECK run expects the argument to be materialized as `mov w0, #-1`.
; The GISEL run expects an equivalent two-instruction sequence: `mov w8, #1`
; followed by `sbfx w0, w8, #0, #1` (sign-extend bit 0 into w0).
define i32 @caller_signext_i1() {
; CHECK-LABEL: caller_signext_i1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w0, #-1
; CHECK-NEXT: bl callee_signext_i1
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
;
; GISEL-LABEL: caller_signext_i1:
; GISEL: // %bb.0:
; GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; GISEL-NEXT: .cfi_def_cfa_offset 16
; GISEL-NEXT: .cfi_offset w30, -16
; GISEL-NEXT: mov w8, #1
; GISEL-NEXT: sbfx w0, w8, #0, #1
; GISEL-NEXT: bl callee_signext_i1
; GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; GISEL-NEXT: ret
%r = call i32 @callee_signext_i1(i1 signext true)
ret i32 %r
}