[ARM,MVE] Update MVE_VMLA_qr for architecture change.

In revision B.q and before of the Armv8-M architecture reference
manual, the vector/scalar forms of the `vmla` and `vmlas` instructions
came in signed and unsigned integer forms, such as `vmla.s8 q0,q1,r2`
or `vmlas.u32 q3,q4,r5`.

Revision B.r has changed this. There are no longer signed and unsigned
versions of these instructions, since they were functionally identical
anyway. Now there is just `vmla.i8` (or `i16` or `i32`, and similarly
for `vmlas`). Bit 28 of the instruction encoding, which was previously
0 for signed or 1 for unsigned, is now expected to be 0 always.

This change updates LLVM to the new version of the architecture. The
obsoleted encodings for unsigned integers are now decoding errors, and
only the still-valid encoding is ever emitted. This shouldn't break
any existing assembly code, because the old signed and unsigned
versions of the mnemonic are still accepted by the assembler (which is
standard practice anyway for all signedness-agnostic MVE integer
instructions).

Reviewed By: dmgreen, lenary

Differential Revision: https://reviews.llvm.org/D138827
This commit is contained in:
Simon Tatham 2022-11-29 08:46:49 +00:00
parent a580d2e430
commit e45cbf9923
15 changed files with 170 additions and 167 deletions

View File

@ -5625,7 +5625,7 @@ class MVE_VFMAMLA_qr<string iname, string suffix,
multiclass MVE_VMLA_qr_multi<string iname, MVEVectorVTInfo VTI,
bit scalar_addend> {
def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, 0b0, VTI.Size,
scalar_addend, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # iname # "_n_predicated");
@ -5635,10 +5635,7 @@ multiclass MVE_VMLA_qr_multi<string iname, MVEVectorVTInfo VTI,
defvar s = (i32 rGPR:$s);
defvar pred = (VTI.Pred VCCR:$pred);
// The signed and unsigned variants of this instruction have different
// encodings, but they're functionally identical. For the sake of
// determinism, we generate only the unsigned variant.
if VTI.Unsigned then let Predicates = [HasMVEInt] in {
let Predicates = [HasMVEInt] in {
if scalar_addend then {
def : Pat<(VTI.Vec (add (mul v1, v2), vs)),
(VTI.Vec (Inst v1, v2, s))>;
@ -5652,19 +5649,13 @@ multiclass MVE_VMLA_qr_multi<string iname, MVEVectorVTInfo VTI,
}
}
defm MVE_VMLA_qr_s8 : MVE_VMLA_qr_multi<"vmla", MVE_v16s8, 0b0>;
defm MVE_VMLA_qr_s16 : MVE_VMLA_qr_multi<"vmla", MVE_v8s16, 0b0>;
defm MVE_VMLA_qr_s32 : MVE_VMLA_qr_multi<"vmla", MVE_v4s32, 0b0>;
defm MVE_VMLA_qr_u8 : MVE_VMLA_qr_multi<"vmla", MVE_v16u8, 0b0>;
defm MVE_VMLA_qr_u16 : MVE_VMLA_qr_multi<"vmla", MVE_v8u16, 0b0>;
defm MVE_VMLA_qr_u32 : MVE_VMLA_qr_multi<"vmla", MVE_v4u32, 0b0>;
defm MVE_VMLA_qr_i8 : MVE_VMLA_qr_multi<"vmla", MVE_v16i8, 0b0>;
defm MVE_VMLA_qr_i16 : MVE_VMLA_qr_multi<"vmla", MVE_v8i16, 0b0>;
defm MVE_VMLA_qr_i32 : MVE_VMLA_qr_multi<"vmla", MVE_v4i32, 0b0>;
defm MVE_VMLAS_qr_s8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16s8, 0b1>;
defm MVE_VMLAS_qr_s16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8s16, 0b1>;
defm MVE_VMLAS_qr_s32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4s32, 0b1>;
defm MVE_VMLAS_qr_u8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16u8, 0b1>;
defm MVE_VMLAS_qr_u16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8u16, 0b1>;
defm MVE_VMLAS_qr_u32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4u32, 0b1>;
defm MVE_VMLAS_qr_i8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16i8, 0b1>;
defm MVE_VMLAS_qr_i16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8i16, 0b1>;
defm MVE_VMLAS_qr_i32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4i32, 0b1>;
multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
bit scalar_addend> {

View File

@ -24,7 +24,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture re
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrbt.u32 q2, [r1], #4
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vmla.u32 q0, q2, r0
; CHECK-NEXT: vmla.i32 q0, q2, r0
; CHECK-NEXT: le lr, .LBB0_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q0, q1
@ -89,7 +89,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, i16* nocapture
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrht.s32 q2, [r1], #8
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vmla.u32 q0, q2, r0
; CHECK-NEXT: vmla.i32 q0, q2, r0
; CHECK-NEXT: le lr, .LBB1_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q0, q1
@ -154,7 +154,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_uchar(i8 zeroext %a, i8* nocapture r
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrbt.u32 q2, [r1], #4
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vmla.u32 q0, q2, r0
; CHECK-NEXT: vmla.i32 q0, q2, r0
; CHECK-NEXT: le lr, .LBB2_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q0, q1
@ -219,7 +219,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, i16* nocaptur
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrht.u32 q2, [r1], #8
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vmla.u32 q0, q2, r0
; CHECK-NEXT: vmla.i32 q0, q2, r0
; CHECK-NEXT: le lr, .LBB3_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q0, q1
@ -284,7 +284,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, i32* nocapture readonly
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrwt.u32 q2, [r1], #16
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vmla.u32 q0, q2, r0
; CHECK-NEXT: vmla.i32 q0, q2, r0
; CHECK-NEXT: le lr, .LBB4_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q0, q1
@ -361,7 +361,7 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u32 q0, [r0], #4
; CHECK-NEXT: vldrb.u32 q1, [r1], #4
; CHECK-NEXT: vmlas.u32 q1, q0, r2
; CHECK-NEXT: vmlas.i32 q1, q0, r2
; CHECK-NEXT: vstrw.32 q1, [r3], #16
; CHECK-NEXT: letp lr, .LBB5_5
; CHECK-NEXT: b .LBB5_11
@ -559,7 +559,7 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_short(i16* nocapture readon
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.s32 q0, [r0], #8
; CHECK-NEXT: vldrh.s32 q1, [r1], #8
; CHECK-NEXT: vmlas.u32 q1, q0, r2
; CHECK-NEXT: vmlas.i32 q1, q0, r2
; CHECK-NEXT: vstrw.32 q1, [r3], #16
; CHECK-NEXT: letp lr, .LBB6_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
@ -637,7 +637,7 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u32 q0, [r0], #4
; CHECK-NEXT: vldrb.u32 q1, [r1], #4
; CHECK-NEXT: vmlas.u32 q1, q0, r2
; CHECK-NEXT: vmlas.i32 q1, q0, r2
; CHECK-NEXT: vstrw.32 q1, [r3], #16
; CHECK-NEXT: letp lr, .LBB7_5
; CHECK-NEXT: b .LBB7_11
@ -835,7 +835,7 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_ushort(i16* nocapture reado
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.u32 q0, [r0], #8
; CHECK-NEXT: vldrh.u32 q1, [r1], #8
; CHECK-NEXT: vmlas.u32 q1, q0, r2
; CHECK-NEXT: vmlas.i32 q1, q0, r2
; CHECK-NEXT: vstrw.32 q1, [r3], #16
; CHECK-NEXT: letp lr, .LBB8_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
@ -913,7 +913,7 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
; CHECK-NEXT: vldrw.u32 q1, [r1], #16
; CHECK-NEXT: vmlas.u32 q1, q0, r2
; CHECK-NEXT: vmlas.i32 q1, q0, r2
; CHECK-NEXT: vstrw.32 q1, [r3], #16
; CHECK-NEXT: letp lr, .LBB9_5
; CHECK-NEXT: b .LBB9_11

View File

@ -63,7 +63,7 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(i16* noalias nocaptur
; CHECK-NEXT: vshr.u16 q1, q0, #3
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vmov q2, q4
; CHECK-NEXT: vmla.u16 q2, q1, r2
; CHECK-NEXT: vmla.i16 q2, q1, r2
; CHECK-NEXT: vshr.u16 q1, q2, #5
; CHECK-NEXT: vshl.i16 q2, q0, #3
; CHECK-NEXT: vand q3, q1, q5
@ -74,7 +74,7 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(i16* noalias nocaptur
; CHECK-NEXT: vmov q5, q4
; CHECK-NEXT: vldrw.u32 q4, [sp, #48] @ 16-byte Reload
; CHECK-NEXT: vshr.u16 q0, q0, #9
; CHECK-NEXT: vmla.u16 q4, q2, r2
; CHECK-NEXT: vmla.i16 q4, q2, r2
; CHECK-NEXT: vshr.u16 q2, q4, #11
; CHECK-NEXT: vmov q4, q5
; CHECK-NEXT: vmov q5, q6
@ -83,7 +83,7 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(i16* noalias nocaptur
; CHECK-NEXT: vorr q1, q3, q2
; CHECK-NEXT: vldrw.u32 q2, [sp, #16] @ 16-byte Reload
; CHECK-NEXT: vand q0, q0, q7
; CHECK-NEXT: vmla.u16 q2, q0, r2
; CHECK-NEXT: vmla.i16 q2, q0, r2
; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload
; CHECK-NEXT: vand q0, q2, q0
; CHECK-NEXT: vldrw.u32 q2, [sp, #32] @ 16-byte Reload
@ -242,7 +242,7 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(i16* noalias no
; CHECK-NEXT: vshl.i16 q1, q0, #3
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vmov q2, q4
; CHECK-NEXT: vmla.u16 q2, q1, r3
; CHECK-NEXT: vmla.i16 q2, q1, r3
; CHECK-NEXT: vshr.u16 q1, q0, #3
; CHECK-NEXT: vand q1, q1, q5
; CHECK-NEXT: vmov.f64 d14, d10
@ -251,11 +251,11 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(i16* noalias no
; CHECK-NEXT: vmov.f64 d11, d9
; CHECK-NEXT: vldrw.u32 q4, [sp, #32] @ 16-byte Reload
; CHECK-NEXT: vshr.u16 q0, q0, #9
; CHECK-NEXT: vmla.u16 q4, q1, r3
; CHECK-NEXT: vmla.i16 q4, q1, r3
; CHECK-NEXT: vldrw.u32 q1, [sp, #48] @ 16-byte Reload
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload
; CHECK-NEXT: vmla.u16 q1, q0, r3
; CHECK-NEXT: vmla.i16 q1, q0, r3
; CHECK-NEXT: vshr.u16 q0, q2, #11
; CHECK-NEXT: vshr.u16 q2, q4, #5
; CHECK-NEXT: vand q2, q2, q6

View File

@ -162,7 +162,7 @@ body: |
; CHECK: renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 4, 0, $noreg, $noreg :: (load (s128) from %ir.input_1_cast, align 4)
; CHECK: renamable $q2 = MVE_VADD_qr_i32 killed renamable $q2, renamable $r3, 0, $noreg, $noreg, undef renamable $q2
; CHECK: renamable $q3 = MVE_VADD_qr_i32 killed renamable $q3, renamable $r2, 0, $noreg, $noreg, undef renamable $q3
; CHECK: renamable $q3 = MVE_VMLAS_qr_u32 killed renamable $q3, killed renamable $q2, renamable $r5, 0, $noreg, $noreg
; CHECK: renamable $q3 = MVE_VMLAS_qr_i32 killed renamable $q3, killed renamable $q2, renamable $r5, 0, $noreg, $noreg
; CHECK: renamable $q2 = MVE_VMAXu32 killed renamable $q3, renamable $q1, 0, $noreg, $noreg, undef renamable $q2
; CHECK: renamable $q3 = MVE_VMINu32 renamable $q2, renamable $q0, 0, $noreg, $noreg, undef renamable $q3
; CHECK: renamable $r12 = MVE_VMLADAVas32 killed renamable $r12, killed renamable $q3, killed renamable $q2, 0, killed $noreg, $noreg
@ -210,7 +210,7 @@ body: |
renamable $q2 = MVE_VADD_qr_i32 killed renamable $q2, renamable $r3, 0, $noreg, $noreg, undef renamable $q2
renamable $q3 = MVE_VADD_qr_i32 killed renamable $q3, renamable $r2, 0, $noreg, $noreg, undef renamable $q3
renamable $r4, dead $cpsr = tSUBi8 killed renamable $r4, 4, 14 /* CC::al */, $noreg
renamable $q3 = MVE_VMLAS_qr_u32 killed renamable $q3, killed renamable $q2, renamable $r5, 0, $noreg, $noreg
renamable $q3 = MVE_VMLAS_qr_i32 killed renamable $q3, killed renamable $q2, renamable $r5, 0, $noreg, $noreg
MVE_VPST 2, implicit $vpr
renamable $q2 = MVE_VMAXu32 killed renamable $q3, renamable $q1, 1, renamable $vpr, $noreg, undef renamable $q2
renamable $q3 = MVE_VMINu32 renamable $q2, renamable $q0, 1, renamable $vpr, $noreg, undef renamable $q3

View File

@ -339,7 +339,7 @@ define arm_aapcs_vfpcc void @non_gatscat_use1(i32* noalias nocapture readonly %d
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmov q3, q0
; CHECK-NEXT: vadd.i32 q2, q1, r4
; CHECK-NEXT: vmla.u32 q3, q1, lr
; CHECK-NEXT: vmla.i32 q3, q1, lr
; CHECK-NEXT: vmul.i32 q1, q1, r12
; CHECK-NEXT: vldrw.u32 q4, [q3, #24]
; CHECK-NEXT: subs r2, #4
@ -401,11 +401,11 @@ define arm_aapcs_vfpcc void @non_gatscat_use2(i32* noalias nocapture readonly %d
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vadd.i32 q3, q2, r4
; CHECK-NEXT: vmla.u32 q4, q2, lr
; CHECK-NEXT: vmla.i32 q4, q2, lr
; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vldrw.u32 q5, [q4, #24]
; CHECK-NEXT: vmov q4, q1
; CHECK-NEXT: vmla.u32 q4, q2, r12
; CHECK-NEXT: vmla.i32 q4, q2, r12
; CHECK-NEXT: vmov q2, q3
; CHECK-NEXT: vstrb.8 q5, [r1], #16
; CHECK-NEXT: vstrw.32 q4, [r3]
@ -490,7 +490,7 @@ define dso_local void @arm_mat_mult_q31(i32* noalias nocapture readonly %A, i32*
; CHECK-NEXT: dls lr, r10
; CHECK-NEXT: vmov.i32 q4, #0x0
; CHECK-NEXT: vadd.i32 q5, q5, q0
; CHECK-NEXT: vmlas.u32 q6, q2, r5
; CHECK-NEXT: vmlas.i32 q6, q2, r5
; CHECK-NEXT: .LBB9_3: @ %vector.body
; CHECK-NEXT: @ Parent Loop BB9_1 Depth=1
; CHECK-NEXT: @ Parent Loop BB9_2 Depth=2
@ -696,7 +696,7 @@ define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16*
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: vmov q5, q1
; CHECK-NEXT: vmov.i32 q4, #0x0
; CHECK-NEXT: vmlas.u32 q5, q2, r8
; CHECK-NEXT: vmlas.i32 q5, q2, r8
; CHECK-NEXT: dls lr, r0
; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: .LBB10_11: @ %vector.body
@ -909,11 +909,11 @@ define hidden arm_aapcs_vfpcc i32 @arm_depthwise_conv_s8(i8* nocapture readonly
; CHECK-NEXT: vdup.32 q3, r5
; CHECK-NEXT: vdup.32 q2, r7
; CHECK-NEXT: vadd.i32 q4, q1, r4
; CHECK-NEXT: vmla.u32 q3, q4, r2
; CHECK-NEXT: vmla.i32 q3, q4, r2
; CHECK-NEXT: adds r4, #113
; CHECK-NEXT: vadd.i32 q4, q1, r4
; CHECK-NEXT: mov r4, r8
; CHECK-NEXT: vmla.u32 q2, q4, r2
; CHECK-NEXT: vmla.i32 q2, q4, r2
; CHECK-NEXT: .LBB11_5: @ %vector.body
; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1
; CHECK-NEXT: @ Parent Loop BB11_2 Depth=2

View File

@ -155,7 +155,7 @@ define dso_local void @mve_scatter_qi(i32* noalias nocapture readonly %A, i32* n
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrwt.u32 q2, [r4], #16
; CHECK-NEXT: vmul.i32 q3, q2, r1
; CHECK-NEXT: vmla.u32 q0, q2, r1
; CHECK-NEXT: vmla.i32 q0, q2, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q3, [q1, #80]!
; CHECK-NEXT: le lr, .LBB2_1
@ -243,17 +243,17 @@ define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocaptur
; CHECK-NEXT: vldrb.u32 q5, [r0, q2]
; CHECK-NEXT: adds r0, #12
; CHECK-NEXT: vmul.i32 q6, q4, r11
; CHECK-NEXT: vmla.u32 q6, q3, r8
; CHECK-NEXT: vmla.u32 q6, q5, r12
; CHECK-NEXT: vmla.i32 q6, q3, r8
; CHECK-NEXT: vmla.i32 q6, q5, r12
; CHECK-NEXT: vadd.i32 q6, q6, r3
; CHECK-NEXT: vshr.u32 q6, q6, #16
; CHECK-NEXT: vstrb.32 q6, [r1, q1]
; CHECK-NEXT: vmul.i32 q6, q4, r4
; CHECK-NEXT: vmul.i32 q4, q4, r10
; CHECK-NEXT: vmla.u32 q6, q3, r5
; CHECK-NEXT: vmla.u32 q4, q3, r7
; CHECK-NEXT: vmla.u32 q6, q5, r6
; CHECK-NEXT: vmla.u32 q4, q5, r9
; CHECK-NEXT: vmla.i32 q6, q3, r5
; CHECK-NEXT: vmla.i32 q4, q3, r7
; CHECK-NEXT: vmla.i32 q6, q5, r6
; CHECK-NEXT: vmla.i32 q4, q5, r9
; CHECK-NEXT: vadd.i32 q6, q6, r3
; CHECK-NEXT: vadd.i32 q3, q4, r3
; CHECK-NEXT: vshr.u32 q6, q6, #16

View File

@ -104,7 +104,7 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @test_vmlaq_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c) {
; CHECK-LABEL: test_vmlaq_n_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmla.u8 q0, q1, r0
; CHECK-NEXT: vmla.i8 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%.splatinsert = insertelement <16 x i8> undef, i8 %c, i32 0
@ -117,7 +117,7 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @test_vmlaq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) {
; CHECK-LABEL: test_vmlaq_n_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmla.u16 q0, q1, r0
; CHECK-NEXT: vmla.i16 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%.splatinsert = insertelement <8 x i16> undef, i16 %c, i32 0
@ -130,7 +130,7 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @test_vmlaq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlaq_n_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmla.u32 q0, q1, r0
; CHECK-NEXT: vmla.i32 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%.splatinsert = insertelement <4 x i32> undef, i32 %c, i32 0
@ -143,7 +143,7 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @test_vmlaq_n_u8(<16 x i8> %a, <16 x i8> %b, i8 zeroext %c) {
; CHECK-LABEL: test_vmlaq_n_u8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmla.u8 q0, q1, r0
; CHECK-NEXT: vmla.i8 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%.splatinsert = insertelement <16 x i8> undef, i8 %c, i32 0
@ -156,7 +156,7 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @test_vmlaq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) {
; CHECK-LABEL: test_vmlaq_n_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmla.u16 q0, q1, r0
; CHECK-NEXT: vmla.i16 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%.splatinsert = insertelement <8 x i16> undef, i16 %c, i32 0
@ -169,7 +169,7 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @test_vmlaq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlaq_n_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmla.u32 q0, q1, r0
; CHECK-NEXT: vmla.i32 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%.splatinsert = insertelement <4 x i32> undef, i32 %c, i32 0
@ -182,7 +182,7 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @test_vmlasq_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c) {
; CHECK-LABEL: test_vmlasq_n_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlas.u8 q1, q0, r0
; CHECK-NEXT: vmlas.i8 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
@ -196,7 +196,7 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @test_vmlasq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) {
; CHECK-LABEL: test_vmlasq_n_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlas.u16 q1, q0, r0
; CHECK-NEXT: vmlas.i16 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
@ -210,7 +210,7 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @test_vmlasq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlasq_n_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlas.u32 q1, q0, r0
; CHECK-NEXT: vmlas.i32 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
@ -224,7 +224,7 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @test_vmlasq_n_u8(<16 x i8> %a, <16 x i8> %b, i8 zeroext %c) {
; CHECK-LABEL: test_vmlasq_n_u8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlas.u8 q1, q0, r0
; CHECK-NEXT: vmlas.i8 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
@ -238,7 +238,7 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @test_vmlasq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) {
; CHECK-LABEL: test_vmlasq_n_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlas.u16 q1, q0, r0
; CHECK-NEXT: vmlas.i16 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
@ -252,7 +252,7 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @test_vmlasq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlasq_n_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlas.u32 q1, q0, r0
; CHECK-NEXT: vmlas.i32 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
@ -528,7 +528,7 @@ define arm_aapcs_vfpcc <16 x i8> @test_vmlaq_m_n_s8(<16 x i8> %a, <16 x i8> %b,
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmlat.u8 q0, q1, r0
; CHECK-NEXT: vmlat.i8 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = zext i8 %c to i32
@ -543,7 +543,7 @@ define arm_aapcs_vfpcc <8 x i16> @test_vmlaq_m_n_s16(<8 x i16> %a, <8 x i16> %b,
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmlat.u16 q0, q1, r0
; CHECK-NEXT: vmlat.i16 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %c to i32
@ -558,7 +558,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vmlaq_m_n_s32(<4 x i32> %a, <4 x i32> %b,
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmlat.u32 q0, q1, r0
; CHECK-NEXT: vmlat.i32 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
@ -572,7 +572,7 @@ define arm_aapcs_vfpcc <16 x i8> @test_vmlaq_m_n_u8(<16 x i8> %a, <16 x i8> %b,
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmlat.u8 q0, q1, r0
; CHECK-NEXT: vmlat.i8 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = zext i8 %c to i32
@ -587,7 +587,7 @@ define arm_aapcs_vfpcc <8 x i16> @test_vmlaq_m_n_u16(<8 x i16> %a, <8 x i16> %b,
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmlat.u16 q0, q1, r0
; CHECK-NEXT: vmlat.i16 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %c to i32
@ -602,7 +602,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vmlaq_m_n_u32(<4 x i32> %a, <4 x i32> %b,
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmlat.u32 q0, q1, r0
; CHECK-NEXT: vmlat.i32 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
@ -616,7 +616,7 @@ define arm_aapcs_vfpcc <16 x i8> @test_vmlasq_m_n_s8(<16 x i8> %a, <16 x i8> %b,
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmlast.u8 q0, q1, r0
; CHECK-NEXT: vmlast.i8 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = zext i8 %c to i32
@ -631,7 +631,7 @@ define arm_aapcs_vfpcc <8 x i16> @test_vmlasq_m_n_s16(<8 x i16> %a, <8 x i16> %b
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmlast.u16 q0, q1, r0
; CHECK-NEXT: vmlast.i16 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %c to i32
@ -646,7 +646,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vmlasq_m_n_s32(<4 x i32> %a, <4 x i32> %b
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmlast.u32 q0, q1, r0
; CHECK-NEXT: vmlast.i32 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
@ -660,7 +660,7 @@ define arm_aapcs_vfpcc <16 x i8> @test_vmlasq_m_n_u8(<16 x i8> %a, <16 x i8> %b,
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmlast.u8 q0, q1, r0
; CHECK-NEXT: vmlast.i8 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = zext i8 %c to i32
@ -675,7 +675,7 @@ define arm_aapcs_vfpcc <8 x i16> @test_vmlasq_m_n_u16(<8 x i16> %a, <8 x i16> %b
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmlast.u16 q0, q1, r0
; CHECK-NEXT: vmlast.i16 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %c to i32
@ -690,7 +690,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vmlasq_m_n_u32(<4 x i32> %a, <4 x i32> %b
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmlast.u32 q0, q1, r0
; CHECK-NEXT: vmlast.i32 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32

View File

@ -989,7 +989,7 @@ define void @vmlaq(i32* %x, i32* %y, i32 %n, i32 %z) {
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
; CHECK-NEXT: vmla.u32 q1, q0, r3
; CHECK-NEXT: vmla.i32 q1, q0, r3
; CHECK-NEXT: vstrw.32 q1, [r1], #16
; CHECK-NEXT: letp lr, .LBB22_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
@ -1036,7 +1036,7 @@ define void @vmlaqp(i32* %x, i32* %y, i32 %n, i32 %z) {
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
; CHECK-NEXT: vmla.u32 q1, q0, r3
; CHECK-NEXT: vmla.i32 q1, q0, r3
; CHECK-NEXT: vstrw.32 q1, [r1], #16
; CHECK-NEXT: letp lr, .LBB23_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
@ -1080,7 +1080,7 @@ define void @vmlasq(i32* %x, i32* %y, i32 %n, i32 %z) {
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
; CHECK-NEXT: vldrw.u32 q1, [r1]
; CHECK-NEXT: vmlas.u32 q1, q0, r3
; CHECK-NEXT: vmlas.i32 q1, q0, r3
; CHECK-NEXT: vstrw.32 q1, [r1], #16
; CHECK-NEXT: letp lr, .LBB24_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
@ -1127,7 +1127,7 @@ define void @vmlasqp(i32* %x, i32* %y, i32 %n, i32 %z) {
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
; CHECK-NEXT: vmlas.u32 q1, q0, r3
; CHECK-NEXT: vmlas.i32 q1, q0, r3
; CHECK-NEXT: vstrw.32 q1, [r1], #16
; CHECK-NEXT: letp lr, .LBB25_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup

View File

@ -968,7 +968,7 @@ define void @vmlaq(i32* %x, i32* %y, i32 %n) {
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
; CHECK-NEXT: vmla.u32 q1, q0, r3
; CHECK-NEXT: vmla.i32 q1, q0, r3
; CHECK-NEXT: vstrw.32 q1, [r1], #16
; CHECK-NEXT: letp lr, .LBB22_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
@ -1014,7 +1014,7 @@ define void @vmlaqp(i32* %x, i32* %y, i32 %n) {
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
; CHECK-NEXT: vmla.u32 q1, q0, r3
; CHECK-NEXT: vmla.i32 q1, q0, r3
; CHECK-NEXT: vstrw.32 q1, [r1], #16
; CHECK-NEXT: letp lr, .LBB23_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
@ -1059,7 +1059,7 @@ define void @vmlasq(i32* %x, i32* %y, i32 %n) {
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
; CHECK-NEXT: vldrw.u32 q1, [r1]
; CHECK-NEXT: vmlas.u32 q1, q0, r3
; CHECK-NEXT: vmlas.i32 q1, q0, r3
; CHECK-NEXT: vstrw.32 q1, [r1], #16
; CHECK-NEXT: letp lr, .LBB24_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
@ -1105,7 +1105,7 @@ define void @vmlasqp(i32* %x, i32* %y, i32 %n) {
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
; CHECK-NEXT: vmlas.u32 q1, q0, r3
; CHECK-NEXT: vmlas.i32 q1, q0, r3
; CHECK-NEXT: vstrw.32 q1, [r1], #16
; CHECK-NEXT: letp lr, .LBB25_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup

View File

@ -4,7 +4,7 @@
define arm_aapcs_vfpcc <4 x i32> @vmlau32(<4 x i32> %A, <4 x i32> %B, i32 %X) nounwind {
; CHECK-LABEL: vmlau32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmla.u32 q0, q1, r0
; CHECK-NEXT: vmla.i32 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = insertelement <4 x i32> undef, i32 %X, i32 0
@ -17,7 +17,7 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @vmlau32b(<4 x i32> %A, <4 x i32> %B, i32 %X) nounwind {
; CHECK-LABEL: vmlau32b:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmla.u32 q0, q1, r0
; CHECK-NEXT: vmla.i32 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = insertelement <4 x i32> undef, i32 %X, i32 0
@ -30,7 +30,7 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @vmlau16(<8 x i16> %A, <8 x i16> %B, i16 %X) nounwind {
; CHECK-LABEL: vmlau16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmla.u16 q0, q1, r0
; CHECK-NEXT: vmla.i16 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = insertelement <8 x i16> undef, i16 %X, i32 0
@ -43,7 +43,7 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @vmlau16b(<8 x i16> %A, <8 x i16> %B, i16 %X) nounwind {
; CHECK-LABEL: vmlau16b:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmla.u16 q0, q1, r0
; CHECK-NEXT: vmla.i16 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = insertelement <8 x i16> undef, i16 %X, i32 0
@ -56,7 +56,7 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @vmlau8(<16 x i8> %A, <16 x i8> %B, i8 %X) nounwind {
; CHECK-LABEL: vmlau8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmla.u8 q0, q1, r0
; CHECK-NEXT: vmla.i8 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = insertelement <16 x i8> undef, i8 %X, i32 0
@ -69,7 +69,7 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @vmlau8b(<16 x i8> %A, <16 x i8> %B, i8 %X) nounwind {
; CHECK-LABEL: vmlau8b:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmla.u8 q0, q1, r0
; CHECK-NEXT: vmla.i8 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = insertelement <16 x i8> undef, i8 %X, i32 0
@ -87,7 +87,7 @@ define void @vmla32_in_loop(i32* %s1, i32 %x, i32* %d, i32 %n) {
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
; CHECK-NEXT: vldrw.u32 q1, [r2]
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: vmla.u32 q1, q0, r1
; CHECK-NEXT: vmla.i32 q1, q0, r1
; CHECK-NEXT: vstrb.8 q1, [r2], #16
; CHECK-NEXT: bne .LBB6_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
@ -125,7 +125,7 @@ define void @vmla16_in_loop(i16* %s1, i16 %x, i16* %d, i32 %n) {
; CHECK-NEXT: vldrh.u16 q0, [r0], #16
; CHECK-NEXT: vldrh.u16 q1, [r2]
; CHECK-NEXT: subs r3, #8
; CHECK-NEXT: vmla.u16 q1, q0, r1
; CHECK-NEXT: vmla.i16 q1, q0, r1
; CHECK-NEXT: vstrb.8 q1, [r2], #16
; CHECK-NEXT: bne .LBB7_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
@ -163,7 +163,7 @@ define void @vmla8_in_loop(i8* %s1, i8 %x, i8* %d, i32 %n) {
; CHECK-NEXT: vldrh.u16 q0, [r0], #16
; CHECK-NEXT: vldrh.u16 q1, [r2]
; CHECK-NEXT: subs r3, #16
; CHECK-NEXT: vmla.u8 q1, q0, r1
; CHECK-NEXT: vmla.i8 q1, q0, r1
; CHECK-NEXT: vstrb.8 q1, [r2], #16
; CHECK-NEXT: bne .LBB8_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
@ -197,7 +197,7 @@ for.cond.cleanup:
define arm_aapcs_vfpcc <4 x i32> @vmlasu32(<4 x i32> %A, <4 x i32> %B, i32 %X) nounwind {
; CHECK-LABEL: vmlasu32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlas.u32 q0, q1, r0
; CHECK-NEXT: vmlas.i32 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = insertelement <4 x i32> undef, i32 %X, i32 0
@ -210,7 +210,7 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @vmlasu32b(<4 x i32> %A, <4 x i32> %B, i32 %X) nounwind {
; CHECK-LABEL: vmlasu32b:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlas.u32 q0, q1, r0
; CHECK-NEXT: vmlas.i32 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = insertelement <4 x i32> undef, i32 %X, i32 0
@ -223,7 +223,7 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @vmlasu16(<8 x i16> %A, <8 x i16> %B, i16 %X) nounwind {
; CHECK-LABEL: vmlasu16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlas.u16 q0, q1, r0
; CHECK-NEXT: vmlas.i16 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = insertelement <8 x i16> undef, i16 %X, i32 0
@ -236,7 +236,7 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @vmlasu16b(<8 x i16> %A, <8 x i16> %B, i16 %X) nounwind {
; CHECK-LABEL: vmlasu16b:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlas.u16 q0, q1, r0
; CHECK-NEXT: vmlas.i16 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = insertelement <8 x i16> undef, i16 %X, i32 0
@ -249,7 +249,7 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @vmlasu8(<16 x i8> %A, <16 x i8> %B, i8 %X) nounwind {
; CHECK-LABEL: vmlasu8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlas.u8 q0, q1, r0
; CHECK-NEXT: vmlas.i8 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = insertelement <16 x i8> undef, i8 %X, i32 0
@ -262,7 +262,7 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @vmlasu8b(<16 x i8> %A, <16 x i8> %B, i8 %X) nounwind {
; CHECK-LABEL: vmlasu8b:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlas.u8 q0, q1, r0
; CHECK-NEXT: vmlas.i8 q0, q1, r0
; CHECK-NEXT: bx lr
entry:
%0 = insertelement <16 x i8> undef, i8 %X, i32 0
@ -280,7 +280,7 @@ define void @vmlas32_in_loop(i32* %s1, i32 %x, i32* %d, i32 %n) {
; CHECK-NEXT: vldrw.u32 q0, [r2]
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: vmlas.u32 q1, q0, r1
; CHECK-NEXT: vmlas.i32 q1, q0, r1
; CHECK-NEXT: vstrb.8 q1, [r2], #16
; CHECK-NEXT: bne .LBB15_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
@ -318,7 +318,7 @@ define void @vmlas16_in_loop(i16* %s1, i16 %x, i16* %d, i32 %n) {
; CHECK-NEXT: vldrh.u16 q0, [r2]
; CHECK-NEXT: vldrh.u16 q1, [r0], #16
; CHECK-NEXT: subs r3, #8
; CHECK-NEXT: vmlas.u16 q1, q0, r1
; CHECK-NEXT: vmlas.i16 q1, q0, r1
; CHECK-NEXT: vstrb.8 q1, [r2], #16
; CHECK-NEXT: bne .LBB16_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
@ -356,7 +356,7 @@ define void @vmlas8_in_loop(i8* %s1, i8 %x, i8* %d, i32 %n) {
; CHECK-NEXT: vldrh.u16 q0, [r2]
; CHECK-NEXT: vldrh.u16 q1, [r0], #16
; CHECK-NEXT: subs r3, #16
; CHECK-NEXT: vmlas.u8 q1, q0, r1
; CHECK-NEXT: vmlas.i8 q1, q0, r1
; CHECK-NEXT: vstrb.8 q1, [r2], #16
; CHECK-NEXT: bne .LBB17_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup

View File

@ -167,7 +167,7 @@ define void @sunken_vmovl(i8* noalias %pTarget, i16 signext %iTargetStride, i8*
; CHECK-NEXT: vmovlb.u8 q2, q2
; CHECK-NEXT: vmul.i16 q3, q2, q3
; CHECK-NEXT: vldrb.u16 q2, [r0], #8
; CHECK-NEXT: vmla.u16 q3, q1, r3
; CHECK-NEXT: vmla.i16 q3, q1, r3
; CHECK-NEXT: vldrb.u16 q1, [r2], #8
; CHECK-NEXT: vshr.u16 q3, q3, #8
; CHECK-NEXT: vstrb.16 q3, [r0, #-16]

View File

@ -316,28 +316,40 @@ vfmas.f16 q0, q0, r12
# CHECK-NOFP-NOT: vfmas.f32 q0, q3, lr @ encoding: [0x37,0xee,0x4e,0x1e]
vfmas.f32 q0, q3, lr
# CHECK: vmlas.s8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e]
# CHECK-NOFP: vmlas.s8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e]
# CHECK: vmlas.i8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e]
# CHECK-NOFP: vmlas.i8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e]
vmlas.i8 q0, q0, r6
# CHECK: vmlas.i16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e]
# CHECK-NOFP: vmlas.i16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e]
vmlas.i16 q0, q2, r9
# CHECK: vmlas.i32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e]
# CHECK-NOFP: vmlas.i32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e]
vmlas.i32 q0, q7, r6
# CHECK: vmlas.i8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e]
# CHECK-NOFP: vmlas.i8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e]
vmlas.s8 q0, q0, r6
# CHECK: vmlas.s16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e]
# CHECK-NOFP: vmlas.s16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e]
# CHECK: vmlas.i16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e]
# CHECK-NOFP: vmlas.i16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e]
vmlas.s16 q0, q2, r9
# CHECK: vmlas.s32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e]
# CHECK-NOFP: vmlas.s32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e]
# CHECK: vmlas.i32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e]
# CHECK-NOFP: vmlas.i32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e]
vmlas.s32 q0, q7, r6
# CHECK: vmlas.u8 q0, q5, lr @ encoding: [0x0b,0xfe,0x4e,0x1e]
# CHECK-NOFP: vmlas.u8 q0, q5, lr @ encoding: [0x0b,0xfe,0x4e,0x1e]
# CHECK: vmlas.i8 q0, q5, lr @ encoding: [0x0b,0xee,0x4e,0x1e]
# CHECK-NOFP: vmlas.i8 q0, q5, lr @ encoding: [0x0b,0xee,0x4e,0x1e]
vmlas.u8 q0, q5, lr
# CHECK: vmlas.u16 q0, q3, r12 @ encoding: [0x17,0xfe,0x4c,0x1e]
# CHECK-NOFP: vmlas.u16 q0, q3, r12 @ encoding: [0x17,0xfe,0x4c,0x1e]
# CHECK: vmlas.i16 q0, q3, r12 @ encoding: [0x17,0xee,0x4c,0x1e]
# CHECK-NOFP: vmlas.i16 q0, q3, r12 @ encoding: [0x17,0xee,0x4c,0x1e]
vmlas.u16 q0, q3, r12
# CHECK: vmlas.u32 q1, q1, r11 @ encoding: [0x23,0xfe,0x4b,0x3e]
# CHECK-NOFP: vmlas.u32 q1, q1, r11 @ encoding: [0x23,0xfe,0x4b,0x3e]
# CHECK: vmlas.i32 q1, q1, r11 @ encoding: [0x23,0xee,0x4b,0x3e]
# CHECK-NOFP: vmlas.i32 q1, q1, r11 @ encoding: [0x23,0xee,0x4b,0x3e]
vmlas.u32 q1, q1, r11
# CHECK: vfma.f16 q1, q1, r6 @ encoding: [0x33,0xfe,0x46,0x2e]
@ -348,28 +360,40 @@ vfma.f16 q1, q1, r6
# CHECK-NOFP-NOT: vfmas.f32 q7, q4, r6 @ encoding: [0x39,0xee,0x46,0xfe]
vfmas.f32 q7, q4, r6
# CHECK: vmla.s8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e]
# CHECK-NOFP: vmla.s8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e]
# CHECK: vmla.i8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e]
# CHECK-NOFP: vmla.i8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e]
vmla.i8 q0, q3, r8
# CHECK: vmla.i16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e]
# CHECK-NOFP: vmla.i16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e]
vmla.i16 q1, q3, r10
# CHECK: vmla.i32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e]
# CHECK-NOFP: vmla.i32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e]
vmla.i32 q1, q3, r1
# CHECK: vmla.i8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e]
# CHECK-NOFP: vmla.i8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e]
vmla.s8 q0, q3, r8
# CHECK: vmla.s16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e]
# CHECK-NOFP: vmla.s16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e]
# CHECK: vmla.i16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e]
# CHECK-NOFP: vmla.i16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e]
vmla.s16 q1, q3, r10
# CHECK: vmla.s32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e]
# CHECK-NOFP: vmla.s32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e]
# CHECK: vmla.i32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e]
# CHECK-NOFP: vmla.i32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e]
vmla.s32 q1, q3, r1
# CHECK: vmla.u8 q0, q7, r10 @ encoding: [0x0f,0xfe,0x4a,0x0e]
# CHECK-NOFP: vmla.u8 q0, q7, r10 @ encoding: [0x0f,0xfe,0x4a,0x0e]
# CHECK: vmla.i8 q0, q7, r10 @ encoding: [0x0f,0xee,0x4a,0x0e]
# CHECK-NOFP: vmla.i8 q0, q7, r10 @ encoding: [0x0f,0xee,0x4a,0x0e]
vmla.u8 q0, q7, r10
# CHECK: vmla.u16 q0, q0, r7 @ encoding: [0x11,0xfe,0x47,0x0e]
# CHECK-NOFP: vmla.u16 q0, q0, r7 @ encoding: [0x11,0xfe,0x47,0x0e]
# CHECK: vmla.i16 q0, q0, r7 @ encoding: [0x11,0xee,0x47,0x0e]
# CHECK-NOFP: vmla.i16 q0, q0, r7 @ encoding: [0x11,0xee,0x47,0x0e]
vmla.u16 q0, q0, r7
# CHECK: vmla.u32 q1, q6, r10 @ encoding: [0x2d,0xfe,0x4a,0x2e]
# CHECK-NOFP: vmla.u32 q1, q6, r10 @ encoding: [0x2d,0xfe,0x4a,0x2e]
# CHECK: vmla.i32 q1, q6, r10 @ encoding: [0x2d,0xee,0x4a,0x2e]
# CHECK-NOFP: vmla.i32 q1, q6, r10 @ encoding: [0x2d,0xee,0x4a,0x2e]
vmla.u32 q1, q6, r10
# CHECK: vqdmlash.s8 q0, q0, r5 @ encoding: [0x00,0xee,0x65,0x1e]

View File

@ -315,27 +315,27 @@
# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
[0x37,0xee,0x4e,0x1e]
# CHECK: vmlas.s8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e]
# CHECK: vmlas.i8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e]
# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
[0x01,0xee,0x46,0x1e]
# CHECK: vmlas.s16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e]
# CHECK: vmlas.i16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e]
# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
[0x15,0xee,0x49,0x1e]
# CHECK: vmlas.s32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e]
# CHECK: vmlas.i32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e]
# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
[0x2f,0xee,0x46,0x1e]
# CHECK: vmlas.u8 q0, q5, lr @ encoding: [0x0b,0xfe,0x4e,0x1e]
# ERROR: [[@LINE+2]]:2: warning: invalid instruction encoding
# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
[0x0b,0xfe,0x4e,0x1e]
# CHECK: vmlas.u16 q0, q3, r12 @ encoding: [0x17,0xfe,0x4c,0x1e]
# ERROR: [[@LINE+2]]:2: warning: invalid instruction encoding
# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
[0x17,0xfe,0x4c,0x1e]
# CHECK: vmlas.u32 q1, q1, r11 @ encoding: [0x23,0xfe,0x4b,0x3e]
# ERROR: [[@LINE+2]]:2: warning: invalid instruction encoding
# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
[0x23,0xfe,0x4b,0x3e]
@ -347,27 +347,27 @@
# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
[0x39,0xee,0x46,0xfe]
# CHECK: vmla.s8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e]
# CHECK: vmla.i8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e]
# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
[0x07,0xee,0x48,0x0e]
# CHECK: vmla.s16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e]
# CHECK: vmla.i16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e]
# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
[0x17,0xee,0x4a,0x2e]
# CHECK: vmla.s32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e]
# CHECK: vmla.i32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e]
# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
[0x27,0xee,0x41,0x2e]
# CHECK: vmla.u8 q0, q7, r10 @ encoding: [0x0f,0xfe,0x4a,0x0e]
# ERROR: [[@LINE+2]]:2: warning: invalid instruction encoding
# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
[0x0f,0xfe,0x4a,0x0e]
# CHECK: vmla.u16 q0, q0, r7 @ encoding: [0x11,0xfe,0x47,0x0e]
# ERROR: [[@LINE+2]]:2: warning: invalid instruction encoding
# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
[0x11,0xfe,0x47,0x0e]
# CHECK: vmla.u32 q1, q6, r10 @ encoding: [0x2d,0xfe,0x4a,0x2e]
# ERROR: [[@LINE+2]]:2: warning: invalid instruction encoding
# CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding
[0x2d,0xfe,0x4a,0x2e]

View File

@ -803,7 +803,7 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret i64 %r.0.lcssa
}
; 4x to use VMLA.u32
; 4x to use VMLA.i32
define i32 @mla_i32_i32(i32* nocapture readonly %x, i32* nocapture readonly %y, i32 %n) #0 {
; CHECK-LABEL: @mla_i32_i32(
; CHECK-NEXT: entry:
@ -856,7 +856,7 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret i32 %r.0.lcssa
}
; 8x to use VMLA.u16
; 8x to use VMLA.i16
define i32 @mla_i16_i32(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) #0 {
; CHECK-LABEL: @mla_i16_i32(
; CHECK-NEXT: entry:
@ -913,7 +913,7 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret i32 %r.0.lcssa
}
; 16x to use VMLA.u8
; 16x to use VMLA.i8
define i32 @mla_i8_i32(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 {
; CHECK-LABEL: @mla_i8_i32(
; CHECK-NEXT: entry:
@ -970,7 +970,7 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret i32 %r.0.lcssa
}
; 8x to use VMLA.u16
; 8x to use VMLA.i16
define signext i16 @mla_i16_i16(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) #0 {
; CHECK-LABEL: @mla_i16_i16(
; CHECK-NEXT: entry:
@ -1023,7 +1023,7 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret i16 %r.0.lcssa
}
; 16x to use VMLA.u8
; 16x to use VMLA.i8
define signext i16 @mla_i8_i16(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 {
; CHECK-LABEL: @mla_i8_i16(
; CHECK-NEXT: entry:
@ -1080,7 +1080,7 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret i16 %r.0.lcssa
}
; 16x to use VMLA.u8
; 16x to use VMLA.i8
define zeroext i8 @mla_i8_i8(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 {
; CHECK-LABEL: @mla_i8_i8(
; CHECK-NEXT: entry:
@ -1430,7 +1430,7 @@ exit:
ret i32 %ret.lcssa
}
; 16x to use VMLA.u8, same as mla_i8_i32 with multiple uses of the ext `add(mul(x, x))`
; 16x to use VMLA.i8, same as mla_i8_i32 with multiple uses of the ext `add(mul(x, x))`
define i32 @mla_i8_i32_multiuse(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 {
; CHECK-LABEL: @mla_i8_i32_multiuse(
; CHECK-NEXT: entry:

View File

@ -583,18 +583,12 @@ TEST(MachineInstrValidTailPredication, IsCorrect) {
case MVE_VRMLALDAVHu32:
case MVE_VRMLSLDAVHas32:
case MVE_VRMLSLDAVHs32:
case MVE_VMLAS_qr_s16:
case MVE_VMLAS_qr_s32:
case MVE_VMLAS_qr_s8:
case MVE_VMLAS_qr_u16:
case MVE_VMLAS_qr_u32:
case MVE_VMLAS_qr_u8:
case MVE_VMLA_qr_s16:
case MVE_VMLA_qr_s32:
case MVE_VMLA_qr_s8:
case MVE_VMLA_qr_u16:
case MVE_VMLA_qr_u32:
case MVE_VMLA_qr_u8:
case MVE_VMLAS_qr_i16:
case MVE_VMLAS_qr_i32:
case MVE_VMLAS_qr_i8:
case MVE_VMLA_qr_i16:
case MVE_VMLA_qr_i32:
case MVE_VMLA_qr_i8:
case MVE_VHADD_qr_s16:
case MVE_VHADD_qr_s32:
case MVE_VHADD_qr_s8:
@ -1311,10 +1305,8 @@ TEST(MachineInstr, MVEVecSize) {
case MVE_VMLADAVs8:
case MVE_VMLADAVu8:
case MVE_VMLADAVxs8:
case MVE_VMLAS_qr_s8:
case MVE_VMLAS_qr_u8:
case MVE_VMLA_qr_s8:
case MVE_VMLA_qr_u8:
case MVE_VMLAS_qr_i8:
case MVE_VMLA_qr_i8:
case MVE_VMLSDAVas8:
case MVE_VMLSDAVaxs8:
case MVE_VMLSDAVs8:
@ -1542,10 +1534,8 @@ TEST(MachineInstr, MVEVecSize) {
case MVE_VMLALDAVs16:
case MVE_VMLALDAVu16:
case MVE_VMLALDAVxs16:
case MVE_VMLAS_qr_s16:
case MVE_VMLAS_qr_u16:
case MVE_VMLA_qr_s16:
case MVE_VMLA_qr_u16:
case MVE_VMLAS_qr_i16:
case MVE_VMLA_qr_i16:
case MVE_VMLSDAVas16:
case MVE_VMLSDAVaxs16:
case MVE_VMLSDAVs16:
@ -1856,10 +1846,8 @@ TEST(MachineInstr, MVEVecSize) {
case MVE_VMLALDAVs32:
case MVE_VMLALDAVu32:
case MVE_VMLALDAVxs32:
case MVE_VMLAS_qr_s32:
case MVE_VMLAS_qr_u32:
case MVE_VMLA_qr_s32:
case MVE_VMLA_qr_u32:
case MVE_VMLAS_qr_i32:
case MVE_VMLA_qr_i32:
case MVE_VMLSDAVas32:
case MVE_VMLSDAVaxs32:
case MVE_VMLSDAVs32: