forked from OSchip/llvm-project
410 lines
19 KiB
LLVM
410 lines
19 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
|
|
|
|
; This file tests the following combinations related to streaming-enabled functions:
|
|
; [ ] N -> S (Normal -> Streaming)
|
|
; [ ] S -> N (Streaming -> Normal)
|
|
; [ ] S -> S (Streaming -> Streaming)
|
|
; [ ] S -> SC (Streaming -> Streaming-compatible)
|
|
;
|
|
; The following combination is tested in sme-streaming-compatible-interface.ll
|
|
; [ ] SC -> S (Streaming-compatible -> Streaming)
|
|
|
|
; External callees used by the tests below. @normal_callee carries no
; streaming attribute; the other two are tagged with the string attribute
; that gives them a streaming or streaming-compatible interface.
declare void @normal_callee()
|
|
declare void @streaming_callee() "aarch64_pstate_sm_enabled"
|
|
declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"
|
|
|
|
; [x] N -> S
|
|
; [ ] S -> N
|
|
; [ ] S -> S
|
|
; [ ] S -> SC
|
|
; A caller without any streaming attribute calls @streaming_callee.
; Expected output: d8-d15 and x30 are spilled in the prologue, the call is
; bracketed by "smstart sm" / "smstop sm", and the registers are reloaded
; afterwards.
define void @normal_caller_streaming_callee() nounwind {
|
|
; CHECK-LABEL: normal_caller_streaming_callee:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
|
|
; CHECK-NEXT: smstart sm
|
|
; CHECK-NEXT: bl streaming_callee
|
|
; CHECK-NEXT: smstop sm
|
|
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
|
|
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
|
|
; CHECK-NEXT: ret
|
|
call void @streaming_callee()
|
|
ret void;
|
|
}
|
|
|
|
; [ ] N -> S
|
|
; [x] S -> N
|
|
; [ ] S -> S
|
|
; [ ] S -> SC
|
|
; A caller marked "aarch64_pstate_sm_enabled" calls @normal_callee, which has
; no streaming attribute. Expected output: the mode switch is the mirror image
; of the N -> S case, i.e. "smstop sm" before the call and "smstart sm" after
; it, with d8-d15 and x30 spilled and reloaded around the sequence.
define void @streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_enabled" {
|
|
; CHECK-LABEL: streaming_caller_normal_callee:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
|
|
; CHECK-NEXT: smstop sm
|
|
; CHECK-NEXT: bl normal_callee
|
|
; CHECK-NEXT: smstart sm
|
|
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
|
|
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
|
|
; CHECK-NEXT: ret
|
|
call void @normal_callee()
|
|
ret void;
|
|
}
|
|
|
|
; [ ] N -> S
|
|
; [ ] S -> N
|
|
; [x] S -> S
|
|
; [ ] S -> SC
|
|
; Caller and callee are both marked "aarch64_pstate_sm_enabled". Expected
; output: a plain call with no smstart/smstop; only x30 is spilled and
; reloaded.
define void @streaming_caller_streaming_callee() nounwind "aarch64_pstate_sm_enabled" {
|
|
; CHECK-LABEL: streaming_caller_streaming_callee:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
|
; CHECK-NEXT: bl streaming_callee
|
|
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
|
; CHECK-NEXT: ret
|
|
call void @streaming_callee()
|
|
ret void;
|
|
}
|
|
|
|
; [ ] N -> S
|
|
; [ ] S -> N
|
|
; [ ] S -> S
|
|
; [x] S -> SC
|
|
; A caller marked "aarch64_pstate_sm_enabled" calls a callee declared
; "aarch64_pstate_sm_compatible". Expected output: a plain call with no
; smstart/smstop; only x30 is spilled and reloaded.
define void @streaming_caller_streaming_compatible_callee() nounwind "aarch64_pstate_sm_enabled" {
|
|
; CHECK-LABEL: streaming_caller_streaming_compatible_callee:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
|
; CHECK-NEXT: bl streaming_compatible_callee
|
|
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
|
; CHECK-NEXT: ret
|
|
call void @streaming_compatible_callee()
|
|
ret void;
|
|
}
|
|
|
|
;
|
|
; Handle special cases here.
|
|
;
|
|
|
|
; Call to function-pointer (with attribute)
|
|
; Indirect call through %p from a caller with no streaming attribute. The
; streaming interface is given by the "aarch64_pstate_sm_enabled" attribute on
; the call site itself rather than on a declaration. Expected output: the same
; spill / "smstart sm" / call / "smstop sm" / reload shape as the direct
; N -> S call, with the call emitted as "blr x0".
define void @call_to_function_pointer_streaming_enabled(ptr %p) nounwind {
|
|
; CHECK-LABEL: call_to_function_pointer_streaming_enabled:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
|
|
; CHECK-NEXT: smstart sm
|
|
; CHECK-NEXT: blr x0
|
|
; CHECK-NEXT: smstop sm
|
|
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
|
|
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
|
|
; CHECK-NEXT: ret
|
|
call void %p() "aarch64_pstate_sm_enabled"
|
|
ret void
|
|
}
|
|
|
|
; Ensure NEON registers are preserved correctly.
|
|
; The <4 x i32> argument %x is returned unchanged after a call that needs a
; streaming-mode switch, so its value has to survive the smstart/smstop pair.
; Expected output: besides d8-d15 and x30, q0 is spilled to [sp] before
; "smstart sm" and reloaded from [sp] after "smstop sm".
define <4 x i32> @smstart_clobber_simdfp(<4 x i32> %x) nounwind {
|
|
; CHECK-LABEL: smstart_clobber_simdfp:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: sub sp, sp, #96
|
|
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
|
|
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
|
|
; CHECK-NEXT: smstart sm
|
|
; CHECK-NEXT: bl streaming_callee
|
|
; CHECK-NEXT: smstop sm
|
|
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
|
|
; CHECK-NEXT: add sp, sp, #96
|
|
; CHECK-NEXT: ret
|
|
call void @streaming_callee()
|
|
ret <4 x i32> %x;
|
|
}
|
|
|
|
; Ensure SVE registers are preserved correctly.
|
|
; Scalable-vector variant of the test above: %x is a <vscale x 4 x i32> that
; is returned unchanged after a call to @streaming_callee. The function uses
; attribute group #0 (nounwind, +sve) and has no streaming attribute.
; Expected output: p4-p15 and z8-z23 are spilled in the prologue, z0 is
; spilled to a separately allocated slot before "smstart sm" and reloaded
; after "smstop sm", then everything is restored in the epilogue.
define <vscale x 4 x i32> @smstart_clobber_sve(<vscale x 4 x i32> %x) #0 {
|
|
; CHECK-LABEL: smstart_clobber_sve:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
|
|
; CHECK-NEXT: addvl sp, sp, #-18
|
|
; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: addvl sp, sp, #-1
|
|
; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
|
|
; CHECK-NEXT: smstart sm
|
|
; CHECK-NEXT: bl streaming_callee
|
|
; CHECK-NEXT: smstop sm
|
|
; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
|
|
; CHECK-NEXT: addvl sp, sp, #1
|
|
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: addvl sp, sp, #18
|
|
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
|
|
; CHECK-NEXT: ret
|
|
call void @streaming_callee()
|
|
ret <vscale x 4 x i32> %x;
|
|
}
|
|
|
|
; Call streaming callee twice; there should be no spills/fills between the two
|
|
; calls since the registers should have already been clobbered.
|
|
; Same shape as @smstart_clobber_sve but with two back-to-back calls to
; @streaming_callee. Expected output: z0 is spilled once before the first
; "smstart sm" and reloaded once after the second "smstop sm"; the first
; "smstop sm" is followed immediately by the second "smstart sm", with no
; spill or reload in between.
define <vscale x 4 x i32> @smstart_clobber_sve_duplicate(<vscale x 4 x i32> %x) #0 {
|
|
; CHECK-LABEL: smstart_clobber_sve_duplicate:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
|
|
; CHECK-NEXT: addvl sp, sp, #-18
|
|
; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
|
|
; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
|
|
; CHECK-NEXT: addvl sp, sp, #-1
|
|
; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
|
|
; CHECK-NEXT: smstart sm
|
|
; CHECK-NEXT: bl streaming_callee
|
|
; CHECK-NEXT: smstop sm
|
|
; CHECK-NEXT: smstart sm
|
|
; CHECK-NEXT: bl streaming_callee
|
|
; CHECK-NEXT: smstop sm
|
|
; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
|
|
; CHECK-NEXT: addvl sp, sp, #1
|
|
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
|
|
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
|
|
; CHECK-NEXT: addvl sp, sp, #18
|
|
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
|
|
; CHECK-NEXT: ret
|
|
call void @streaming_callee()
|
|
call void @streaming_callee()
|
|
ret <vscale x 4 x i32> %x;
|
|
}
|
|
|
|
; Ensure smstart is not removed, because call to llvm.cos is not part of a chain.
|
|
; A caller marked "aarch64_pstate_sm_enabled" calls the @llvm.cos.f64
; intrinsic, which the expected output shows as a call to "cos" bracketed by
; "smstop sm" / "smstart sm".
; Expected data movement around the mode switches:
;   - %x (d0) is spilled before "smstop sm" and reloaded after it as the
;     argument to the call;
;   - the result (d0) is spilled before "smstart sm" and reloaded into d1
;     after it, then added to the reloaded %x.
define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_pstate_sm_enabled" {
|
|
; CHECK-LABEL: call_to_intrinsic_without_chain:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: sub sp, sp, #96
|
|
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
|
|
; CHECK-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
|
|
; CHECK-NEXT: smstop sm
|
|
; CHECK-NEXT: ldr d0, [sp, #88] // 8-byte Folded Reload
|
|
; CHECK-NEXT: bl cos
|
|
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
|
|
; CHECK-NEXT: smstart sm
|
|
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr d0, [sp, #88] // 8-byte Folded Reload
|
|
; CHECK-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
|
|
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
|
|
; CHECK-NEXT: fadd d0, d1, d0
|
|
; CHECK-NEXT: add sp, sp, #96
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%res = call fast double @llvm.cos.f64(double %x)
|
|
; %x is used again after the call, so it must stay live across both mode
; switches.
%res.fadd = fadd fast double %res, %x
|
|
ret double %res.fadd
|
|
}
|
|
|
|
; Intrinsic called by @call_to_intrinsic_without_chain.
declare double @llvm.cos.f64(double)
|
|
|
|
; Ensure that tail call optimization is disabled when the streaming mode
|
|
; doesn't match.
|
|
; The IR marks the call to @streaming_callee as a "tail call", but the caller
; has no streaming attribute. Expected output: an ordinary "bl" bracketed by
; "smstart sm" / "smstop sm", followed by the epilogue and "ret", rather than
; a tail-call branch.
define void @disable_tailcallopt() nounwind {
|
|
; CHECK-LABEL: disable_tailcallopt:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
|
|
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
|
|
; CHECK-NEXT: smstart sm
|
|
; CHECK-NEXT: bl streaming_callee
|
|
; CHECK-NEXT: smstop sm
|
|
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
|
|
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
|
|
; CHECK-NEXT: ret
|
|
tail call void @streaming_callee()
|
|
ret void;
|
|
}
|
|
|
|
; A streaming caller (attribute group #1) allocates three scalable-vector
; stack objects and passes their addresses, plus the result of
; @llvm.aarch64.sme.cntsb, to @foo, which is declared without a streaming
; attribute. It then returns element 0 of %Data1.
; Expected output: the count (rdsvl) and the three vector-length-scaled
; addresses (addvl / mov from sp) are computed into x8-x11 BEFORE
; "smstop sm", and only copied into the argument registers x0-x3 after it.
; After "smstart sm", %Data1 is loaded back from [sp, #2, mul vl].
; NOTE(review): presumably this ordering matters because the vector length
; can differ between streaming and non-streaming mode - confirm against the
; SME ABI. The %ptr parameter is not used by the body.
define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone %ptr) #1 {
|
|
; CHECK-LABEL: call_to_non_streaming_pass_sve_objects:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
|
|
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
|
|
; CHECK-NEXT: addvl sp, sp, #-3
|
|
; CHECK-NEXT: rdsvl x8, #1
|
|
; CHECK-NEXT: addvl x9, sp, #2
|
|
; CHECK-NEXT: addvl x10, sp, #1
|
|
; CHECK-NEXT: mov x11, sp
|
|
; CHECK-NEXT: smstop sm
|
|
; CHECK-NEXT: mov x0, x9
|
|
; CHECK-NEXT: mov x1, x10
|
|
; CHECK-NEXT: mov x2, x11
|
|
; CHECK-NEXT: mov x3, x8
|
|
; CHECK-NEXT: bl foo
|
|
; CHECK-NEXT: smstart sm
|
|
; CHECK-NEXT: ptrue p0.b
|
|
; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp, #2, mul vl]
|
|
; CHECK-NEXT: fmov w0, s0
|
|
; CHECK-NEXT: addvl sp, sp, #3
|
|
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
|
|
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%Data1 = alloca <vscale x 16 x i8>, align 16
|
|
%Data2 = alloca <vscale x 16 x i8>, align 16
|
|
%Data3 = alloca <vscale x 16 x i8>, align 16
|
|
%0 = tail call i64 @llvm.aarch64.sme.cntsb()
|
|
call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0)
|
|
; Read back the first byte of %Data1 after the call and return it.
%1 = load <vscale x 16 x i8>, ptr %Data1, align 16
|
|
%vecext = extractelement <vscale x 16 x i8> %1, i64 0
|
|
ret i8 %vecext
|
|
}
|
|
|
|
; Declarations used by @call_to_non_streaming_pass_sve_objects: the SME
; intrinsic whose result is passed as the i64 argument, and the external
; callee @foo (no streaming attribute) that receives three pointers and
; that i64.
declare i64 @llvm.aarch64.sme.cntsb()
|
|
|
|
declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef)
|
|
|
|
; Attribute groups referenced above:
;   #0 - used by the smstart_clobber_sve* tests: nounwind with +sve enabled
;        and no streaming attribute.
;   #1 - used by @call_to_non_streaming_pass_sve_objects: a nounwind
;        streaming function with vscale_range(1,16).
attributes #0 = { nounwind "target-features"="+sve" }
|
|
attributes #1 = { nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" }
|