forked from OSchip/llvm-project
AMDGPU: Use modern address spaces in some tests
This was way out of date, still using 4 for generic and 0 for private.
This commit is contained in:
parent
f946c70130
commit
c1710e7779
|
@ -28,13 +28,13 @@ define i1 @fold_negate_intrinsic_test_mask_dbl(double %x) nounwind {
|
||||||
; Negative test: should not transform for variable test masks
|
; Negative test: should not transform for variable test masks
|
||||||
; CHECK: @fold_negate_intrinsic_test_mask_neg_var
|
; CHECK: @fold_negate_intrinsic_test_mask_neg_var
|
||||||
; CHECK: %[[X0:.*]] = alloca i32
|
; CHECK: %[[X0:.*]] = alloca i32
|
||||||
; CHECK: %[[X1:.*]] = load i32, i32* %[[X0]]
|
; CHECK: %[[X1:.*]] = load i32, i32 addrspace(5)* %[[X0]]
|
||||||
; CHECK: call i1 @llvm.amdgcn.class.f32(float %x, i32 %[[X1]])
|
; CHECK: call i1 @llvm.amdgcn.class.f32(float %x, i32 %[[X1]])
|
||||||
; CHECK: xor
|
; CHECK: xor
|
||||||
define i1 @fold_negate_intrinsic_test_mask_neg_var(float %x) nounwind {
|
define i1 @fold_negate_intrinsic_test_mask_neg_var(float %x) nounwind {
|
||||||
%1 = alloca i32
|
%1 = alloca i32, addrspace(5)
|
||||||
store i32 7, i32* %1
|
store i32 7, i32 addrspace(5)* %1
|
||||||
%2 = load i32, i32* %1
|
%2 = load i32, i32 addrspace(5)* %1
|
||||||
%3 = call i1 @llvm.amdgcn.class.f32(float %x, i32 %2)
|
%3 = call i1 @llvm.amdgcn.class.f32(float %x, i32 %2)
|
||||||
%4 = xor i1 %3, -1
|
%4 = xor i1 %3, -1
|
||||||
ret i1 %4
|
ret i1 %4
|
||||||
|
@ -47,10 +47,10 @@ define i1 @fold_negate_intrinsic_test_mask_neg_var(float %x) nounwind {
|
||||||
; CHECK: store i1 %[[X1]]
|
; CHECK: store i1 %[[X1]]
|
||||||
; CHECK: %[[X2:.*]] = xor i1 %[[X1]]
|
; CHECK: %[[X2:.*]] = xor i1 %[[X1]]
|
||||||
define i1 @fold_negate_intrinsic_test_mask_neg_multiple_uses(float %x) nounwind {
|
define i1 @fold_negate_intrinsic_test_mask_neg_multiple_uses(float %x) nounwind {
|
||||||
%y = alloca i1
|
%y = alloca i1, addrspace(5)
|
||||||
%1 = call i1 @llvm.amdgcn.class.f32(float %x, i32 7)
|
%1 = call i1 @llvm.amdgcn.class.f32(float %x, i32 7)
|
||||||
%2 = xor i1 %1, -1
|
%2 = xor i1 %1, -1
|
||||||
store i1 %1, i1* %y
|
store i1 %1, i1 addrspace(5)* %y
|
||||||
%3 = xor i1 %1, -1
|
%3 = xor i1 %1, -1
|
||||||
ret i1 %2
|
ret i1 %2
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,15 +12,15 @@ define internal void @indirect() {
|
||||||
define internal void @direct() {
|
define internal void @direct() {
|
||||||
; CHECK-LABEL: define {{[^@]+}}@direct
|
; CHECK-LABEL: define {{[^@]+}}@direct
|
||||||
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||||
; CHECK-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8
|
; CHECK-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8, addrspace(5)
|
||||||
; CHECK-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8
|
; CHECK-NEXT: store void ()* @indirect, void ()* addrspace(5)* [[FPTR]], align 8
|
||||||
; CHECK-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8
|
; CHECK-NEXT: [[FP:%.*]] = load void ()*, void ()* addrspace(5)* [[FPTR]], align 8
|
||||||
; CHECK-NEXT: call void [[FP]]()
|
; CHECK-NEXT: call void [[FP]]()
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
;
|
||||||
%fptr = alloca void()*
|
%fptr = alloca void()*, addrspace(5)
|
||||||
store void()* @indirect, void()** %fptr
|
store void()* @indirect, void()* addrspace(5)* %fptr
|
||||||
%fp = load void()*, void()** %fptr
|
%fp = load void()*, void()* addrspace(5)* %fptr
|
||||||
call void %fp()
|
call void %fp()
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,23 +16,23 @@ define internal void @indirect() {
|
||||||
define amdgpu_kernel void @test_simple_indirect_call() #0 {
|
define amdgpu_kernel void @test_simple_indirect_call() #0 {
|
||||||
; AKF_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
|
; AKF_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
|
||||||
; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] {
|
; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] {
|
||||||
; AKF_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8
|
; AKF_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8, addrspace(5)
|
||||||
; AKF_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8
|
; AKF_GCN-NEXT: store void ()* @indirect, void ()* addrspace(5)* [[FPTR]], align 8
|
||||||
; AKF_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8
|
; AKF_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()* addrspace(5)* [[FPTR]], align 8
|
||||||
; AKF_GCN-NEXT: call void [[FP]]()
|
; AKF_GCN-NEXT: call void [[FP]]()
|
||||||
; AKF_GCN-NEXT: ret void
|
; AKF_GCN-NEXT: ret void
|
||||||
;
|
;
|
||||||
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
|
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
|
||||||
; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
|
; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
|
||||||
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8
|
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8, addrspace(5)
|
||||||
; ATTRIBUTOR_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8
|
; ATTRIBUTOR_GCN-NEXT: store void ()* @indirect, void ()* addrspace(5)* [[FPTR]], align 8
|
||||||
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8
|
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()* addrspace(5)* [[FPTR]], align 8
|
||||||
; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
|
; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
|
||||||
; ATTRIBUTOR_GCN-NEXT: ret void
|
; ATTRIBUTOR_GCN-NEXT: ret void
|
||||||
;
|
;
|
||||||
%fptr = alloca void()*
|
%fptr = alloca void()*, addrspace(5)
|
||||||
store void()* @indirect, void()** %fptr
|
store void()* @indirect, void()* addrspace(5)* %fptr
|
||||||
%fp = load void()*, void()** %fptr
|
%fp = load void()*, void()* addrspace(5)* %fptr
|
||||||
call void %fp()
|
call void %fp()
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,19 +8,19 @@
|
||||||
|
|
||||||
define amdgpu_kernel void @promote_alloca_size_63(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
|
define amdgpu_kernel void @promote_alloca_size_63(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
|
||||||
entry:
|
entry:
|
||||||
%stack = alloca [5 x i32], align 4
|
%stack = alloca [5 x i32], align 4, addrspace(5)
|
||||||
%0 = load i32, i32 addrspace(1)* %in, align 4
|
%0 = load i32, i32 addrspace(1)* %in, align 4
|
||||||
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
|
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
|
||||||
store i32 4, i32* %arrayidx1, align 4
|
store i32 4, i32 addrspace(5)* %arrayidx1, align 4
|
||||||
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
|
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
|
||||||
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
|
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
|
||||||
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
|
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
|
||||||
store i32 5, i32* %arrayidx3, align 4
|
store i32 5, i32 addrspace(5)* %arrayidx3, align 4
|
||||||
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
|
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
|
||||||
%2 = load i32, i32* %arrayidx10, align 4
|
%2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
|
||||||
store i32 %2, i32 addrspace(1)* %out, align 4
|
store i32 %2, i32 addrspace(1)* %out, align 4
|
||||||
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
|
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
|
||||||
%3 = load i32, i32* %arrayidx12
|
%3 = load i32, i32 addrspace(5)* %arrayidx12
|
||||||
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
|
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
|
||||||
store i32 %3, i32 addrspace(1)* %arrayidx13
|
store i32 %3, i32 addrspace(1)* %arrayidx13
|
||||||
ret void
|
ret void
|
||||||
|
@ -30,19 +30,19 @@ entry:
|
||||||
|
|
||||||
define amdgpu_kernel void @promote_alloca_size_256(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #1 {
|
define amdgpu_kernel void @promote_alloca_size_256(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #1 {
|
||||||
entry:
|
entry:
|
||||||
%stack = alloca [5 x i32], align 4
|
%stack = alloca [5 x i32], align 4, addrspace(5)
|
||||||
%0 = load i32, i32 addrspace(1)* %in, align 4
|
%0 = load i32, i32 addrspace(1)* %in, align 4
|
||||||
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
|
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
|
||||||
store i32 4, i32* %arrayidx1, align 4
|
store i32 4, i32 addrspace(5)* %arrayidx1, align 4
|
||||||
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
|
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
|
||||||
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
|
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
|
||||||
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
|
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
|
||||||
store i32 5, i32* %arrayidx3, align 4
|
store i32 5, i32 addrspace(5)* %arrayidx3, align 4
|
||||||
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
|
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
|
||||||
%2 = load i32, i32* %arrayidx10, align 4
|
%2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
|
||||||
store i32 %2, i32 addrspace(1)* %out, align 4
|
store i32 %2, i32 addrspace(1)* %out, align 4
|
||||||
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
|
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
|
||||||
%3 = load i32, i32* %arrayidx12
|
%3 = load i32, i32 addrspace(5)* %arrayidx12
|
||||||
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
|
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
|
||||||
store i32 %3, i32 addrspace(1)* %arrayidx13
|
store i32 %3, i32 addrspace(1)* %arrayidx13
|
||||||
ret void
|
ret void
|
||||||
|
@ -54,19 +54,19 @@ entry:
|
||||||
|
|
||||||
define amdgpu_kernel void @promote_alloca_size_1600(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #2 {
|
define amdgpu_kernel void @promote_alloca_size_1600(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #2 {
|
||||||
entry:
|
entry:
|
||||||
%stack = alloca [5 x i32], align 4
|
%stack = alloca [5 x i32], align 4, addrspace(5)
|
||||||
%0 = load i32, i32 addrspace(1)* %in, align 4
|
%0 = load i32, i32 addrspace(1)* %in, align 4
|
||||||
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
|
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
|
||||||
store i32 4, i32* %arrayidx1, align 4
|
store i32 4, i32 addrspace(5)* %arrayidx1, align 4
|
||||||
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
|
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
|
||||||
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
|
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
|
||||||
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
|
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
|
||||||
store i32 5, i32* %arrayidx3, align 4
|
store i32 5, i32 addrspace(5)* %arrayidx3, align 4
|
||||||
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
|
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
|
||||||
%2 = load i32, i32* %arrayidx10, align 4
|
%2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
|
||||||
store i32 %2, i32 addrspace(1)* %out, align 4
|
store i32 %2, i32 addrspace(1)* %out, align 4
|
||||||
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
|
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
|
||||||
%3 = load i32, i32* %arrayidx12
|
%3 = load i32, i32 addrspace(5)* %arrayidx12
|
||||||
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
|
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
|
||||||
store i32 %3, i32 addrspace(1)* %arrayidx13
|
store i32 %3, i32 addrspace(1)* %arrayidx13
|
||||||
ret void
|
ret void
|
||||||
|
@ -77,19 +77,19 @@ entry:
|
||||||
; SI: alloca [5 x i32]
|
; SI: alloca [5 x i32]
|
||||||
define amdgpu_kernel void @occupancy_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #3 {
|
define amdgpu_kernel void @occupancy_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #3 {
|
||||||
entry:
|
entry:
|
||||||
%stack = alloca [5 x i32], align 4
|
%stack = alloca [5 x i32], align 4, addrspace(5)
|
||||||
%0 = load i32, i32 addrspace(1)* %in, align 4
|
%0 = load i32, i32 addrspace(1)* %in, align 4
|
||||||
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
|
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
|
||||||
store i32 4, i32* %arrayidx1, align 4
|
store i32 4, i32 addrspace(5)* %arrayidx1, align 4
|
||||||
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
|
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
|
||||||
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
|
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
|
||||||
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
|
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
|
||||||
store i32 5, i32* %arrayidx3, align 4
|
store i32 5, i32 addrspace(5)* %arrayidx3, align 4
|
||||||
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
|
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
|
||||||
%2 = load i32, i32* %arrayidx10, align 4
|
%2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
|
||||||
store i32 %2, i32 addrspace(1)* %out, align 4
|
store i32 %2, i32 addrspace(1)* %out, align 4
|
||||||
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
|
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
|
||||||
%3 = load i32, i32* %arrayidx12
|
%3 = load i32, i32 addrspace(5)* %arrayidx12
|
||||||
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
|
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
|
||||||
store i32 %3, i32 addrspace(1)* %arrayidx13
|
store i32 %3, i32 addrspace(1)* %arrayidx13
|
||||||
ret void
|
ret void
|
||||||
|
@ -100,19 +100,19 @@ entry:
|
||||||
; SI: alloca [5 x i32]
|
; SI: alloca [5 x i32]
|
||||||
define amdgpu_kernel void @occupancy_max(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #4 {
|
define amdgpu_kernel void @occupancy_max(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #4 {
|
||||||
entry:
|
entry:
|
||||||
%stack = alloca [5 x i32], align 4
|
%stack = alloca [5 x i32], align 4, addrspace(5)
|
||||||
%0 = load i32, i32 addrspace(1)* %in, align 4
|
%0 = load i32, i32 addrspace(1)* %in, align 4
|
||||||
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
|
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
|
||||||
store i32 4, i32* %arrayidx1, align 4
|
store i32 4, i32 addrspace(5)* %arrayidx1, align 4
|
||||||
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
|
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
|
||||||
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
|
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
|
||||||
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
|
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
|
||||||
store i32 5, i32* %arrayidx3, align 4
|
store i32 5, i32 addrspace(5)* %arrayidx3, align 4
|
||||||
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
|
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
|
||||||
%2 = load i32, i32* %arrayidx10, align 4
|
%2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
|
||||||
store i32 %2, i32 addrspace(1)* %out, align 4
|
store i32 %2, i32 addrspace(1)* %out, align 4
|
||||||
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
|
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
|
||||||
%3 = load i32, i32* %arrayidx12
|
%3 = load i32, i32 addrspace(5)* %arrayidx12
|
||||||
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
|
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
|
||||||
store i32 %3, i32 addrspace(1)* %arrayidx13
|
store i32 %3, i32 addrspace(1)* %arrayidx13
|
||||||
ret void
|
ret void
|
||||||
|
@ -124,21 +124,21 @@ entry:
|
||||||
; CI-NOT: alloca
|
; CI-NOT: alloca
|
||||||
define amdgpu_kernel void @occupancy_6(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #5 {
|
define amdgpu_kernel void @occupancy_6(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #5 {
|
||||||
entry:
|
entry:
|
||||||
%stack = alloca [42 x i8], align 4
|
%stack = alloca [42 x i8], align 4, addrspace(5)
|
||||||
%tmp = load i8, i8 addrspace(1)* %in, align 1
|
%tmp = load i8, i8 addrspace(1)* %in, align 1
|
||||||
%tmp4 = sext i8 %tmp to i64
|
%tmp4 = sext i8 %tmp to i64
|
||||||
%arrayidx1 = getelementptr inbounds [42 x i8], [42 x i8]* %stack, i64 0, i64 %tmp4
|
%arrayidx1 = getelementptr inbounds [42 x i8], [42 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4
|
||||||
store i8 4, i8* %arrayidx1, align 1
|
store i8 4, i8 addrspace(5)* %arrayidx1, align 1
|
||||||
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
|
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
|
||||||
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
|
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
|
||||||
%tmp5 = sext i8 %tmp1 to i64
|
%tmp5 = sext i8 %tmp1 to i64
|
||||||
%arrayidx3 = getelementptr inbounds [42 x i8], [42 x i8]* %stack, i64 0, i64 %tmp5
|
%arrayidx3 = getelementptr inbounds [42 x i8], [42 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5
|
||||||
store i8 5, i8* %arrayidx3, align 1
|
store i8 5, i8 addrspace(5)* %arrayidx3, align 1
|
||||||
%arrayidx10 = getelementptr inbounds [42 x i8], [42 x i8]* %stack, i64 0, i64 0
|
%arrayidx10 = getelementptr inbounds [42 x i8], [42 x i8] addrspace(5)* %stack, i64 0, i64 0
|
||||||
%tmp2 = load i8, i8* %arrayidx10, align 1
|
%tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1
|
||||||
store i8 %tmp2, i8 addrspace(1)* %out, align 1
|
store i8 %tmp2, i8 addrspace(1)* %out, align 1
|
||||||
%arrayidx12 = getelementptr inbounds [42 x i8], [42 x i8]* %stack, i64 0, i64 1
|
%arrayidx12 = getelementptr inbounds [42 x i8], [42 x i8] addrspace(5)* %stack, i64 0, i64 1
|
||||||
%tmp3 = load i8, i8* %arrayidx12, align 1
|
%tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1
|
||||||
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
|
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
|
||||||
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
|
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
|
||||||
ret void
|
ret void
|
||||||
|
@ -150,21 +150,21 @@ entry:
|
||||||
|
|
||||||
define amdgpu_kernel void @occupancy_6_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #5 {
|
define amdgpu_kernel void @occupancy_6_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #5 {
|
||||||
entry:
|
entry:
|
||||||
%stack = alloca [43 x i8], align 4
|
%stack = alloca [43 x i8], align 4, addrspace(5)
|
||||||
%tmp = load i8, i8 addrspace(1)* %in, align 1
|
%tmp = load i8, i8 addrspace(1)* %in, align 1
|
||||||
%tmp4 = sext i8 %tmp to i64
|
%tmp4 = sext i8 %tmp to i64
|
||||||
%arrayidx1 = getelementptr inbounds [43 x i8], [43 x i8]* %stack, i64 0, i64 %tmp4
|
%arrayidx1 = getelementptr inbounds [43 x i8], [43 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4
|
||||||
store i8 4, i8* %arrayidx1, align 1
|
store i8 4, i8 addrspace(5)* %arrayidx1, align 1
|
||||||
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
|
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
|
||||||
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
|
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
|
||||||
%tmp5 = sext i8 %tmp1 to i64
|
%tmp5 = sext i8 %tmp1 to i64
|
||||||
%arrayidx3 = getelementptr inbounds [43 x i8], [43 x i8]* %stack, i64 0, i64 %tmp5
|
%arrayidx3 = getelementptr inbounds [43 x i8], [43 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5
|
||||||
store i8 5, i8* %arrayidx3, align 1
|
store i8 5, i8 addrspace(5)* %arrayidx3, align 1
|
||||||
%arrayidx10 = getelementptr inbounds [43 x i8], [43 x i8]* %stack, i64 0, i64 0
|
%arrayidx10 = getelementptr inbounds [43 x i8], [43 x i8] addrspace(5)* %stack, i64 0, i64 0
|
||||||
%tmp2 = load i8, i8* %arrayidx10, align 1
|
%tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1
|
||||||
store i8 %tmp2, i8 addrspace(1)* %out, align 1
|
store i8 %tmp2, i8 addrspace(1)* %out, align 1
|
||||||
%arrayidx12 = getelementptr inbounds [43 x i8], [43 x i8]* %stack, i64 0, i64 1
|
%arrayidx12 = getelementptr inbounds [43 x i8], [43 x i8] addrspace(5)* %stack, i64 0, i64 1
|
||||||
%tmp3 = load i8, i8* %arrayidx12, align 1
|
%tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1
|
||||||
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
|
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
|
||||||
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
|
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
|
||||||
ret void
|
ret void
|
||||||
|
@ -176,21 +176,21 @@ entry:
|
||||||
; CI-NOT: alloca
|
; CI-NOT: alloca
|
||||||
define amdgpu_kernel void @occupancy_8(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #6 {
|
define amdgpu_kernel void @occupancy_8(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #6 {
|
||||||
entry:
|
entry:
|
||||||
%stack = alloca [32 x i8], align 4
|
%stack = alloca [32 x i8], align 4, addrspace(5)
|
||||||
%tmp = load i8, i8 addrspace(1)* %in, align 1
|
%tmp = load i8, i8 addrspace(1)* %in, align 1
|
||||||
%tmp4 = sext i8 %tmp to i64
|
%tmp4 = sext i8 %tmp to i64
|
||||||
%arrayidx1 = getelementptr inbounds [32 x i8], [32 x i8]* %stack, i64 0, i64 %tmp4
|
%arrayidx1 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4
|
||||||
store i8 4, i8* %arrayidx1, align 1
|
store i8 4, i8 addrspace(5)* %arrayidx1, align 1
|
||||||
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
|
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
|
||||||
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
|
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
|
||||||
%tmp5 = sext i8 %tmp1 to i64
|
%tmp5 = sext i8 %tmp1 to i64
|
||||||
%arrayidx3 = getelementptr inbounds [32 x i8], [32 x i8]* %stack, i64 0, i64 %tmp5
|
%arrayidx3 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5
|
||||||
store i8 5, i8* %arrayidx3, align 1
|
store i8 5, i8 addrspace(5)* %arrayidx3, align 1
|
||||||
%arrayidx10 = getelementptr inbounds [32 x i8], [32 x i8]* %stack, i64 0, i64 0
|
%arrayidx10 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %stack, i64 0, i64 0
|
||||||
%tmp2 = load i8, i8* %arrayidx10, align 1
|
%tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1
|
||||||
store i8 %tmp2, i8 addrspace(1)* %out, align 1
|
store i8 %tmp2, i8 addrspace(1)* %out, align 1
|
||||||
%arrayidx12 = getelementptr inbounds [32 x i8], [32 x i8]* %stack, i64 0, i64 1
|
%arrayidx12 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %stack, i64 0, i64 1
|
||||||
%tmp3 = load i8, i8* %arrayidx12, align 1
|
%tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1
|
||||||
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
|
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
|
||||||
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
|
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
|
||||||
ret void
|
ret void
|
||||||
|
@ -202,21 +202,21 @@ entry:
|
||||||
|
|
||||||
define amdgpu_kernel void @occupancy_8_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #6 {
|
define amdgpu_kernel void @occupancy_8_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #6 {
|
||||||
entry:
|
entry:
|
||||||
%stack = alloca [33 x i8], align 4
|
%stack = alloca [33 x i8], align 4, addrspace(5)
|
||||||
%tmp = load i8, i8 addrspace(1)* %in, align 1
|
%tmp = load i8, i8 addrspace(1)* %in, align 1
|
||||||
%tmp4 = sext i8 %tmp to i64
|
%tmp4 = sext i8 %tmp to i64
|
||||||
%arrayidx1 = getelementptr inbounds [33 x i8], [33 x i8]* %stack, i64 0, i64 %tmp4
|
%arrayidx1 = getelementptr inbounds [33 x i8], [33 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4
|
||||||
store i8 4, i8* %arrayidx1, align 1
|
store i8 4, i8 addrspace(5)* %arrayidx1, align 1
|
||||||
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
|
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
|
||||||
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
|
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
|
||||||
%tmp5 = sext i8 %tmp1 to i64
|
%tmp5 = sext i8 %tmp1 to i64
|
||||||
%arrayidx3 = getelementptr inbounds [33 x i8], [33 x i8]* %stack, i64 0, i64 %tmp5
|
%arrayidx3 = getelementptr inbounds [33 x i8], [33 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5
|
||||||
store i8 5, i8* %arrayidx3, align 1
|
store i8 5, i8 addrspace(5)* %arrayidx3, align 1
|
||||||
%arrayidx10 = getelementptr inbounds [33 x i8], [33 x i8]* %stack, i64 0, i64 0
|
%arrayidx10 = getelementptr inbounds [33 x i8], [33 x i8] addrspace(5)* %stack, i64 0, i64 0
|
||||||
%tmp2 = load i8, i8* %arrayidx10, align 1
|
%tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1
|
||||||
store i8 %tmp2, i8 addrspace(1)* %out, align 1
|
store i8 %tmp2, i8 addrspace(1)* %out, align 1
|
||||||
%arrayidx12 = getelementptr inbounds [33 x i8], [33 x i8]* %stack, i64 0, i64 1
|
%arrayidx12 = getelementptr inbounds [33 x i8], [33 x i8] addrspace(5)* %stack, i64 0, i64 1
|
||||||
%tmp3 = load i8, i8* %arrayidx12, align 1
|
%tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1
|
||||||
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
|
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
|
||||||
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
|
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
|
||||||
ret void
|
ret void
|
||||||
|
@ -228,21 +228,21 @@ entry:
|
||||||
; CI-NOT: alloca
|
; CI-NOT: alloca
|
||||||
define amdgpu_kernel void @occupancy_9(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #7 {
|
define amdgpu_kernel void @occupancy_9(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #7 {
|
||||||
entry:
|
entry:
|
||||||
%stack = alloca [28 x i8], align 4
|
%stack = alloca [28 x i8], align 4, addrspace(5)
|
||||||
%tmp = load i8, i8 addrspace(1)* %in, align 1
|
%tmp = load i8, i8 addrspace(1)* %in, align 1
|
||||||
%tmp4 = sext i8 %tmp to i64
|
%tmp4 = sext i8 %tmp to i64
|
||||||
%arrayidx1 = getelementptr inbounds [28 x i8], [28 x i8]* %stack, i64 0, i64 %tmp4
|
%arrayidx1 = getelementptr inbounds [28 x i8], [28 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4
|
||||||
store i8 4, i8* %arrayidx1, align 1
|
store i8 4, i8 addrspace(5)* %arrayidx1, align 1
|
||||||
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
|
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
|
||||||
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
|
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
|
||||||
%tmp5 = sext i8 %tmp1 to i64
|
%tmp5 = sext i8 %tmp1 to i64
|
||||||
%arrayidx3 = getelementptr inbounds [28 x i8], [28 x i8]* %stack, i64 0, i64 %tmp5
|
%arrayidx3 = getelementptr inbounds [28 x i8], [28 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5
|
||||||
store i8 5, i8* %arrayidx3, align 1
|
store i8 5, i8 addrspace(5)* %arrayidx3, align 1
|
||||||
%arrayidx10 = getelementptr inbounds [28 x i8], [28 x i8]* %stack, i64 0, i64 0
|
%arrayidx10 = getelementptr inbounds [28 x i8], [28 x i8] addrspace(5)* %stack, i64 0, i64 0
|
||||||
%tmp2 = load i8, i8* %arrayidx10, align 1
|
%tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1
|
||||||
store i8 %tmp2, i8 addrspace(1)* %out, align 1
|
store i8 %tmp2, i8 addrspace(1)* %out, align 1
|
||||||
%arrayidx12 = getelementptr inbounds [28 x i8], [28 x i8]* %stack, i64 0, i64 1
|
%arrayidx12 = getelementptr inbounds [28 x i8], [28 x i8] addrspace(5)* %stack, i64 0, i64 1
|
||||||
%tmp3 = load i8, i8* %arrayidx12, align 1
|
%tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1
|
||||||
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
|
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
|
||||||
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
|
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
|
||||||
ret void
|
ret void
|
||||||
|
@ -254,21 +254,21 @@ entry:
|
||||||
|
|
||||||
define amdgpu_kernel void @occupancy_9_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #7 {
|
define amdgpu_kernel void @occupancy_9_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #7 {
|
||||||
entry:
|
entry:
|
||||||
%stack = alloca [29 x i8], align 4
|
%stack = alloca [29 x i8], align 4, addrspace(5)
|
||||||
%tmp = load i8, i8 addrspace(1)* %in, align 1
|
%tmp = load i8, i8 addrspace(1)* %in, align 1
|
||||||
%tmp4 = sext i8 %tmp to i64
|
%tmp4 = sext i8 %tmp to i64
|
||||||
%arrayidx1 = getelementptr inbounds [29 x i8], [29 x i8]* %stack, i64 0, i64 %tmp4
|
%arrayidx1 = getelementptr inbounds [29 x i8], [29 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4
|
||||||
store i8 4, i8* %arrayidx1, align 1
|
store i8 4, i8 addrspace(5)* %arrayidx1, align 1
|
||||||
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
|
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
|
||||||
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
|
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
|
||||||
%tmp5 = sext i8 %tmp1 to i64
|
%tmp5 = sext i8 %tmp1 to i64
|
||||||
%arrayidx3 = getelementptr inbounds [29 x i8], [29 x i8]* %stack, i64 0, i64 %tmp5
|
%arrayidx3 = getelementptr inbounds [29 x i8], [29 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5
|
||||||
store i8 5, i8* %arrayidx3, align 1
|
store i8 5, i8 addrspace(5)* %arrayidx3, align 1
|
||||||
%arrayidx10 = getelementptr inbounds [29 x i8], [29 x i8]* %stack, i64 0, i64 0
|
%arrayidx10 = getelementptr inbounds [29 x i8], [29 x i8] addrspace(5)* %stack, i64 0, i64 0
|
||||||
%tmp2 = load i8, i8* %arrayidx10, align 1
|
%tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1
|
||||||
store i8 %tmp2, i8 addrspace(1)* %out, align 1
|
store i8 %tmp2, i8 addrspace(1)* %out, align 1
|
||||||
%arrayidx12 = getelementptr inbounds [29 x i8], [29 x i8]* %stack, i64 0, i64 1
|
%arrayidx12 = getelementptr inbounds [29 x i8], [29 x i8] addrspace(5)* %stack, i64 0, i64 1
|
||||||
%tmp3 = load i8, i8* %arrayidx12, align 1
|
%tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1
|
||||||
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
|
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
|
||||||
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
|
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
|
||||||
ret void
|
ret void
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
; FUNC-LABEL: @test_kernel(
|
; FUNC-LABEL: @test_kernel(
|
||||||
; R600-LABEL: entry
|
; R600-LABEL: entry
|
||||||
; R600-NOT: call i8 addrspace(1)* @__printf_alloc
|
; R600-NOT: call i8 addrspace(1)* @__printf_alloc
|
||||||
; R600: call i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str, i32 0, i32 0), i8* %arraydecay, i32 %n)
|
; R600: call i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str, i32 0, i32 0), i8 addrspace(5)* %arraydecay, i32 %n)
|
||||||
; GCN-LABEL: entry
|
; GCN-LABEL: entry
|
||||||
; GCN: call i8 addrspace(1)* @__printf_alloc
|
; GCN: call i8 addrspace(1)* @__printf_alloc
|
||||||
; GCN-LABEL: entry.split
|
; GCN-LABEL: entry.split
|
||||||
|
@ -15,7 +15,7 @@
|
||||||
; GCN: %PrintBuffIdCast = bitcast i8 addrspace(1)* %PrintBuffID to i32 addrspace(1)*
|
; GCN: %PrintBuffIdCast = bitcast i8 addrspace(1)* %PrintBuffID to i32 addrspace(1)*
|
||||||
; GCN: store i32 1, i32 addrspace(1)* %PrintBuffIdCast
|
; GCN: store i32 1, i32 addrspace(1)* %PrintBuffIdCast
|
||||||
; GCN: %PrintBuffGep = getelementptr i8, i8 addrspace(1)* %printf_alloc_fn, i32 4
|
; GCN: %PrintBuffGep = getelementptr i8, i8 addrspace(1)* %printf_alloc_fn, i32 4
|
||||||
; GCN: %PrintArgPtr = ptrtoint i8* %arraydecay to i64
|
; GCN: %PrintArgPtr = ptrtoint i8 addrspace(5)* %arraydecay to i64
|
||||||
; GCN: %PrintBuffPtrCast = bitcast i8 addrspace(1)* %PrintBuffGep to i64 addrspace(1)*
|
; GCN: %PrintBuffPtrCast = bitcast i8 addrspace(1)* %PrintBuffGep to i64 addrspace(1)*
|
||||||
; GCN: store i64 %PrintArgPtr, i64 addrspace(1)* %PrintBuffPtrCast
|
; GCN: store i64 %PrintArgPtr, i64 addrspace(1)* %PrintBuffPtrCast
|
||||||
; GCN: %PrintBuffNextPtr = getelementptr i8, i8 addrspace(1)* %PrintBuffGep, i32 8
|
; GCN: %PrintBuffNextPtr = getelementptr i8, i8 addrspace(1)* %PrintBuffGep, i32 8
|
||||||
|
@ -26,9 +26,9 @@
|
||||||
|
|
||||||
define amdgpu_kernel void @test_kernel(i32 %n) {
|
define amdgpu_kernel void @test_kernel(i32 %n) {
|
||||||
entry:
|
entry:
|
||||||
%str = alloca [9 x i8], align 1
|
%str = alloca [9 x i8], align 1, addrspace(5)
|
||||||
%arraydecay = getelementptr inbounds [9 x i8], [9 x i8]* %str, i32 0, i32 0
|
%arraydecay = getelementptr inbounds [9 x i8], [9 x i8] addrspace(5)* %str, i32 0, i32 0
|
||||||
%call1 = call i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str, i32 0, i32 0), i8* %arraydecay, i32 %n)
|
%call1 = call i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str, i32 0, i32 0), i8 addrspace(5)* %arraydecay, i32 %n)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -14,23 +14,23 @@
|
||||||
; CHECK-NEXT: OR_INT
|
; CHECK-NEXT: OR_INT
|
||||||
define amdgpu_kernel void @_Z9chk1D_512v() #0 {
|
define amdgpu_kernel void @_Z9chk1D_512v() #0 {
|
||||||
entry:
|
entry:
|
||||||
%a0 = alloca i32, align 4
|
%a0 = alloca i32, align 4, addrspace(5)
|
||||||
%b0 = alloca i32, align 4
|
%b0 = alloca i32, align 4, addrspace(5)
|
||||||
%c0 = alloca i32, align 4
|
%c0 = alloca i32, align 4, addrspace(5)
|
||||||
%d0 = alloca i32, align 4
|
%d0 = alloca i32, align 4, addrspace(5)
|
||||||
%a1 = alloca i32, align 4
|
%a1 = alloca i32, align 4, addrspace(5)
|
||||||
%b1 = alloca i32, align 4
|
%b1 = alloca i32, align 4, addrspace(5)
|
||||||
%c1 = alloca i32, align 4
|
%c1 = alloca i32, align 4, addrspace(5)
|
||||||
%d1 = alloca i32, align 4
|
%d1 = alloca i32, align 4, addrspace(5)
|
||||||
%data = alloca i32, align 4
|
%data = alloca i32, align 4, addrspace(5)
|
||||||
%0 = load i32, i32* %a0, align 4
|
%0 = load i32, i32 addrspace(5)* %a0, align 4
|
||||||
%1 = load i32, i32* %b0, align 4
|
%1 = load i32, i32 addrspace(5)* %b0, align 4
|
||||||
%cmp = icmp ne i32 %0, %1
|
%cmp = icmp ne i32 %0, %1
|
||||||
br i1 %cmp, label %land.lhs.true, label %if.else
|
br i1 %cmp, label %land.lhs.true, label %if.else
|
||||||
|
|
||||||
land.lhs.true: ; preds = %entry
|
land.lhs.true: ; preds = %entry
|
||||||
%2 = load i32, i32* %c0, align 4
|
%2 = load i32, i32 addrspace(5)* %c0, align 4
|
||||||
%3 = load i32, i32* %d0, align 4
|
%3 = load i32, i32 addrspace(5)* %d0, align 4
|
||||||
%cmp1 = icmp ne i32 %2, %3
|
%cmp1 = icmp ne i32 %2, %3
|
||||||
br i1 %cmp1, label %if.then, label %if.else
|
br i1 %cmp1, label %if.then, label %if.else
|
||||||
|
|
||||||
|
@ -38,18 +38,18 @@ if.then: ; preds = %land.lhs.true
|
||||||
br label %if.end
|
br label %if.end
|
||||||
|
|
||||||
if.else: ; preds = %land.lhs.true, %entry
|
if.else: ; preds = %land.lhs.true, %entry
|
||||||
store i32 1, i32* %data, align 4
|
store i32 1, i32 addrspace(5)* %data, align 4
|
||||||
br label %if.end
|
br label %if.end
|
||||||
|
|
||||||
if.end: ; preds = %if.else, %if.then
|
if.end: ; preds = %if.else, %if.then
|
||||||
%4 = load i32, i32* %a1, align 4
|
%4 = load i32, i32 addrspace(5)* %a1, align 4
|
||||||
%5 = load i32, i32* %b1, align 4
|
%5 = load i32, i32 addrspace(5)* %b1, align 4
|
||||||
%cmp2 = icmp ne i32 %4, %5
|
%cmp2 = icmp ne i32 %4, %5
|
||||||
br i1 %cmp2, label %land.lhs.true3, label %if.else6
|
br i1 %cmp2, label %land.lhs.true3, label %if.else6
|
||||||
|
|
||||||
land.lhs.true3: ; preds = %if.end
|
land.lhs.true3: ; preds = %if.end
|
||||||
%6 = load i32, i32* %c1, align 4
|
%6 = load i32, i32 addrspace(5)* %c1, align 4
|
||||||
%7 = load i32, i32* %d1, align 4
|
%7 = load i32, i32 addrspace(5)* %d1, align 4
|
||||||
%cmp4 = icmp ne i32 %6, %7
|
%cmp4 = icmp ne i32 %6, %7
|
||||||
br i1 %cmp4, label %if.then5, label %if.else6
|
br i1 %cmp4, label %if.then5, label %if.else6
|
||||||
|
|
||||||
|
@ -57,7 +57,7 @@ if.then5: ; preds = %land.lhs.true3
|
||||||
br label %if.end7
|
br label %if.end7
|
||||||
|
|
||||||
if.else6: ; preds = %land.lhs.true3, %if.end
|
if.else6: ; preds = %land.lhs.true3, %if.end
|
||||||
store i32 1, i32* %data, align 4
|
store i32 1, i32 addrspace(5)* %data, align 4
|
||||||
br label %if.end7
|
br label %if.end7
|
||||||
|
|
||||||
if.end7: ; preds = %if.else6, %if.then5
|
if.end7: ; preds = %if.else6, %if.then5
|
||||||
|
|
|
@ -5,15 +5,15 @@
|
||||||
; CHECK-LABEL: @invalid_bitcast_addrspace(
|
; CHECK-LABEL: @invalid_bitcast_addrspace(
|
||||||
; CHECK: getelementptr inbounds [256 x [1 x i32]], [256 x [1 x i32]] addrspace(3)* @invalid_bitcast_addrspace.data, i32 0, i32 %14
|
; CHECK: getelementptr inbounds [256 x [1 x i32]], [256 x [1 x i32]] addrspace(3)* @invalid_bitcast_addrspace.data, i32 0, i32 %14
|
||||||
; CHECK: bitcast [1 x i32] addrspace(3)* %{{[0-9]+}} to half addrspace(3)*
|
; CHECK: bitcast [1 x i32] addrspace(3)* %{{[0-9]+}} to half addrspace(3)*
|
||||||
; CHECK: addrspacecast half addrspace(3)* %tmp to half addrspace(4)*
|
; CHECK: addrspacecast half addrspace(3)* %tmp to half*
|
||||||
; CHECK: bitcast half addrspace(4)* %tmp1 to <2 x i16> addrspace(4)*
|
; CHECK: bitcast half* %tmp1 to <2 x i16>*
|
||||||
define amdgpu_kernel void @invalid_bitcast_addrspace() #0 {
|
define amdgpu_kernel void @invalid_bitcast_addrspace() #0 {
|
||||||
entry:
|
entry:
|
||||||
%data = alloca [1 x i32], align 4
|
%data = alloca [1 x i32], addrspace(5)
|
||||||
%tmp = bitcast [1 x i32]* %data to half*
|
%tmp = bitcast [1 x i32] addrspace(5)* %data to half addrspace(5)*
|
||||||
%tmp1 = addrspacecast half* %tmp to half addrspace(4)*
|
%tmp1 = addrspacecast half addrspace(5)* %tmp to half*
|
||||||
%tmp2 = bitcast half addrspace(4)* %tmp1 to <2 x i16> addrspace(4)*
|
%tmp2 = bitcast half* %tmp1 to <2 x i16>*
|
||||||
%tmp3 = load <2 x i16>, <2 x i16> addrspace(4)* %tmp2, align 2
|
%tmp3 = load <2 x i16>, <2 x i16>* %tmp2, align 2
|
||||||
%tmp4 = bitcast <2 x i16> %tmp3 to <2 x half>
|
%tmp4 = bitcast <2 x i16> %tmp3 to <2 x half>
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,19 +16,19 @@
|
||||||
|
|
||||||
define amdgpu_vs void @promote_1d_aggr() #0 {
|
define amdgpu_vs void @promote_1d_aggr() #0 {
|
||||||
; CHECK-LABEL: @promote_1d_aggr(
|
; CHECK-LABEL: @promote_1d_aggr(
|
||||||
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
|
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
|
||||||
; CHECK-NEXT: [[F1:%.*]] = alloca [1 x float], align 4
|
; CHECK-NEXT: [[F1:%.*]] = alloca [1 x float], align 4, addrspace(5)
|
||||||
; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK:%.*]], [[BLOCK]] addrspace(1)* @block, i32 0, i32 1
|
; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK:%.*]], [[BLOCK]] addrspace(1)* @block, i32 0, i32 1
|
||||||
; CHECK-NEXT: [[FOO1:%.*]] = load i32, i32 addrspace(1)* [[FOO]], align 4
|
; CHECK-NEXT: [[FOO1:%.*]] = load i32, i32 addrspace(1)* [[FOO]], align 4
|
||||||
; CHECK-NEXT: store i32 [[FOO1]], i32* [[I]], align 4
|
; CHECK-NEXT: store i32 [[FOO1]], i32 addrspace(5)* [[I]], align 4
|
||||||
; CHECK-NEXT: [[FOO2:%.*]] = getelementptr [[BLOCK]], [[BLOCK]] addrspace(1)* @block, i32 0, i32 0
|
; CHECK-NEXT: [[FOO2:%.*]] = getelementptr [[BLOCK]], [[BLOCK]] addrspace(1)* @block, i32 0, i32 0
|
||||||
; CHECK-NEXT: [[FOO3:%.*]] = load [1 x float], [1 x float] addrspace(1)* [[FOO2]], align 4
|
; CHECK-NEXT: [[FOO3:%.*]] = load [1 x float], [1 x float] addrspace(1)* [[FOO2]], align 4
|
||||||
; CHECK-NEXT: store [1 x float] [[FOO3]], [1 x float]* [[F1]], align 4
|
; CHECK-NEXT: store [1 x float] [[FOO3]], [1 x float] addrspace(5)* [[F1]], align 4
|
||||||
; CHECK-NEXT: [[FOO4:%.*]] = load i32, i32* [[I]], align 4
|
; CHECK-NEXT: [[FOO4:%.*]] = load i32, i32 addrspace(5)* [[I]], align 4
|
||||||
; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [1 x float], [1 x float]* [[F1]], i32 0, i32 [[FOO4]]
|
; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [1 x float], [1 x float] addrspace(5)* [[F1]], i32 0, i32 [[FOO4]]
|
||||||
; CHECK-NEXT: [[FOO6:%.*]] = load float, float* [[FOO5]], align 4
|
; CHECK-NEXT: [[FOO6:%.*]] = load float, float addrspace(5)* [[FOO5]], align 4
|
||||||
; CHECK-NEXT: [[FOO7:%.*]] = alloca <4 x float>, align 16
|
; CHECK-NEXT: [[FOO7:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
||||||
; CHECK-NEXT: [[FOO8:%.*]] = load <4 x float>, <4 x float>* [[FOO7]], align 16
|
; CHECK-NEXT: [[FOO8:%.*]] = load <4 x float>, <4 x float> addrspace(5)* [[FOO7]], align 16
|
||||||
; CHECK-NEXT: [[FOO9:%.*]] = insertelement <4 x float> [[FOO8]], float [[FOO6]], i32 0
|
; CHECK-NEXT: [[FOO9:%.*]] = insertelement <4 x float> [[FOO8]], float [[FOO6]], i32 0
|
||||||
; CHECK-NEXT: [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[FOO6]], i32 1
|
; CHECK-NEXT: [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[FOO6]], i32 1
|
||||||
; CHECK-NEXT: [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[FOO6]], i32 2
|
; CHECK-NEXT: [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[FOO6]], i32 2
|
||||||
|
@ -37,19 +37,19 @@ define amdgpu_vs void @promote_1d_aggr() #0 {
|
||||||
; CHECK-NEXT: store <4 x float> [[FOO12]], <4 x float> addrspace(1)* [[FOO13]], align 16
|
; CHECK-NEXT: store <4 x float> [[FOO12]], <4 x float> addrspace(1)* [[FOO13]], align 16
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
;
|
||||||
%i = alloca i32
|
%i = alloca i32, addrspace(5)
|
||||||
%f1 = alloca [1 x float]
|
%f1 = alloca [1 x float], addrspace(5)
|
||||||
%foo = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 1
|
%foo = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 1
|
||||||
%foo1 = load i32, i32 addrspace(1)* %foo
|
%foo1 = load i32, i32 addrspace(1)* %foo
|
||||||
store i32 %foo1, i32* %i
|
store i32 %foo1, i32 addrspace(5)* %i
|
||||||
%foo2 = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 0
|
%foo2 = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 0
|
||||||
%foo3 = load [1 x float], [1 x float] addrspace(1)* %foo2
|
%foo3 = load [1 x float], [1 x float] addrspace(1)* %foo2
|
||||||
store [1 x float] %foo3, [1 x float]* %f1
|
store [1 x float] %foo3, [1 x float] addrspace(5)* %f1
|
||||||
%foo4 = load i32, i32* %i
|
%foo4 = load i32, i32 addrspace(5)* %i
|
||||||
%foo5 = getelementptr [1 x float], [1 x float]* %f1, i32 0, i32 %foo4
|
%foo5 = getelementptr [1 x float], [1 x float] addrspace(5)* %f1, i32 0, i32 %foo4
|
||||||
%foo6 = load float, float* %foo5
|
%foo6 = load float, float addrspace(5)* %foo5
|
||||||
%foo7 = alloca <4 x float>
|
%foo7 = alloca <4 x float>, addrspace(5)
|
||||||
%foo8 = load <4 x float>, <4 x float>* %foo7
|
%foo8 = load <4 x float>, <4 x float> addrspace(5)* %foo7
|
||||||
%foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
|
%foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
|
||||||
%foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
|
%foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
|
||||||
%foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
|
%foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
|
||||||
|
@ -64,42 +64,42 @@ define amdgpu_vs void @promote_1d_aggr() #0 {
|
||||||
|
|
||||||
define amdgpu_vs void @promote_store_aggr() #0 {
|
define amdgpu_vs void @promote_store_aggr() #0 {
|
||||||
; CHECK-LABEL: @promote_store_aggr(
|
; CHECK-LABEL: @promote_store_aggr(
|
||||||
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
|
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
|
||||||
; CHECK-NEXT: [[F1:%.*]] = alloca [2 x float], align 4
|
; CHECK-NEXT: [[F1:%.*]] = alloca [2 x float], align 4, addrspace(5)
|
||||||
; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK2:%.*]], [[BLOCK2]] addrspace(1)* @block2, i32 0, i32 0
|
; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK2:%.*]], [[BLOCK2]] addrspace(1)* @block2, i32 0, i32 0
|
||||||
; CHECK-NEXT: [[FOO1:%.*]] = load i32, i32 addrspace(1)* [[FOO]], align 4
|
; CHECK-NEXT: [[FOO1:%.*]] = load i32, i32 addrspace(1)* [[FOO]], align 4
|
||||||
; CHECK-NEXT: store i32 [[FOO1]], i32* [[I]], align 4
|
; CHECK-NEXT: store i32 [[FOO1]], i32 addrspace(5)* [[I]], align 4
|
||||||
; CHECK-NEXT: [[FOO2:%.*]] = load i32, i32* [[I]], align 4
|
; CHECK-NEXT: [[FOO2:%.*]] = load i32, i32 addrspace(5)* [[I]], align 4
|
||||||
; CHECK-NEXT: [[FOO3:%.*]] = sitofp i32 [[FOO2]] to float
|
; CHECK-NEXT: [[FOO3:%.*]] = sitofp i32 [[FOO2]] to float
|
||||||
; CHECK-NEXT: [[FOO4:%.*]] = getelementptr [2 x float], [2 x float]* [[F1]], i32 0, i32 0
|
; CHECK-NEXT: [[FOO4:%.*]] = getelementptr [2 x float], [2 x float] addrspace(5)* [[F1]], i32 0, i32 0
|
||||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [2 x float]* [[F1]] to <2 x float>*
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [2 x float] addrspace(5)* [[F1]] to <2 x float> addrspace(5)*
|
||||||
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 8
|
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float> addrspace(5)* [[TMP1]], align 8
|
||||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[FOO3]], i32 0
|
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[FOO3]], i32 0
|
||||||
; CHECK-NEXT: store <2 x float> [[TMP3]], <2 x float>* [[TMP1]], align 8
|
; CHECK-NEXT: store <2 x float> [[TMP3]], <2 x float> addrspace(5)* [[TMP1]], align 8
|
||||||
; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [2 x float], [2 x float]* [[F1]], i32 0, i32 1
|
; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [2 x float], [2 x float] addrspace(5)* [[F1]], i32 0, i32 1
|
||||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast [2 x float]* [[F1]] to <2 x float>*
|
; CHECK-NEXT: [[TMP4:%.*]] = bitcast [2 x float] addrspace(5)* [[F1]] to <2 x float> addrspace(5)*
|
||||||
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[TMP4]], align 8
|
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x float>, <2 x float> addrspace(5)* [[TMP4]], align 8
|
||||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float 2.000000e+00, i64 1
|
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float 2.000000e+00, i64 1
|
||||||
; CHECK-NEXT: store <2 x float> [[TMP6]], <2 x float>* [[TMP4]], align 8
|
; CHECK-NEXT: store <2 x float> [[TMP6]], <2 x float> addrspace(5)* [[TMP4]], align 8
|
||||||
; CHECK-NEXT: [[FOO6:%.*]] = load [2 x float], [2 x float]* [[F1]], align 4
|
; CHECK-NEXT: [[FOO6:%.*]] = load [2 x float], [2 x float] addrspace(5)* [[F1]], align 4
|
||||||
; CHECK-NEXT: [[FOO7:%.*]] = getelementptr [[BLOCK2]], [[BLOCK2]] addrspace(1)* @block2, i32 0, i32 1
|
; CHECK-NEXT: [[FOO7:%.*]] = getelementptr [[BLOCK2]], [[BLOCK2]] addrspace(1)* @block2, i32 0, i32 1
|
||||||
; CHECK-NEXT: store [2 x float] [[FOO6]], [2 x float] addrspace(1)* [[FOO7]], align 4
|
; CHECK-NEXT: store [2 x float] [[FOO6]], [2 x float] addrspace(1)* [[FOO7]], align 4
|
||||||
; CHECK-NEXT: [[FOO8:%.*]] = getelementptr [[GL_PERVERTEX:%.*]], [[GL_PERVERTEX]] addrspace(1)* @pv, i32 0, i32 0
|
; CHECK-NEXT: [[FOO8:%.*]] = getelementptr [[GL_PERVERTEX:%.*]], [[GL_PERVERTEX]] addrspace(1)* @pv, i32 0, i32 0
|
||||||
; CHECK-NEXT: store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> addrspace(1)* [[FOO8]], align 16
|
; CHECK-NEXT: store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> addrspace(1)* [[FOO8]], align 16
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
;
|
||||||
%i = alloca i32
|
%i = alloca i32, addrspace(5)
|
||||||
%f1 = alloca [2 x float]
|
%f1 = alloca [2 x float], addrspace(5)
|
||||||
%foo = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 0
|
%foo = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 0
|
||||||
%foo1 = load i32, i32 addrspace(1)* %foo
|
%foo1 = load i32, i32 addrspace(1)* %foo
|
||||||
store i32 %foo1, i32* %i
|
store i32 %foo1, i32 addrspace(5)* %i
|
||||||
%foo2 = load i32, i32* %i
|
%foo2 = load i32, i32 addrspace(5)* %i
|
||||||
%foo3 = sitofp i32 %foo2 to float
|
%foo3 = sitofp i32 %foo2 to float
|
||||||
%foo4 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 0
|
%foo4 = getelementptr [2 x float], [2 x float] addrspace(5)* %f1, i32 0, i32 0
|
||||||
store float %foo3, float* %foo4
|
store float %foo3, float addrspace(5)* %foo4
|
||||||
%foo5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 1
|
%foo5 = getelementptr [2 x float], [2 x float] addrspace(5)* %f1, i32 0, i32 1
|
||||||
store float 2.000000e+00, float* %foo5
|
store float 2.000000e+00, float addrspace(5)* %foo5
|
||||||
%foo6 = load [2 x float], [2 x float]* %f1
|
%foo6 = load [2 x float], [2 x float] addrspace(5)* %f1
|
||||||
%foo7 = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 1
|
%foo7 = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 1
|
||||||
store [2 x float] %foo6, [2 x float] addrspace(1)* %foo7
|
store [2 x float] %foo6, [2 x float] addrspace(1)* %foo7
|
||||||
%foo8 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
|
%foo8 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
|
||||||
|
@ -112,21 +112,21 @@ define amdgpu_vs void @promote_store_aggr() #0 {
|
||||||
|
|
||||||
define amdgpu_vs void @promote_load_from_store_aggr() #0 {
|
define amdgpu_vs void @promote_load_from_store_aggr() #0 {
|
||||||
; CHECK-LABEL: @promote_load_from_store_aggr(
|
; CHECK-LABEL: @promote_load_from_store_aggr(
|
||||||
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
|
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
|
||||||
; CHECK-NEXT: [[F1:%.*]] = alloca [2 x float], align 4
|
; CHECK-NEXT: [[F1:%.*]] = alloca [2 x float], align 4, addrspace(5)
|
||||||
; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK3:%.*]], [[BLOCK3]] addrspace(1)* @block3, i32 0, i32 1
|
; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK3:%.*]], [[BLOCK3]] addrspace(1)* @block3, i32 0, i32 1
|
||||||
; CHECK-NEXT: [[FOO1:%.*]] = load i32, i32 addrspace(1)* [[FOO]], align 4
|
; CHECK-NEXT: [[FOO1:%.*]] = load i32, i32 addrspace(1)* [[FOO]], align 4
|
||||||
; CHECK-NEXT: store i32 [[FOO1]], i32* [[I]], align 4
|
; CHECK-NEXT: store i32 [[FOO1]], i32 addrspace(5)* [[I]], align 4
|
||||||
; CHECK-NEXT: [[FOO2:%.*]] = getelementptr [[BLOCK3]], [[BLOCK3]] addrspace(1)* @block3, i32 0, i32 0
|
; CHECK-NEXT: [[FOO2:%.*]] = getelementptr [[BLOCK3]], [[BLOCK3]] addrspace(1)* @block3, i32 0, i32 0
|
||||||
; CHECK-NEXT: [[FOO3:%.*]] = load [2 x float], [2 x float] addrspace(1)* [[FOO2]], align 4
|
; CHECK-NEXT: [[FOO3:%.*]] = load [2 x float], [2 x float] addrspace(1)* [[FOO2]], align 4
|
||||||
; CHECK-NEXT: store [2 x float] [[FOO3]], [2 x float]* [[F1]], align 4
|
; CHECK-NEXT: store [2 x float] [[FOO3]], [2 x float] addrspace(5)* [[F1]], align 4
|
||||||
; CHECK-NEXT: [[FOO4:%.*]] = load i32, i32* [[I]], align 4
|
; CHECK-NEXT: [[FOO4:%.*]] = load i32, i32 addrspace(5)* [[I]], align 4
|
||||||
; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [2 x float], [2 x float]* [[F1]], i32 0, i32 [[FOO4]]
|
; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [2 x float], [2 x float] addrspace(5)* [[F1]], i32 0, i32 [[FOO4]]
|
||||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [2 x float]* [[F1]] to <2 x float>*
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [2 x float] addrspace(5)* [[F1]] to <2 x float> addrspace(5)*
|
||||||
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 8
|
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float> addrspace(5)* [[TMP1]], align 8
|
||||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 [[FOO4]]
|
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 [[FOO4]]
|
||||||
; CHECK-NEXT: [[FOO7:%.*]] = alloca <4 x float>, align 16
|
; CHECK-NEXT: [[FOO7:%.*]] = alloca <4 x float>, align 16, addrspace(5)
|
||||||
; CHECK-NEXT: [[FOO8:%.*]] = load <4 x float>, <4 x float>* [[FOO7]], align 16
|
; CHECK-NEXT: [[FOO8:%.*]] = load <4 x float>, <4 x float> addrspace(5)* [[FOO7]], align 16
|
||||||
; CHECK-NEXT: [[FOO9:%.*]] = insertelement <4 x float> [[FOO8]], float [[TMP3]], i32 0
|
; CHECK-NEXT: [[FOO9:%.*]] = insertelement <4 x float> [[FOO8]], float [[TMP3]], i32 0
|
||||||
; CHECK-NEXT: [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[TMP3]], i32 1
|
; CHECK-NEXT: [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[TMP3]], i32 1
|
||||||
; CHECK-NEXT: [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[TMP3]], i32 2
|
; CHECK-NEXT: [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[TMP3]], i32 2
|
||||||
|
@ -135,19 +135,19 @@ define amdgpu_vs void @promote_load_from_store_aggr() #0 {
|
||||||
; CHECK-NEXT: store <4 x float> [[FOO12]], <4 x float> addrspace(1)* [[FOO13]], align 16
|
; CHECK-NEXT: store <4 x float> [[FOO12]], <4 x float> addrspace(1)* [[FOO13]], align 16
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
;
|
||||||
%i = alloca i32
|
%i = alloca i32, addrspace(5)
|
||||||
%f1 = alloca [2 x float]
|
%f1 = alloca [2 x float], addrspace(5)
|
||||||
%foo = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 1
|
%foo = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 1
|
||||||
%foo1 = load i32, i32 addrspace(1)* %foo
|
%foo1 = load i32, i32 addrspace(1)* %foo
|
||||||
store i32 %foo1, i32* %i
|
store i32 %foo1, i32 addrspace(5)* %i
|
||||||
%foo2 = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 0
|
%foo2 = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 0
|
||||||
%foo3 = load [2 x float], [2 x float] addrspace(1)* %foo2
|
%foo3 = load [2 x float], [2 x float] addrspace(1)* %foo2
|
||||||
store [2 x float] %foo3, [2 x float]* %f1
|
store [2 x float] %foo3, [2 x float] addrspace(5)* %f1
|
||||||
%foo4 = load i32, i32* %i
|
%foo4 = load i32, i32 addrspace(5)* %i
|
||||||
%foo5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 %foo4
|
%foo5 = getelementptr [2 x float], [2 x float] addrspace(5)* %f1, i32 0, i32 %foo4
|
||||||
%foo6 = load float, float* %foo5
|
%foo6 = load float, float addrspace(5)* %foo5
|
||||||
%foo7 = alloca <4 x float>
|
%foo7 = alloca <4 x float>, addrspace(5)
|
||||||
%foo8 = load <4 x float>, <4 x float>* %foo7
|
%foo8 = load <4 x float>, <4 x float> addrspace(5)* %foo7
|
||||||
%foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
|
%foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
|
||||||
%foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
|
%foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
|
||||||
%foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
|
%foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
|
||||||
|
@ -162,35 +162,35 @@ define amdgpu_vs void @promote_load_from_store_aggr() #0 {
|
||||||
|
|
||||||
define amdgpu_ps void @promote_double_aggr() #0 {
|
define amdgpu_ps void @promote_double_aggr() #0 {
|
||||||
; CHECK-LABEL: @promote_double_aggr(
|
; CHECK-LABEL: @promote_double_aggr(
|
||||||
; CHECK-NEXT: [[S:%.*]] = alloca [2 x double], align 8
|
; CHECK-NEXT: [[S:%.*]] = alloca [2 x double], align 8, addrspace(5)
|
||||||
; CHECK-NEXT: [[FOO:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0
|
; CHECK-NEXT: [[FOO:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0
|
||||||
; CHECK-NEXT: [[FOO1:%.*]] = load double, double addrspace(1)* [[FOO]], align 8
|
; CHECK-NEXT: [[FOO1:%.*]] = load double, double addrspace(1)* [[FOO]], align 8
|
||||||
; CHECK-NEXT: [[FOO2:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1
|
; CHECK-NEXT: [[FOO2:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1
|
||||||
; CHECK-NEXT: [[FOO3:%.*]] = load double, double addrspace(1)* [[FOO2]], align 8
|
; CHECK-NEXT: [[FOO3:%.*]] = load double, double addrspace(1)* [[FOO2]], align 8
|
||||||
; CHECK-NEXT: [[FOO4:%.*]] = insertvalue [2 x double] undef, double [[FOO1]], 0
|
; CHECK-NEXT: [[FOO4:%.*]] = insertvalue [2 x double] undef, double [[FOO1]], 0
|
||||||
; CHECK-NEXT: [[FOO5:%.*]] = insertvalue [2 x double] [[FOO4]], double [[FOO3]], 1
|
; CHECK-NEXT: [[FOO5:%.*]] = insertvalue [2 x double] [[FOO4]], double [[FOO3]], 1
|
||||||
; CHECK-NEXT: store [2 x double] [[FOO5]], [2 x double]* [[S]], align 8
|
; CHECK-NEXT: store [2 x double] [[FOO5]], [2 x double] addrspace(5)* [[S]], align 8
|
||||||
; CHECK-NEXT: [[FOO6:%.*]] = getelementptr [2 x double], [2 x double]* [[S]], i32 0, i32 1
|
; CHECK-NEXT: [[FOO6:%.*]] = getelementptr [2 x double], [2 x double] addrspace(5)* [[S]], i32 0, i32 1
|
||||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [2 x double]* [[S]] to <2 x double>*
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [2 x double] addrspace(5)* [[S]] to <2 x double> addrspace(5)*
|
||||||
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 16
|
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double> addrspace(5)* [[TMP1]], align 16
|
||||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i64 1
|
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i64 1
|
||||||
; CHECK-NEXT: [[FOO8:%.*]] = getelementptr [2 x double], [2 x double]* [[S]], i32 0, i32 1
|
; CHECK-NEXT: [[FOO8:%.*]] = getelementptr [2 x double], [2 x double] addrspace(5)* [[S]], i32 0, i32 1
|
||||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast [2 x double]* [[S]] to <2 x double>*
|
; CHECK-NEXT: [[TMP4:%.*]] = bitcast [2 x double] addrspace(5)* [[S]] to <2 x double> addrspace(5)*
|
||||||
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 16
|
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double> addrspace(5)* [[TMP4]], align 16
|
||||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i64 1
|
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i64 1
|
||||||
; CHECK-NEXT: [[FOO10:%.*]] = fadd double [[TMP3]], [[TMP6]]
|
; CHECK-NEXT: [[FOO10:%.*]] = fadd double [[TMP3]], [[TMP6]]
|
||||||
; CHECK-NEXT: [[FOO11:%.*]] = getelementptr [2 x double], [2 x double]* [[S]], i32 0, i32 0
|
; CHECK-NEXT: [[FOO11:%.*]] = getelementptr [2 x double], [2 x double] addrspace(5)* [[S]], i32 0, i32 0
|
||||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast [2 x double]* [[S]] to <2 x double>*
|
; CHECK-NEXT: [[TMP7:%.*]] = bitcast [2 x double] addrspace(5)* [[S]] to <2 x double> addrspace(5)*
|
||||||
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, <2 x double>* [[TMP7]], align 16
|
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, <2 x double> addrspace(5)* [[TMP7]], align 16
|
||||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[FOO10]], i32 0
|
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[FOO10]], i32 0
|
||||||
; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP7]], align 16
|
; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double> addrspace(5)* [[TMP7]], align 16
|
||||||
; CHECK-NEXT: [[FOO12:%.*]] = getelementptr [2 x double], [2 x double]* [[S]], i32 0, i32 0
|
; CHECK-NEXT: [[FOO12:%.*]] = getelementptr [2 x double], [2 x double] addrspace(5)* [[S]], i32 0, i32 0
|
||||||
; CHECK-NEXT: [[TMP10:%.*]] = bitcast [2 x double]* [[S]] to <2 x double>*
|
; CHECK-NEXT: [[TMP10:%.*]] = bitcast [2 x double] addrspace(5)* [[S]] to <2 x double> addrspace(5)*
|
||||||
; CHECK-NEXT: [[TMP11:%.*]] = load <2 x double>, <2 x double>* [[TMP10]], align 16
|
; CHECK-NEXT: [[TMP11:%.*]] = load <2 x double>, <2 x double> addrspace(5)* [[TMP10]], align 16
|
||||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
|
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
|
||||||
; CHECK-NEXT: [[FOO14:%.*]] = getelementptr [2 x double], [2 x double]* [[S]], i32 0, i32 1
|
; CHECK-NEXT: [[FOO14:%.*]] = getelementptr [2 x double], [2 x double] addrspace(5)* [[S]], i32 0, i32 1
|
||||||
; CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x double]* [[S]] to <2 x double>*
|
; CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x double] addrspace(5)* [[S]] to <2 x double> addrspace(5)*
|
||||||
; CHECK-NEXT: [[TMP14:%.*]] = load <2 x double>, <2 x double>* [[TMP13]], align 16
|
; CHECK-NEXT: [[TMP14:%.*]] = load <2 x double>, <2 x double> addrspace(5)* [[TMP13]], align 16
|
||||||
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[TMP14]], i64 1
|
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[TMP14]], i64 1
|
||||||
; CHECK-NEXT: [[FOO16:%.*]] = fadd double [[TMP12]], [[TMP15]]
|
; CHECK-NEXT: [[FOO16:%.*]] = fadd double [[TMP12]], [[TMP15]]
|
||||||
; CHECK-NEXT: [[FOO17:%.*]] = fptrunc double [[FOO16]] to float
|
; CHECK-NEXT: [[FOO17:%.*]] = fptrunc double [[FOO16]] to float
|
||||||
|
@ -201,25 +201,25 @@ define amdgpu_ps void @promote_double_aggr() #0 {
|
||||||
; CHECK-NEXT: store <4 x float> [[FOO21]], <4 x float> addrspace(1)* @frag_color, align 16
|
; CHECK-NEXT: store <4 x float> [[FOO21]], <4 x float> addrspace(1)* @frag_color, align 16
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
;
|
||||||
%s = alloca [2 x double]
|
%s = alloca [2 x double], addrspace(5)
|
||||||
%foo = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0
|
%foo = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0
|
||||||
%foo1 = load double, double addrspace(1)* %foo
|
%foo1 = load double, double addrspace(1)* %foo
|
||||||
%foo2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1
|
%foo2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1
|
||||||
%foo3 = load double, double addrspace(1)* %foo2
|
%foo3 = load double, double addrspace(1)* %foo2
|
||||||
%foo4 = insertvalue [2 x double] undef, double %foo1, 0
|
%foo4 = insertvalue [2 x double] undef, double %foo1, 0
|
||||||
%foo5 = insertvalue [2 x double] %foo4, double %foo3, 1
|
%foo5 = insertvalue [2 x double] %foo4, double %foo3, 1
|
||||||
store [2 x double] %foo5, [2 x double]* %s
|
store [2 x double] %foo5, [2 x double] addrspace(5)* %s
|
||||||
%foo6 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
|
%foo6 = getelementptr [2 x double], [2 x double] addrspace(5)* %s, i32 0, i32 1
|
||||||
%foo7 = load double, double* %foo6
|
%foo7 = load double, double addrspace(5)* %foo6
|
||||||
%foo8 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
|
%foo8 = getelementptr [2 x double], [2 x double] addrspace(5)* %s, i32 0, i32 1
|
||||||
%foo9 = load double, double* %foo8
|
%foo9 = load double, double addrspace(5)* %foo8
|
||||||
%foo10 = fadd double %foo7, %foo9
|
%foo10 = fadd double %foo7, %foo9
|
||||||
%foo11 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0
|
%foo11 = getelementptr [2 x double], [2 x double] addrspace(5)* %s, i32 0, i32 0
|
||||||
store double %foo10, double* %foo11
|
store double %foo10, double addrspace(5)* %foo11
|
||||||
%foo12 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0
|
%foo12 = getelementptr [2 x double], [2 x double] addrspace(5)* %s, i32 0, i32 0
|
||||||
%foo13 = load double, double* %foo12
|
%foo13 = load double, double addrspace(5)* %foo12
|
||||||
%foo14 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
|
%foo14 = getelementptr [2 x double], [2 x double] addrspace(5)* %s, i32 0, i32 1
|
||||||
%foo15 = load double, double* %foo14
|
%foo15 = load double, double addrspace(5)* %foo14
|
||||||
%foo16 = fadd double %foo13, %foo15
|
%foo16 = fadd double %foo13, %foo15
|
||||||
%foo17 = fptrunc double %foo16 to float
|
%foo17 = fptrunc double %foo16 to float
|
||||||
%foo18 = insertelement <4 x float> undef, float %foo17, i32 0
|
%foo18 = insertelement <4 x float> undef, float %foo17, i32 0
|
||||||
|
@ -253,6 +253,6 @@ define amdgpu_kernel void @alloca_struct() #0 {
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
;
|
||||||
entry:
|
entry:
|
||||||
%alloca = alloca [2 x %struct], align 4
|
%alloca = alloca [2 x %struct], align 4, addrspace(5)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,44 +4,44 @@
|
||||||
; number of elements.
|
; number of elements.
|
||||||
|
|
||||||
; CHECK-LABEL: @array_alloca(
|
; CHECK-LABEL: @array_alloca(
|
||||||
; CHECK: %stack = alloca i32, i32 5, align 4
|
; CHECK: %stack = alloca i32, i32 5, align 4, addrspace(5)
|
||||||
define amdgpu_kernel void @array_alloca(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
|
define amdgpu_kernel void @array_alloca(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
|
||||||
entry:
|
entry:
|
||||||
%stack = alloca i32, i32 5, align 4
|
%stack = alloca i32, i32 5, align 4, addrspace(5)
|
||||||
%ld0 = load i32, i32 addrspace(1)* %in, align 4
|
%ld0 = load i32, i32 addrspace(1)* %in, align 4
|
||||||
%arrayidx1 = getelementptr inbounds i32, i32* %stack, i32 %ld0
|
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 %ld0
|
||||||
store i32 4, i32* %arrayidx1, align 4
|
store i32 4, i32 addrspace(5)* %arrayidx1, align 4
|
||||||
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
|
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
|
||||||
%ld1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
|
%ld1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
|
||||||
%arrayidx3 = getelementptr inbounds i32, i32* %stack, i32 %ld1
|
%arrayidx3 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 %ld1
|
||||||
store i32 5, i32* %arrayidx3, align 4
|
store i32 5, i32 addrspace(5)* %arrayidx3, align 4
|
||||||
%arrayidx10 = getelementptr inbounds i32, i32* %stack, i32 0
|
%arrayidx10 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 0
|
||||||
%ld2 = load i32, i32* %arrayidx10, align 4
|
%ld2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
|
||||||
store i32 %ld2, i32 addrspace(1)* %out, align 4
|
store i32 %ld2, i32 addrspace(1)* %out, align 4
|
||||||
%arrayidx12 = getelementptr inbounds i32, i32* %stack, i32 1
|
%arrayidx12 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 1
|
||||||
%ld3 = load i32, i32* %arrayidx12
|
%ld3 = load i32, i32 addrspace(5)* %arrayidx12
|
||||||
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
|
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
|
||||||
store i32 %ld3, i32 addrspace(1)* %arrayidx13
|
store i32 %ld3, i32 addrspace(1)* %arrayidx13
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: @array_alloca_dynamic(
|
; CHECK-LABEL: @array_alloca_dynamic(
|
||||||
; CHECK: %stack = alloca i32, i32 %size, align 4
|
; CHECK: %stack = alloca i32, i32 %size, align 4, addrspace(5)
|
||||||
define amdgpu_kernel void @array_alloca_dynamic(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %size) #0 {
|
define amdgpu_kernel void @array_alloca_dynamic(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %size) #0 {
|
||||||
entry:
|
entry:
|
||||||
%stack = alloca i32, i32 %size, align 4
|
%stack = alloca i32, i32 %size, align 4, addrspace(5)
|
||||||
%ld0 = load i32, i32 addrspace(1)* %in, align 4
|
%ld0 = load i32, i32 addrspace(1)* %in, align 4
|
||||||
%arrayidx1 = getelementptr inbounds i32, i32* %stack, i32 %ld0
|
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 %ld0
|
||||||
store i32 4, i32* %arrayidx1, align 4
|
store i32 4, i32 addrspace(5)* %arrayidx1, align 4
|
||||||
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
|
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
|
||||||
%ld1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
|
%ld1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
|
||||||
%arrayidx3 = getelementptr inbounds i32, i32* %stack, i32 %ld1
|
%arrayidx3 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 %ld1
|
||||||
store i32 5, i32* %arrayidx3, align 4
|
store i32 5, i32 addrspace(5)* %arrayidx3, align 4
|
||||||
%arrayidx10 = getelementptr inbounds i32, i32* %stack, i32 0
|
%arrayidx10 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 0
|
||||||
%ld2 = load i32, i32* %arrayidx10, align 4
|
%ld2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
|
||||||
store i32 %ld2, i32 addrspace(1)* %out, align 4
|
store i32 %ld2, i32 addrspace(1)* %out, align 4
|
||||||
%arrayidx12 = getelementptr inbounds i32, i32* %stack, i32 1
|
%arrayidx12 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 1
|
||||||
%ld3 = load i32, i32* %arrayidx12
|
%ld3 = load i32, i32 addrspace(5)* %arrayidx12
|
||||||
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
|
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
|
||||||
store i32 %ld3, i32 addrspace(1)* %arrayidx13
|
store i32 %ld3, i32 addrspace(1)* %arrayidx13
|
||||||
ret void
|
ret void
|
||||||
|
|
|
@ -1,28 +1,28 @@
|
||||||
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -passes=amdgpu-promote-alloca < %s | FileCheck %s
|
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -passes=amdgpu-promote-alloca < %s | FileCheck %s
|
||||||
|
|
||||||
declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0
|
declare void @llvm.memcpy.p5i8.p1i8.i32(i8 addrspace(5)* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0
|
||||||
declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) #0
|
declare void @llvm.memcpy.p1i8.p5i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(5)* nocapture, i32, i1) #0
|
||||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #0
|
declare void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* nocapture, i8 addrspace(5)* nocapture, i64, i1) #0
|
||||||
|
|
||||||
declare void @llvm.memmove.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0
|
declare void @llvm.memmove.p5i8.p1i8.i32(i8 addrspace(5)* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0
|
||||||
declare void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) #0
|
declare void @llvm.memmove.p1i8.p5i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(5)* nocapture, i32, i1) #0
|
||||||
declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #0
|
declare void @llvm.memmove.p5i8.p5i8.i64(i8 addrspace(5)* nocapture, i8 addrspace(5)* nocapture, i64, i1) #0
|
||||||
|
|
||||||
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0
|
declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture, i8, i32, i1) #0
|
||||||
|
|
||||||
declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1, i1) #1
|
declare i32 @llvm.objectsize.i32.p5i8(i8 addrspace(5)*, i1, i1, i1) #1
|
||||||
|
|
||||||
; CHECK-LABEL: @promote_with_memcpy(
|
; CHECK-LABEL: @promote_with_memcpy(
|
||||||
; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memcpy.alloca, i32 0, i32 %{{[0-9]+}}
|
; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memcpy.alloca, i32 0, i32 %{{[0-9]+}}
|
||||||
; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
|
; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
|
||||||
; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false)
|
; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false)
|
||||||
define amdgpu_kernel void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
define amdgpu_kernel void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||||
%alloca = alloca [17 x i32], align 4
|
%alloca = alloca [17 x i32], align 4, addrspace(5)
|
||||||
%alloca.bc = bitcast [17 x i32]* %alloca to i8*
|
%alloca.bc = bitcast [17 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
|
||||||
%in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)*
|
%in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)*
|
||||||
%out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)*
|
%out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)*
|
||||||
call void @llvm.memcpy.p0i8.p1i8.i32(i8* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
|
call void @llvm.memcpy.p5i8.p1i8.i32(i8 addrspace(5)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
|
||||||
call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 4 %out.bc, i8* align 4 %alloca.bc, i32 68, i1 false)
|
call void @llvm.memcpy.p1i8.p5i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(5)* align 4 %alloca.bc, i32 68, i1 false)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -31,12 +31,12 @@ define amdgpu_kernel void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrs
|
||||||
; CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
|
; CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
|
||||||
; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false)
|
; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false)
|
||||||
define amdgpu_kernel void @promote_with_memmove(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
define amdgpu_kernel void @promote_with_memmove(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||||
%alloca = alloca [17 x i32], align 4
|
%alloca = alloca [17 x i32], align 4, addrspace(5)
|
||||||
%alloca.bc = bitcast [17 x i32]* %alloca to i8*
|
%alloca.bc = bitcast [17 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
|
||||||
%in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)*
|
%in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)*
|
||||||
%out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)*
|
%out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)*
|
||||||
call void @llvm.memmove.p0i8.p1i8.i32(i8* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
|
call void @llvm.memmove.p5i8.p1i8.i32(i8 addrspace(5)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
|
||||||
call void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* align 4 %out.bc, i8* align 4 %alloca.bc, i32 68, i1 false)
|
call void @llvm.memmove.p1i8.p5i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(5)* align 4 %alloca.bc, i32 68, i1 false)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,11 +44,11 @@ define amdgpu_kernel void @promote_with_memmove(i32 addrspace(1)* %out, i32 addr
|
||||||
; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memset.alloca, i32 0, i32 %{{[0-9]+}}
|
; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memset.alloca, i32 0, i32 %{{[0-9]+}}
|
||||||
; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 7, i32 68, i1 false)
|
; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 7, i32 68, i1 false)
|
||||||
define amdgpu_kernel void @promote_with_memset(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
define amdgpu_kernel void @promote_with_memset(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||||
%alloca = alloca [17 x i32], align 4
|
%alloca = alloca [17 x i32], align 4, addrspace(5)
|
||||||
%alloca.bc = bitcast [17 x i32]* %alloca to i8*
|
%alloca.bc = bitcast [17 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
|
||||||
%in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)*
|
%in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)*
|
||||||
%out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)*
|
%out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)*
|
||||||
call void @llvm.memset.p0i8.i32(i8* align 4 %alloca.bc, i8 7, i32 68, i1 false)
|
call void @llvm.memset.p5i8.i32(i8 addrspace(5)* align 4 %alloca.bc, i8 7, i32 68, i1 false)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -56,9 +56,9 @@ define amdgpu_kernel void @promote_with_memset(i32 addrspace(1)* %out, i32 addrs
|
||||||
; CHECK: [[PTR:%[0-9]+]] = getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_objectsize.alloca, i32 0, i32 %{{[0-9]+}}
|
; CHECK: [[PTR:%[0-9]+]] = getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_objectsize.alloca, i32 0, i32 %{{[0-9]+}}
|
||||||
; CHECK: call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %alloca.bc, i1 false, i1 false, i1 false)
|
; CHECK: call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %alloca.bc, i1 false, i1 false, i1 false)
|
||||||
define amdgpu_kernel void @promote_with_objectsize(i32 addrspace(1)* %out) #0 {
|
define amdgpu_kernel void @promote_with_objectsize(i32 addrspace(1)* %out) #0 {
|
||||||
%alloca = alloca [17 x i32], align 4
|
%alloca = alloca [17 x i32], align 4, addrspace(5)
|
||||||
%alloca.bc = bitcast [17 x i32]* %alloca to i8*
|
%alloca.bc = bitcast [17 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
|
||||||
%size = call i32 @llvm.objectsize.i32.p0i8(i8* %alloca.bc, i1 false, i1 false, i1 false)
|
%size = call i32 @llvm.objectsize.i32.p5i8(i8 addrspace(5)* %alloca.bc, i1 false, i1 false, i1 false)
|
||||||
store i32 %size, i32 addrspace(1)* %out
|
store i32 %size, i32 addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
@ -69,12 +69,12 @@ define amdgpu_kernel void @promote_with_objectsize(i32 addrspace(1)* %out) #0 {
|
||||||
; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* align 8 dereferenceable(16) %i, i8 addrspace(3)* align 8 dereferenceable(16) %i1, i64 16, i1 false)
|
; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* align 8 dereferenceable(16) %i, i8 addrspace(3)* align 8 dereferenceable(16) %i1, i64 16, i1 false)
|
||||||
define amdgpu_kernel void @promote_alloca_used_twice_in_memcpy(i32 %c) {
|
define amdgpu_kernel void @promote_alloca_used_twice_in_memcpy(i32 %c) {
|
||||||
entry:
|
entry:
|
||||||
%r = alloca double, align 8
|
%r = alloca double, align 8, addrspace(5)
|
||||||
%arrayidx1 = getelementptr inbounds double, double* %r, i32 1
|
%arrayidx1 = getelementptr inbounds double, double addrspace(5)* %r, i32 1
|
||||||
%i = bitcast double* %arrayidx1 to i8*
|
%i = bitcast double addrspace(5)* %arrayidx1 to i8 addrspace(5)*
|
||||||
%arrayidx2 = getelementptr inbounds double, double* %r, i32 %c
|
%arrayidx2 = getelementptr inbounds double, double addrspace(5)* %r, i32 %c
|
||||||
%i1 = bitcast double* %arrayidx2 to i8*
|
%i1 = bitcast double addrspace(5)* %arrayidx2 to i8 addrspace(5)*
|
||||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 dereferenceable(16) %i, i8* align 8 dereferenceable(16) %i1, i64 16, i1 false)
|
call void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* align 8 dereferenceable(16) %i, i8 addrspace(5)* align 8 dereferenceable(16) %i1, i64 16, i1 false)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -84,12 +84,12 @@ entry:
|
||||||
; CHECK: call void @llvm.memmove.p3i8.p3i8.i64(i8 addrspace(3)* align 8 dereferenceable(16) %i, i8 addrspace(3)* align 8 dereferenceable(16) %i1, i64 16, i1 false)
|
; CHECK: call void @llvm.memmove.p3i8.p3i8.i64(i8 addrspace(3)* align 8 dereferenceable(16) %i, i8 addrspace(3)* align 8 dereferenceable(16) %i1, i64 16, i1 false)
|
||||||
define amdgpu_kernel void @promote_alloca_used_twice_in_memmove(i32 %c) {
|
define amdgpu_kernel void @promote_alloca_used_twice_in_memmove(i32 %c) {
|
||||||
entry:
|
entry:
|
||||||
%r = alloca double, align 8
|
%r = alloca double, align 8, addrspace(5)
|
||||||
%arrayidx1 = getelementptr inbounds double, double* %r, i32 1
|
%arrayidx1 = getelementptr inbounds double, double addrspace(5)* %r, i32 1
|
||||||
%i = bitcast double* %arrayidx1 to i8*
|
%i = bitcast double addrspace(5)* %arrayidx1 to i8 addrspace(5)*
|
||||||
%arrayidx2 = getelementptr inbounds double, double* %r, i32 %c
|
%arrayidx2 = getelementptr inbounds double, double addrspace(5)* %r, i32 %c
|
||||||
%i1 = bitcast double* %arrayidx2 to i8*
|
%i1 = bitcast double addrspace(5)* %arrayidx2 to i8 addrspace(5)*
|
||||||
call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 dereferenceable(16) %i, i8* align 8 dereferenceable(16) %i1, i64 16, i1 false)
|
call void @llvm.memmove.p5i8.p5i8.i64(i8 addrspace(5)* align 8 dereferenceable(16) %i, i8 addrspace(5)* align 8 dereferenceable(16) %i1, i64 16, i1 false)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,18 +31,18 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(i32 add
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
;
|
||||||
; NOLDS-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer(
|
; NOLDS-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer(
|
||||||
; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4
|
; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4, addrspace(5)
|
||||||
; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[A:%.*]]
|
; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* [[ALLOCA]], i32 0, i32 [[A:%.*]]
|
||||||
; NOLDS-NEXT: [[PTR1:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[B:%.*]]
|
; NOLDS-NEXT: [[PTR1:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* [[ALLOCA]], i32 0, i32 [[B:%.*]]
|
||||||
; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR0]], [[PTR1]]
|
; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32 addrspace(5)* [[PTR0]], [[PTR1]]
|
||||||
; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
|
; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
|
||||||
; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
|
; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
|
||||||
; NOLDS-NEXT: ret void
|
; NOLDS-NEXT: ret void
|
||||||
;
|
;
|
||||||
%alloca = alloca [16 x i32], align 4
|
%alloca = alloca [16 x i32], align 4, addrspace(5)
|
||||||
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
|
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
|
||||||
%ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
|
%ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %b
|
||||||
%cmp = icmp eq i32* %ptr0, %ptr1
|
%cmp = icmp eq i32 addrspace(5)* %ptr0, %ptr1
|
||||||
%zext = zext i1 %cmp to i32
|
%zext = zext i1 %cmp to i32
|
||||||
store volatile i32 %zext, i32 addrspace(1)* %out
|
store volatile i32 %zext, i32 addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
|
@ -73,16 +73,16 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(i32 addrspace(1)* %
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
;
|
||||||
; NOLDS-LABEL: @lds_promoted_alloca_icmp_null_rhs(
|
; NOLDS-LABEL: @lds_promoted_alloca_icmp_null_rhs(
|
||||||
; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4
|
; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4, addrspace(5)
|
||||||
; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[A:%.*]]
|
; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* [[ALLOCA]], i32 0, i32 [[A:%.*]]
|
||||||
; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR0]], null
|
; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32 addrspace(5)* [[PTR0]], null
|
||||||
; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
|
; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
|
||||||
; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
|
; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
|
||||||
; NOLDS-NEXT: ret void
|
; NOLDS-NEXT: ret void
|
||||||
;
|
;
|
||||||
%alloca = alloca [16 x i32], align 4
|
%alloca = alloca [16 x i32], align 4, addrspace(5)
|
||||||
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
|
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
|
||||||
%cmp = icmp eq i32* %ptr0, null
|
%cmp = icmp eq i32 addrspace(5)* %ptr0, null
|
||||||
%zext = zext i1 %cmp to i32
|
%zext = zext i1 %cmp to i32
|
||||||
store volatile i32 %zext, i32 addrspace(1)* %out
|
store volatile i32 %zext, i32 addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
|
@ -113,16 +113,16 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(i32 addrspace(1)* %
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
;
|
||||||
; NOLDS-LABEL: @lds_promoted_alloca_icmp_null_lhs(
|
; NOLDS-LABEL: @lds_promoted_alloca_icmp_null_lhs(
|
||||||
; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4
|
; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4, addrspace(5)
|
||||||
; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[A:%.*]]
|
; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* [[ALLOCA]], i32 0, i32 [[A:%.*]]
|
||||||
; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32* null, [[PTR0]]
|
; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32 addrspace(5)* null, [[PTR0]]
|
||||||
; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
|
; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
|
||||||
; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
|
; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
|
||||||
; NOLDS-NEXT: ret void
|
; NOLDS-NEXT: ret void
|
||||||
;
|
;
|
||||||
%alloca = alloca [16 x i32], align 4
|
%alloca = alloca [16 x i32], align 4, addrspace(5)
|
||||||
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
|
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
|
||||||
%cmp = icmp eq i32* null, %ptr0
|
%cmp = icmp eq i32 addrspace(5)* null, %ptr0
|
||||||
%zext = zext i1 %cmp to i32
|
%zext = zext i1 %cmp to i32
|
||||||
store volatile i32 %zext, i32 addrspace(1)* %out
|
store volatile i32 %zext, i32 addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
|
@ -130,32 +130,32 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(i32 addrspace(1)* %
|
||||||
|
|
||||||
define amdgpu_kernel void @lds_promoted_alloca_icmp_unknown_ptr(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
|
define amdgpu_kernel void @lds_promoted_alloca_icmp_unknown_ptr(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
|
||||||
; CHECK-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
|
; CHECK-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
|
||||||
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4
|
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4, addrspace(5)
|
||||||
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[A:%.*]]
|
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* [[ALLOCA]], i32 0, i32 [[A:%.*]]
|
||||||
; CHECK-NEXT: [[PTR1:%.*]] = call i32* @get_unknown_pointer()
|
; CHECK-NEXT: [[PTR1:%.*]] = call i32 addrspace(5)* @get_unknown_pointer()
|
||||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR0]], [[PTR1]]
|
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 addrspace(5)* [[PTR0]], [[PTR1]]
|
||||||
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
|
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
|
||||||
; CHECK-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
|
; CHECK-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
;
|
||||||
; NOLDS-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
|
; NOLDS-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
|
||||||
; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4
|
; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4, addrspace(5)
|
||||||
; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[A:%.*]]
|
; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* [[ALLOCA]], i32 0, i32 [[A:%.*]]
|
||||||
; NOLDS-NEXT: [[PTR1:%.*]] = call i32* @get_unknown_pointer()
|
; NOLDS-NEXT: [[PTR1:%.*]] = call i32 addrspace(5)* @get_unknown_pointer()
|
||||||
; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR0]], [[PTR1]]
|
; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32 addrspace(5)* [[PTR0]], [[PTR1]]
|
||||||
; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
|
; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
|
||||||
; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
|
; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
|
||||||
; NOLDS-NEXT: ret void
|
; NOLDS-NEXT: ret void
|
||||||
;
|
;
|
||||||
%alloca = alloca [16 x i32], align 4
|
%alloca = alloca [16 x i32], align 4, addrspace(5)
|
||||||
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
|
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
|
||||||
%ptr1 = call i32* @get_unknown_pointer()
|
%ptr1 = call i32 addrspace(5)* @get_unknown_pointer()
|
||||||
%cmp = icmp eq i32* %ptr0, %ptr1
|
%cmp = icmp eq i32 addrspace(5)* %ptr0, %ptr1
|
||||||
%zext = zext i1 %cmp to i32
|
%zext = zext i1 %cmp to i32
|
||||||
store volatile i32 %zext, i32 addrspace(1)* %out
|
store volatile i32 %zext, i32 addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
declare i32* @get_unknown_pointer() #0
|
declare i32 addrspace(5)* @get_unknown_pointer() #0
|
||||||
|
|
||||||
attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
|
attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
|
||||||
|
|
|
@ -15,20 +15,20 @@
|
||||||
; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
|
; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
|
||||||
define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 {
|
define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 {
|
||||||
entry:
|
entry:
|
||||||
%alloca = alloca [64 x i32], align 4
|
%alloca = alloca [64 x i32], align 4, addrspace(5)
|
||||||
br i1 undef, label %if, label %else
|
br i1 undef, label %if, label %else
|
||||||
|
|
||||||
if:
|
if:
|
||||||
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
|
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a
|
||||||
br label %endif
|
br label %endif
|
||||||
|
|
||||||
else:
|
else:
|
||||||
%arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %b
|
%arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %b
|
||||||
br label %endif
|
br label %endif
|
||||||
|
|
||||||
endif:
|
endif:
|
||||||
%phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
|
%phi.ptr = phi i32 addrspace(5)* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
|
||||||
store i32 0, i32* %phi.ptr, align 4
|
store i32 0, i32 addrspace(5)* %phi.ptr, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36,16 +36,16 @@ endif:
|
||||||
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ null, %entry ]
|
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ null, %entry ]
|
||||||
define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 {
|
define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 {
|
||||||
entry:
|
entry:
|
||||||
%alloca = alloca [64 x i32], align 4
|
%alloca = alloca [64 x i32], align 4, addrspace(5)
|
||||||
br i1 undef, label %if, label %endif
|
br i1 undef, label %if, label %endif
|
||||||
|
|
||||||
if:
|
if:
|
||||||
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
|
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a
|
||||||
br label %endif
|
br label %endif
|
||||||
|
|
||||||
endif:
|
endif:
|
||||||
%phi.ptr = phi i32* [ %arrayidx0, %if ], [ null, %entry ]
|
%phi.ptr = phi i32 addrspace(5)* [ %arrayidx0, %if ], [ null, %entry ]
|
||||||
store i32 0, i32* %phi.ptr, align 4
|
store i32 0, i32 addrspace(5)* %phi.ptr, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,16 +53,16 @@ endif:
|
||||||
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ null, %entry ], [ %arrayidx0, %if ]
|
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ null, %entry ], [ %arrayidx0, %if ]
|
||||||
define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 {
|
define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 {
|
||||||
entry:
|
entry:
|
||||||
%alloca = alloca [64 x i32], align 4
|
%alloca = alloca [64 x i32], align 4, addrspace(5)
|
||||||
br i1 undef, label %if, label %endif
|
br i1 undef, label %if, label %endif
|
||||||
|
|
||||||
if:
|
if:
|
||||||
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
|
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a
|
||||||
br label %endif
|
br label %endif
|
||||||
|
|
||||||
endif:
|
endif:
|
||||||
%phi.ptr = phi i32* [ null, %entry ], [ %arrayidx0, %if ]
|
%phi.ptr = phi i32 addrspace(5)* [ null, %entry ], [ %arrayidx0, %if ]
|
||||||
store i32 0, i32* %phi.ptr, align 4
|
store i32 0, i32 addrspace(5)* %phi.ptr, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -75,13 +75,13 @@ endif:
|
||||||
; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
|
; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
|
||||||
define amdgpu_kernel void @one_phi_value(i32 %a) #0 {
|
define amdgpu_kernel void @one_phi_value(i32 %a) #0 {
|
||||||
entry:
|
entry:
|
||||||
%alloca = alloca [64 x i32], align 4
|
%alloca = alloca [64 x i32], align 4, addrspace(5)
|
||||||
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
|
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a
|
||||||
br label %exit
|
br label %exit
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
%phi.ptr = phi i32* [ %arrayidx0, %entry ]
|
%phi.ptr = phi i32 addrspace(5)* [ %arrayidx0, %entry ]
|
||||||
store i32 0, i32* %phi.ptr, align 4
|
store i32 0, i32 addrspace(5)* %phi.ptr, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -89,30 +89,30 @@ exit:
|
||||||
; CHECK: %alloca = alloca [64 x i32], align 4
|
; CHECK: %alloca = alloca [64 x i32], align 4
|
||||||
|
|
||||||
; CHECK: if:
|
; CHECK: if:
|
||||||
; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
|
; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a
|
||||||
|
|
||||||
; CHECK: else:
|
; CHECK: else:
|
||||||
; CHECK: %arrayidx1 = call i32* @get_unknown_pointer()
|
; CHECK: %arrayidx1 = call i32 addrspace(5)* @get_unknown_pointer()
|
||||||
|
|
||||||
; CHECK: endif:
|
; CHECK: endif:
|
||||||
; CHECK: %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
|
; CHECK: %phi.ptr = phi i32 addrspace(5)* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
|
||||||
; CHECK: store i32 0, i32* %phi.ptr, align 4
|
; CHECK: store i32 0, i32 addrspace(5)* %phi.ptr, align 4
|
||||||
define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 {
|
define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 {
|
||||||
entry:
|
entry:
|
||||||
%alloca = alloca [64 x i32], align 4
|
%alloca = alloca [64 x i32], align 4, addrspace(5)
|
||||||
br i1 undef, label %if, label %else
|
br i1 undef, label %if, label %else
|
||||||
|
|
||||||
if:
|
if:
|
||||||
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
|
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a
|
||||||
br label %endif
|
br label %endif
|
||||||
|
|
||||||
else:
|
else:
|
||||||
%arrayidx1 = call i32* @get_unknown_pointer()
|
%arrayidx1 = call i32 addrspace(5)* @get_unknown_pointer()
|
||||||
br label %endif
|
br label %endif
|
||||||
|
|
||||||
endif:
|
endif:
|
||||||
%phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
|
%phi.ptr = phi i32 addrspace(5)* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
|
||||||
store i32 0, i32* %phi.ptr, align 4
|
store i32 0, i32 addrspace(5)* %phi.ptr, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -133,12 +133,12 @@ endif:
|
||||||
|
|
||||||
; CHECK-LABEL: @ptr_induction_var_same_alloca(
|
; CHECK-LABEL: @ptr_induction_var_same_alloca(
|
||||||
; CHECK: %alloca = alloca [64 x i32], align 4
|
; CHECK: %alloca = alloca [64 x i32], align 4
|
||||||
; CHECK: phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
|
; CHECK: phi i32 addrspace(5)* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
|
||||||
define amdgpu_kernel void @ptr_induction_var_same_alloca() #0 {
|
define amdgpu_kernel void @ptr_induction_var_same_alloca() #0 {
|
||||||
entry:
|
entry:
|
||||||
%alloca = alloca [64 x i32], align 4
|
%alloca = alloca [64 x i32], align 4, addrspace(5)
|
||||||
%arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
|
%arrayidx = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 2
|
||||||
%arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 48
|
%arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 48
|
||||||
br label %for.body
|
br label %for.body
|
||||||
|
|
||||||
for.cond.cleanup: ; preds = %for.body
|
for.cond.cleanup: ; preds = %for.body
|
||||||
|
@ -146,11 +146,11 @@ for.cond.cleanup: ; preds = %for.body
|
||||||
|
|
||||||
for.body: ; preds = %for.body, %entry
|
for.body: ; preds = %for.body, %entry
|
||||||
%i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
%i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||||
%p.08 = phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
|
%p.08 = phi i32 addrspace(5)* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
|
||||||
store i32 %i.09, i32* %p.08, align 4
|
store i32 %i.09, i32 addrspace(5)* %p.08, align 4
|
||||||
%incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
|
%incdec.ptr = getelementptr inbounds i32, i32 addrspace(5)* %p.08, i32 1
|
||||||
%inc = add nuw nsw i32 %i.09, 1
|
%inc = add nuw nsw i32 %i.09, 1
|
||||||
%cmp = icmp eq i32* %incdec.ptr, %arrayidx1
|
%cmp = icmp eq i32 addrspace(5)* %incdec.ptr, %arrayidx1
|
||||||
br i1 %cmp, label %for.cond.cleanup, label %for.body
|
br i1 %cmp, label %for.cond.cleanup, label %for.body
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -170,14 +170,14 @@ for.body: ; preds = %for.body, %entry
|
||||||
|
|
||||||
; CHECK-LABEL: @ptr_induction_var_alloca_unknown(
|
; CHECK-LABEL: @ptr_induction_var_alloca_unknown(
|
||||||
; CHECK: %alloca = alloca [64 x i32], align 4
|
; CHECK: %alloca = alloca [64 x i32], align 4
|
||||||
; CHECK: %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
|
; CHECK: %p.08 = phi i32 addrspace(5)* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
|
||||||
; CHECK: %cmp = icmp eq i32* %incdec.ptr, %call
|
; CHECK: %cmp = icmp eq i32 addrspace(5)* %incdec.ptr, %call
|
||||||
define amdgpu_kernel void @ptr_induction_var_alloca_unknown() #0 {
|
define amdgpu_kernel void @ptr_induction_var_alloca_unknown() #0 {
|
||||||
entry:
|
entry:
|
||||||
%alloca = alloca [64 x i32], align 4
|
%alloca = alloca [64 x i32], align 4, addrspace(5)
|
||||||
%arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
|
%arrayidx = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 2
|
||||||
%call = tail call i32* @get_unknown_pointer() #2
|
%call = tail call i32 addrspace(5)* @get_unknown_pointer() #2
|
||||||
%cmp.7 = icmp eq i32* %arrayidx, %call
|
%cmp.7 = icmp eq i32 addrspace(5)* %arrayidx, %call
|
||||||
br i1 %cmp.7, label %for.cond.cleanup, label %for.body.preheader
|
br i1 %cmp.7, label %for.cond.cleanup, label %for.body.preheader
|
||||||
|
|
||||||
for.body.preheader: ; preds = %entry
|
for.body.preheader: ; preds = %entry
|
||||||
|
@ -191,14 +191,14 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo
|
||||||
|
|
||||||
for.body: ; preds = %for.body, %for.body.preheader
|
for.body: ; preds = %for.body, %for.body.preheader
|
||||||
%i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
|
%i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
|
||||||
%p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
|
%p.08 = phi i32 addrspace(5)* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
|
||||||
store i32 %i.09, i32* %p.08, align 4
|
store i32 %i.09, i32 addrspace(5)* %p.08, align 4
|
||||||
%incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
|
%incdec.ptr = getelementptr inbounds i32, i32 addrspace(5)* %p.08, i32 1
|
||||||
%inc = add nuw nsw i32 %i.09, 1
|
%inc = add nuw nsw i32 %i.09, 1
|
||||||
%cmp = icmp eq i32* %incdec.ptr, %call
|
%cmp = icmp eq i32 addrspace(5)* %incdec.ptr, %call
|
||||||
br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body
|
br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body
|
||||||
}
|
}
|
||||||
|
|
||||||
declare i32* @get_unknown_pointer() #0
|
declare i32 addrspace(5)* @get_unknown_pointer() #0
|
||||||
|
|
||||||
attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
|
attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
|
||||||
|
|
|
@ -2,26 +2,26 @@
|
||||||
|
|
||||||
; CHECK-LABEL: @volatile_load(
|
; CHECK-LABEL: @volatile_load(
|
||||||
; CHECK: alloca [4 x i32]
|
; CHECK: alloca [4 x i32]
|
||||||
; CHECK: load volatile i32, i32*
|
; CHECK: load volatile i32, i32 addrspace(5)*
|
||||||
define amdgpu_kernel void @volatile_load(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
|
define amdgpu_kernel void @volatile_load(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
|
||||||
entry:
|
entry:
|
||||||
%stack = alloca [4 x i32], align 4
|
%stack = alloca [4 x i32], align 4, addrspace(5)
|
||||||
%tmp = load i32, i32 addrspace(1)* %in, align 4
|
%tmp = load i32, i32 addrspace(1)* %in, align 4
|
||||||
%arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32]* %stack, i32 0, i32 %tmp
|
%arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 %tmp
|
||||||
%load = load volatile i32, i32* %arrayidx1
|
%load = load volatile i32, i32 addrspace(5)* %arrayidx1
|
||||||
store i32 %load, i32 addrspace(1)* %out
|
store i32 %load, i32 addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: @volatile_store(
|
; CHECK-LABEL: @volatile_store(
|
||||||
; CHECK: alloca [4 x i32]
|
; CHECK: alloca [4 x i32]
|
||||||
; CHECK: store volatile i32 %tmp, i32*
|
; CHECK: store volatile i32 %tmp, i32 addrspace(5)*
|
||||||
define amdgpu_kernel void @volatile_store(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
|
define amdgpu_kernel void @volatile_store(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
|
||||||
entry:
|
entry:
|
||||||
%stack = alloca [4 x i32], align 4
|
%stack = alloca [4 x i32], align 4, addrspace(5)
|
||||||
%tmp = load i32, i32 addrspace(1)* %in, align 4
|
%tmp = load i32, i32 addrspace(1)* %in, align 4
|
||||||
%arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32]* %stack, i32 0, i32 %tmp
|
%arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 %tmp
|
||||||
store volatile i32 %tmp, i32* %arrayidx1
|
store volatile i32 %tmp, i32 addrspace(5)* %arrayidx1
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32,11 +32,11 @@ entry:
|
||||||
; CHECK: load volatile double
|
; CHECK: load volatile double
|
||||||
define amdgpu_kernel void @volatile_and_non_volatile_load(double addrspace(1)* nocapture %arg, i32 %arg1) #0 {
|
define amdgpu_kernel void @volatile_and_non_volatile_load(double addrspace(1)* nocapture %arg, i32 %arg1) #0 {
|
||||||
bb:
|
bb:
|
||||||
%tmp = alloca double, align 8
|
%tmp = alloca double, align 8, addrspace(5)
|
||||||
store double 0.000000e+00, double* %tmp, align 8
|
store double 0.000000e+00, double addrspace(5)* %tmp, align 8
|
||||||
|
|
||||||
%tmp4 = load double, double* %tmp, align 8
|
%tmp4 = load double, double addrspace(5)* %tmp, align 8
|
||||||
%tmp5 = load volatile double, double* %tmp, align 8
|
%tmp5 = load volatile double, double addrspace(5)* %tmp, align 8
|
||||||
|
|
||||||
store double %tmp4, double addrspace(1)* %arg
|
store double %tmp4, double addrspace(1)* %arg
|
||||||
ret void
|
ret void
|
||||||
|
|
|
@ -4,34 +4,34 @@
|
||||||
|
|
||||||
; CHECK-LABEL: @test_insertelement(
|
; CHECK-LABEL: @test_insertelement(
|
||||||
; CHECK: %alloca = alloca i16
|
; CHECK: %alloca = alloca i16
|
||||||
; CHECK-NEXT: insertelement <2 x i16*> undef, i16* %alloca, i32 0
|
; CHECK-NEXT: insertelement <2 x i16 addrspace(5)*> undef, i16 addrspace(5)* %alloca, i32 0
|
||||||
define amdgpu_kernel void @test_insertelement() #0 {
|
define amdgpu_kernel void @test_insertelement() #0 {
|
||||||
entry:
|
entry:
|
||||||
%alloca = alloca i16, align 4
|
%alloca = alloca i16, align 4, addrspace(5)
|
||||||
%in = insertelement <2 x i16*> undef, i16* %alloca, i32 0
|
%in = insertelement <2 x i16 addrspace(5)*> undef, i16 addrspace(5)* %alloca, i32 0
|
||||||
store <2 x i16*> %in, <2 x i16*>* undef, align 4
|
store <2 x i16 addrspace(5)*> %in, <2 x i16 addrspace(5)*>* undef, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: @test_insertvalue(
|
; CHECK-LABEL: @test_insertvalue(
|
||||||
; CHECK: %alloca = alloca i16
|
; CHECK: %alloca = alloca i16
|
||||||
; CHECK-NEXT: insertvalue { i16* } undef, i16* %alloca, 0
|
; CHECK-NEXT: insertvalue { i16 addrspace(5)* } undef, i16 addrspace(5)* %alloca, 0
|
||||||
define amdgpu_kernel void @test_insertvalue() #0 {
|
define amdgpu_kernel void @test_insertvalue() #0 {
|
||||||
entry:
|
entry:
|
||||||
%alloca = alloca i16, align 4
|
%alloca = alloca i16, align 4, addrspace(5)
|
||||||
%in = insertvalue { i16* } undef, i16* %alloca, 0
|
%in = insertvalue { i16 addrspace(5)* } undef, i16 addrspace(5)* %alloca, 0
|
||||||
store { i16* } %in, { i16* }* undef, align 4
|
store { i16 addrspace(5)* } %in, { i16 addrspace(5)* }* undef, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: @test_insertvalue_array(
|
; CHECK-LABEL: @test_insertvalue_array(
|
||||||
; CHECK: %alloca = alloca i16
|
; CHECK: %alloca = alloca i16
|
||||||
; CHECK-NEXT: insertvalue [2 x i16*] undef, i16* %alloca, 0
|
; CHECK-NEXT: insertvalue [2 x i16 addrspace(5)*] undef, i16 addrspace(5)* %alloca, 0
|
||||||
define amdgpu_kernel void @test_insertvalue_array() #0 {
|
define amdgpu_kernel void @test_insertvalue_array() #0 {
|
||||||
entry:
|
entry:
|
||||||
%alloca = alloca i16, align 4
|
%alloca = alloca i16, align 4, addrspace(5)
|
||||||
%in = insertvalue [2 x i16*] undef, i16* %alloca, 0
|
%in = insertvalue [2 x i16 addrspace(5)*] undef, i16 addrspace(5)* %alloca, 0
|
||||||
store [2 x i16*] %in, [2 x i16*]* undef, align 4
|
store [2 x i16 addrspace(5)*] %in, [2 x i16 addrspace(5)*]* undef, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue